diff --git a/.gitattributes b/.gitattributes index ee5f38aed69d11d2ebd82d931ac2cbf4fb71bcda..6e827b64d483dda1e7aac105fc3cde0ae52afbf9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -27,3 +27,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text wandb/run-20220503_172048-zotxt8wa/run-zotxt8wa.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20220503_172048-zotxt8wa/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text +wandb/run-20220504_095140-cwhobv6l/run-cwhobv6l.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20220504_142129-w4rlzz90/run-w4rlzz90.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json index cdccadf57779543e07fc8dc0a6e2490bdd49d00f..b5157a950b9657f4b6903bd7976630c2a067fc83 100644 --- a/config.json +++ b/config.json @@ -182,7 +182,7 @@ "forced_eos_token_id": null, "gradient_checkpointing": false, "hidden_act": "gelu", - "hidden_dropout": 0.1742341660721257, + "hidden_dropout": 0.17305159310134854, "hidden_size": 1024, "id2label": { "0": "LABEL_0", diff --git a/pytorch_model.bin b/pytorch_model.bin index 187fecaffb5e60cf3888f8de19c0514c792de8a4..aa4899bd4919a7a322bd13ee787e22fdb7141504 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60197bcce895c667b06077bcd384a0a7cbcb92374123b43cedf794ae7fde6ad8 +oid sha256:db6d4e2df5d05af63a5d5063df5d529ce8f7390a16a5ecafaa4c5dd1164a828e size 2353867057 diff --git a/run_xtreme_s.py b/run_xtreme_s.py deleted file mode 100644 index 63352178828e0fbdf8422058b5df8d9cd346e62d..0000000000000000000000000000000000000000 --- a/run_xtreme_s.py +++ /dev/null @@ -1,948 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -""" Fine-tuning a πŸ€— Transformers pretrained speech model on the XTREME-S benchmark tasks""" - -import json -import logging -import os -import re -import sys -from collections import OrderedDict, defaultdict -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Union - -import datasets -import numpy as np -import torch -from datasets import DatasetDict, load_dataset, load_metric - -import transformers -from transformers import ( - AutoConfig, - AutoFeatureExtractor, - AutoModelForAudioClassification, - AutoModelForCTC, - AutoModelForSpeechSeq2Seq, - AutoProcessor, - AutoTokenizer, - HfArgumentParser, - Seq2SeqTrainer, - Seq2SeqTrainingArguments, - SpeechEncoderDecoderModel, - Trainer, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version -from transformers.utils.versions import require_version - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
-check_min_version("4.18.0.dev0") - -require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") - - -logger = logging.getLogger(__name__) - - -def list_field(default=None, metadata=None): - return field(default_factory=lambda: default, metadata=metadata) - - -TASK_TO_TARGET_COLUMN_NAME = { - "fleurs-asr": "transcription", - "fleurs-lang_id": "lang_id", - "mls": "transcription", - "voxpopuli": "transcription", - "covost2": "translation", - "minds14": "intent_class", - "babel": "transcription", -} - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - tokenizer_name_or_path: Optional[str] = field( - default=None, - metadata={"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"}, - ) - cache_dir: Optional[str] = field( - default=None, - metadata={ - "help": "Where do you want to store the pretrained models and datasets downloaded from " "huggingface.co" - }, - ) - freeze_feature_encoder: bool = field( - default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} - ) - attention_dropout: float = field( - default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} - ) - activation_dropout: float = field( - default=0.0, metadata={"help": "The dropout ratio for activations inside the fully connected layer."} - ) - feat_proj_dropout: float = field(default=0.0, metadata={"help": "The dropout ratio for the projected features."}) - hidden_dropout: float = field( - default=0.0, - metadata={ - "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." - }, - ) - final_dropout: float = field( - default=0.0, - metadata={"help": "The dropout probability for the final projection layer."}, - ) - mask_time_prob: float = field( - default=0.05, - metadata={ - "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" - "vectors will be masked along the time axis." - }, - ) - mask_time_length: int = field( - default=10, - metadata={"help": "Length of vector span to mask along the time axis."}, - ) - mask_feature_prob: float = field( - default=0.0, - metadata={ - "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" - "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." - }, - ) - mask_feature_length: int = field( - default=10, - metadata={"help": "Length of vector span to mask along the feature axis."}, - ) - layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) - ctc_zero_infinity: bool = field( - default=False, - metadata={"help": "Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`."}, - ) - ctc_loss_reduction: Optional[str] = field( - default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. 
- - Using `HfArgumentParser` we can turn this class - into argparse arguments to be able to specify them on - the command line. - """ - - dataset_name: str = field( - default="google/xtreme_s", - metadata={"help": "The name of the dataset to use (via the datasets library). Defaults to 'google/xtreme_s'"}, - ) - task: str = field( - default=None, - metadata={ - "help": "The task name of the benchmark to use (via the datasets library). Should be on of: " - "'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'." - }, - ) - language: str = field( - default="all", - metadata={"help": "The language id as defined in the datasets config name or `all` for all languages."}, - ) - language_group: str = field( - default=None, - metadata={ - "help": "The language group to select a subset of languages to train on. " - "This option is only used the 'fleurs-asr' task. Should be one of: " - "'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', " - "'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'." - }, - ) - train_split_name: str = field( - default="train", - metadata={ - "help": "The name of the training dataset split to use (via the datasets library). Defaults to 'train'" - }, - ) - eval_split_name: str = field( - default="validation", - metadata={ - "help": "The name of the evaluation dataset split to use (via the datasets library). " - "Defaults to 'validation'" - }, - ) - predict_split_name: str = field( - default="test", - metadata={ - "help": "The name of the prediction dataset split to use (via the datasets library). " "Defaults to 'test'" - }, - ) - audio_column_name: str = field( - default="audio", - metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, - ) - target_column_name: str = field( - default=None, - metadata={ - "help": "The name of the dataset column containing the target data " - "(transcription/translation/label). If None, the name will be inferred from the task. Defaults to None." - }, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - chars_to_ignore: Optional[List[str]] = list_field( - default=', ? . ! 
- ; : " β€œ % β€˜ ” οΏ½'.split(" "), - metadata={"help": "A list of characters to remove from the transcripts."}, - ) - max_duration_in_seconds: float = field( - default=30.0, - metadata={ - "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" - }, - ) - min_duration_in_seconds: float = field( - default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} - ) - preprocessing_only: bool = field( - default=False, - metadata={ - "help": "Whether to only do data preprocessing and skip training. " - "This is especially useful when data preprocessing errors out in distributed training due to timeout. " - "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " - "so that the cached datasets can consequently be loaded in distributed training" - }, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "If :obj:`True`, will use the token generated when running" - ":obj:`transformers-cli login` as HTTP bearer authorization for remote files." - }, - ) - unk_token: str = field( - default="[UNK]", - metadata={"help": "The unk token for the tokenizer"}, - ) - pad_token: str = field( - default="[PAD]", - metadata={"help": "The padding token for the tokenizer"}, - ) - word_delimiter_token: str = field( - default="|", - metadata={"help": "The word delimiter token for the tokenizer"}, - ) - phoneme_language: Optional[str] = field( - default=None, - metadata={ - "help": "The target language that should be used be" - " passed to the tokenizer for tokenization. Note that" - " this is only relevant if the model classifies the" - " input audio to a sequence of phoneme sequences." - }, - ) - per_lang_metrics: bool = field( - default=True, - metadata={ - "help": "If `True`, compute the test metrics separately for each language, and average the results. " - "If `False` compute the average test metrics in a single pass for all languages at once." 
- }, - ) - - -@dataclass -class SpeechDataCollatorWithPadding: - - processor: AutoProcessor - decoder_start_token_id: Optional[int] = None - padding: Union[bool, str] = "longest" - pad_labels: Optional[int] = True - pad_to_multiple_of: Optional[int] = None - pad_to_multiple_of_labels: Optional[int] = None - - def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]: - # split inputs and labels since they have to be of different lenghts and need - # different padding methods - input_features = [{"input_values": feature["input_values"]} for feature in features] - - batch = self.processor.pad( - input_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of, - return_tensors="pt", - ) - - if self.pad_labels: - label_features = [{"input_ids": feature["labels"]} for feature in features] - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) - - # replace padding with -100 to ignore loss correctly - labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) - - # if bos token is appended in previous tokenization step, - # cut bos token here as it's append later anyways - if ( - self.decoder_start_token_id is not None - and (labels[:, 0] == self.decoder_start_token_id).all().cpu().item() - ): - labels = labels[:, 1:] - - batch["labels"] = labels - else: - batch["labels"] = torch.tensor([feature["labels"] for feature in features]) - - return batch - - -def create_vocabulary_from_data( - datasets: DatasetDict, - word_delimiter_token: Optional[str] = None, - unk_token: Optional[str] = None, - pad_token: Optional[str] = None, -): - # Given training and test labels create vocabulary - def extract_all_chars(batch): - all_text = " ".join(batch["target_text"]) - vocab = list(set(all_text)) - return {"vocab": [vocab], "all_text": [all_text]} - - vocabs = datasets.map( - extract_all_chars, - batched=True, - batch_size=-1, - keep_in_memory=True, - remove_columns=datasets["train"].column_names, - ) - - # take union of all unique characters in each dataset - vocab_set = ( - (set(vocabs["train"]["vocab"][0]) if "train" in vocabs else set()) - | (set(vocabs["eval"]["vocab"][0]) if "eval" in vocabs else set()) - | (set(vocabs["predict"]["vocab"][0]) if "predict" in vocabs else set()) - ) - - vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))} - - # replace white space with delimiter token - if word_delimiter_token is not None: - vocab_dict[word_delimiter_token] = vocab_dict[" "] - del vocab_dict[" "] - - # add unk and pad token - if unk_token is not None: - vocab_dict[unk_token] = len(vocab_dict) - - if pad_token is not None: - vocab_dict[pad_token] = len(vocab_dict) - - return vocab_dict - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - logger.info("Training/evaluation parameters %s", training_args) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # 1. First, let's load the dataset - raw_datasets = DatasetDict() - task_name = data_args.task - lang_id = data_args.language - - if task_name is None: - raise ValueError( - "Set --task should be set to '' " "(e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') " - ) - if lang_id is None: - raise ValueError( - "Set --language should be set to the language id of the sub dataset " - "config to be used (e.g. 'pl', 'en.tr', 'fr-FR') or 'all'" - " for multi-lingual fine-tuning." - ) - if data_args.language_group is not None: - if data_args.task != "fleurs-asr": - raise ValueError("--language_group should only be used with --task=fleurs-asr") - if data_args.language != "all": - raise ValueError("--language_group should only be used with --language=all") - - if data_args.target_column_name is None: - target_column_name = TASK_TO_TARGET_COLUMN_NAME[task_name] - else: - target_column_name = data_args.target_column_name - - # here we differentiate between tasks with text as the target and classification tasks - is_text_target = target_column_name in ("transcription", "translation") - - config_name = ".".join([task_name.split("-")[0], lang_id]) - - if training_args.do_train: - raw_datasets["train"] = load_dataset( - data_args.dataset_name, - config_name, - split=data_args.train_split_name, - use_auth_token=data_args.use_auth_token, - cache_dir=model_args.cache_dir, - ) - - if data_args.audio_column_name not in raw_datasets["train"].column_names: - raise ValueError( - f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." 
- ) - - if target_column_name not in raw_datasets["train"].column_names: - raise ValueError( - f"--target_column_name {target_column_name} not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--target_column_name` to the correct text column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." - ) - - if data_args.max_train_samples is not None: - raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) - - if training_args.do_eval: - raw_datasets["eval"] = load_dataset( - data_args.dataset_name, - config_name, - split=data_args.eval_split_name, - use_auth_token=data_args.use_auth_token, - cache_dir=model_args.cache_dir, - ) - - if data_args.max_eval_samples is not None: - raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) - - if training_args.do_predict: - raw_datasets["predict"] = load_dataset( - data_args.dataset_name, - config_name, - split=data_args.predict_split_name, - use_auth_token=data_args.use_auth_token, - cache_dir=model_args.cache_dir, - ) - - if data_args.max_predict_samples is not None: - raw_datasets["predict"] = raw_datasets["predict"].select(range(data_args.max_predict_samples)) - - lang_list = next(iter(raw_datasets.values())).features["lang_id"].names - if not is_text_target: - label_list = next(iter(raw_datasets.values())).features[target_column_name].names - num_labels = len(label_list) - - num_workers = data_args.preprocessing_num_workers - - lang_group = data_args.language_group - if lang_group is not None: - with training_args.main_process_first(desc="language group filter"): - lang_group_id = next(iter(raw_datasets.values())).features["lang_group_id"].str2int(lang_group) - raw_datasets = raw_datasets.filter( - lambda lang_group: lang_group == lang_group_id, - num_proc=num_workers, - input_columns=["lang_group_id"], - ) - - # 2. We remove some special characters from the datasets - # that make training complicated and do not help in transcribing the speech - # E.g. 
characters, such as `,` and `.` do not really have an acoustic characteristic - # that could be easily picked up by the model - chars_to_ignore_regex = ( - f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None - ) - - def remove_special_characters(batch): - if chars_to_ignore_regex is not None: - batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[target_column_name]).lower() - else: - batch["target_text"] = batch[target_column_name].lower() - return batch - - if is_text_target: - with training_args.main_process_first(desc="dataset map special characters removal"): - raw_datasets = raw_datasets.map( - remove_special_characters, - remove_columns=[target_column_name], - desc="remove special characters from datasets", - ) - - # save special tokens for tokenizer - word_delimiter_token = data_args.word_delimiter_token - unk_token = data_args.unk_token - pad_token = data_args.pad_token - - - encoder_id = "facebook/wav2vec2-xls-r-300m" - decoder_id = "facebook/bart-large" - - model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(encoder_id, decoder_id, encoder_add_adapter=True) - model.config.encoder.feat_proj_dropout = 0.0 - model.config.encoder.final_dropout = 0.0 - model.config.encoder.mask_time_prob = 0.1 - model.config.decoder_start_token_id = model.decoder.config.bos_token_id - model.config.pad_token_id = model.decoder.config.pad_token_id - model.config.eos_token_id = model.decoder.config.eos_token_id - model.config.max_length = 40 - model.config.num_beams = 1 - model.config.encoder.layerdrop = 0.0 - model.config.use_cache = False - model.config.processor_class = "Wav2Vec2Processor" - - model.save_pretrained(model_args.model_name_or_path) - - feature_etxractor = AutoFeatureExtractor.from_pretrained(encoder_id) - feature_etxractor.save_pretrained(model_args.model_name_or_path) - tokenizer = AutoTokenizer.from_pretrained(decoder_id) - tokenizer.save_pretrained(model_args.model_name_or_path) - - # 3. Next, let's load the config as we might need it to create - # the tokenizer - config = AutoConfig.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token - ) - - if is_text_target: - # 4. 
(Optional, for ASR and translation) If no tokenizer file is defined, - # we create the vocabulary of the model by extracting all unique characters from - # the training and evaluation datasets - # We need to make sure that only first rank saves vocabulary - # make sure all processes wait until vocab is created - tokenizer_name_or_path = model_args.tokenizer_name_or_path - tokenizer_kwargs = {} - if tokenizer_name_or_path is None: - # save vocab in training output dir - tokenizer_name_or_path = training_args.output_dir - - vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json") - - with training_args.main_process_first(): - if training_args.overwrite_output_dir and os.path.isfile(vocab_file): - os.remove(vocab_file) - - with training_args.main_process_first(desc="dataset map vocabulary creation"): - if not os.path.isfile(vocab_file): - os.makedirs(tokenizer_name_or_path, exist_ok=True) - vocab_dict = create_vocabulary_from_data( - raw_datasets, - word_delimiter_token=word_delimiter_token, - unk_token=unk_token, - pad_token=pad_token, - ) - - # save vocab dict to be loaded into tokenizer - with open(vocab_file, "w") as file: - json.dump(vocab_dict, file) - - # if tokenizer has just been created - # it is defined by `tokenizer_class` if present in config else by `model_type` - if not config.is_encoder_decoder: - tokenizer_kwargs = { - "config": config if config.tokenizer_class is not None else None, - "tokenizer_type": config.model_type if config.tokenizer_class is None else None, - "unk_token": unk_token, - "pad_token": pad_token, - "word_delimiter_token": word_delimiter_token, - } - else: - tokenizer_kwargs = {} - - # 5. Now we can instantiate the feature extractor, tokenizer and model - # Note for distributed training, the .from_pretrained methods guarantee that only - # one local process can concurrently download model & vocab. 
- - # load feature_extractor and tokenizer - if is_text_target: - tokenizer = AutoTokenizer.from_pretrained( - tokenizer_name_or_path, - use_auth_token=data_args.use_auth_token, - **tokenizer_kwargs, - ) - feature_extractor = AutoFeatureExtractor.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token - ) - - # adapt config - # (speech translation requires pre-configured seq2seq models) - if task_name != "covost2": - config.update( - { - "feat_proj_dropout": model_args.feat_proj_dropout, - "attention_dropout": model_args.attention_dropout, - "hidden_dropout": model_args.hidden_dropout, - "final_dropout": model_args.final_dropout, - "mask_time_prob": model_args.mask_time_prob, - "mask_time_length": model_args.mask_time_length, - "mask_feature_prob": model_args.mask_feature_prob, - "mask_feature_length": model_args.mask_feature_length, - "gradient_checkpointing": training_args.gradient_checkpointing, - "layerdrop": model_args.layerdrop, - "ctc_zero_infinity": model_args.ctc_zero_infinity, - "ctc_loss_reduction": model_args.ctc_loss_reduction, - "activation_dropout": model_args.activation_dropout, - } - ) - if training_args.do_train: - if is_text_target: - config.pad_token_id = tokenizer.pad_token_id - config.vocab_size = len(tokenizer) - else: - label_to_id = {v: i for i, v in enumerate(label_list)} - config.label2id = label_to_id - config.id2label = {id: label for label, id in label_to_id.items()} - config.num_labels = num_labels - else: - config.encoder.update({"hidden_dropout": model_args.hidden_dropout}) - - # create model - if target_column_name == "transcription": - model = AutoModelForCTC.from_pretrained( - model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - config=config, - use_auth_token=data_args.use_auth_token, - ) - elif config.is_encoder_decoder: - model = AutoModelForSpeechSeq2Seq.from_pretrained( - model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - config=config, - use_auth_token=data_args.use_auth_token, - ) - if model.config.decoder_start_token_id is None: - raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined") - else: - model = AutoModelForAudioClassification.from_pretrained( - model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - config=config, - use_auth_token=data_args.use_auth_token, - ) - - # freeze encoder - if model_args.freeze_feature_encoder: - model.freeze_feature_encoder() - - # 6. 
Now we preprocess the datasets including loading the audio, resampling and normalization - # Thankfully, `datasets` takes care of automatically loading and resampling the audio, - # so that we just need to set the correct target sampling rate and normalize the input - # via the `feature_extractor` - - # make sure that dataset decodes audio with correct sampling rate - dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate - if dataset_sampling_rate != feature_extractor.sampling_rate: - raw_datasets = raw_datasets.cast_column( - data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) - ) - - # derive max & min input length for sample rate & max duration - max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate - min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate - audio_column_name = data_args.audio_column_name - - # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification - phoneme_language = data_args.phoneme_language - - # Preprocessing the datasets. - # We need to read the audio files as arrays and tokenize the targets. - def prepare_dataset(batch): - # load audio - sample = batch[audio_column_name] - - inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) - batch["input_values"] = inputs.input_values[0] - batch["length"] = len(batch["input_values"]) - - # encode targets - additional_kwargs = {} - if phoneme_language is not None: - additional_kwargs["phonemizer_lang"] = phoneme_language - - if is_text_target: - batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids - else: - batch["labels"] = batch[target_column_name] - - batch["lang"] = batch["lang_id"] - - return batch - - with training_args.main_process_first(desc="dataset map preprocessing"): - vectorized_datasets = raw_datasets.map( - prepare_dataset, - remove_columns=next(iter(raw_datasets.values())).column_names, - num_proc=num_workers, - desc="preprocess datasets", - ) - - if training_args.do_train: - - def is_audio_in_length_range(length): - return length > min_input_length and length < max_input_length - - # filter data that is shorter than min_input_length - vectorized_datasets["train"] = vectorized_datasets["train"].filter( - is_audio_in_length_range, - num_proc=num_workers, - input_columns=["length"], - ) - - # 7. Next, we can prepare for the training step. - # Let's use the appropriate XTREME-S evaluation metric, - # instantiate a data collator and the trainer - - # Define evaluation metrics during training, *i.e.* word error rate, character error rate - eval_metric = load_metric("xtreme_s", task_name) - - # for large datasets it is advised to run the preprocessing on a - # single machine first with ``args.preprocessing_only`` since there will mostly likely - # be a timeout when running the script in distributed mode. - # In a second step ``args.preprocessing_only`` can then be set to `False` to load the - # cached dataset - if data_args.preprocessing_only: - logger.info(f"Data preprocessing finished. 
Files cached at {vectorized_datasets.cache_files}") - return - - def asr_logits_argmax(logits, labels): - return logits.argmax(dim=-1) - - def compute_asr_metric(pred): - pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id - - pred_str = tokenizer.batch_decode(pred.predictions) - # we do not want to group tokens when computing the metrics - label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False) - - metric = eval_metric.compute(predictions=pred_str, references=label_str) - return metric - - def compute_classification_metric(pred): - pred_ids = np.argmax(pred.predictions, axis=1) - metric = eval_metric.compute(predictions=pred_ids, references=pred.label_ids) - return metric - - # Now save everything to be able to create a single processor later - if is_main_process(training_args.local_rank): - # save feature extractor, tokenizer and config - feature_extractor.save_pretrained(training_args.output_dir) - if is_text_target: - tokenizer.save_pretrained(training_args.output_dir) - config.save_pretrained(training_args.output_dir) - # wait until configs are saved in the main process before loading the processor - if training_args.local_rank != -1: - torch.distributed.barrier() - - if is_text_target: - processor = AutoProcessor.from_pretrained(training_args.output_dir) - else: - processor = AutoFeatureExtractor.from_pretrained(training_args.output_dir) - - # Instantiate custom data collator - data_collator = SpeechDataCollatorWithPadding(processor=processor, pad_labels=is_text_target) - - # Initialize Trainer - if target_column_name == "translation": - trainer = Seq2SeqTrainer( - model=model, - data_collator=data_collator, - args=training_args, - preprocess_logits_for_metrics=asr_logits_argmax if training_args.predict_with_generate else None, - compute_metrics=compute_asr_metric if training_args.predict_with_generate else None, - train_dataset=vectorized_datasets["train"] if training_args.do_train else None, - eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None, - tokenizer=feature_extractor, - ) - else: - trainer = Trainer( - model=model, - data_collator=data_collator, - args=training_args, - preprocess_logits_for_metrics=asr_logits_argmax if is_text_target else None, - compute_metrics=compute_asr_metric if is_text_target else compute_classification_metric, - train_dataset=vectorized_datasets["train"] if training_args.do_train else None, - eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None, - tokenizer=feature_extractor, - ) - - # 8. 
Finally, we can start training - - # Training - if training_args.do_train: - - # use last checkpoint if exist - if last_checkpoint is not None: - checkpoint = last_checkpoint - elif os.path.isdir(model_args.model_name_or_path): - checkpoint = model_args.model_name_or_path - else: - checkpoint = None - - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() - - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples - if data_args.max_train_samples is not None - else len(vectorized_datasets["train"]) - ) - metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"])) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation on the test set - results = {} - if training_args.do_predict: - logger.info(f"*** Evaluating on the `{data_args.predict_split_name}` set ***") - if data_args.per_lang_metrics: - # separate the `test` dataset into language-specific subsets and compute metrics for each of them - metrics = {} - average_metrics = defaultdict(list) - for lang_id in range(len(lang_list)): - lang_name = lang_list[lang_id] - with training_args.main_process_first(desc="per-language dataset filter"): - lang_dataset = vectorized_datasets["predict"].filter( - lambda lang: lang == lang_id, - num_proc=num_workers, - input_columns=["lang"], - ) - lang_metrics = trainer.evaluate(lang_dataset) - redundant_metrics = ["eval_runtime", "eval_samples_per_second", "eval_steps_per_second", "eval_epoch"] - for metric_name, value in lang_metrics.items(): - average_metrics[metric_name].append(value) - if metric_name not in redundant_metrics: - metrics[f"{metric_name}_{lang_name}"] = value - for metric_name, value in average_metrics.items(): - metrics[metric_name] = np.mean(value) - else: - metrics = trainer.evaluate(vectorized_datasets["predict"]) - max_predict_samples = ( - data_args.max_predict_samples - if data_args.max_predict_samples is not None - else len(vectorized_datasets["predict"]) - ) - metrics["predict_samples"] = min(max_predict_samples, len(vectorized_datasets["predict"])) - - # make sure that the `predict` metrics end up in the log history for the model card - trainer.log(OrderedDict(sorted(metrics.items()))) - - trainer.log_metrics("predict", metrics) - trainer.save_metrics("predict", metrics) - - # Write model card and (optionally) push to hub - kwargs = { - "finetuned_from": model_args.model_name_or_path, - "tasks": task_name, - "tags": [task_name, data_args.dataset_name], - "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}", - "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", - "language": data_args.language, - } - - if training_args.push_to_hub: - trainer.push_to_hub(**kwargs) - else: - trainer.create_model_card(**kwargs) - - return results - - -if __name__ == "__main__": - main() - diff --git a/run_xtreme_s.py b/run_xtreme_s.py new file mode 120000 index 0000000000000000000000000000000000000000..5dd1dff8df0cd819aa47eeac54ff24fbf6cb498b --- /dev/null +++ b/run_xtreme_s.py @@ -0,0 +1 @@ +/home/sanchit_huggingface_co/run_xtreme_s.py \ No newline at end of file diff --git a/runs/May03_17-16-03_sanchit--v100/events.out.tfevents.1651598448.sanchit--v100.42221.0 b/runs/May03_17-16-03_sanchit--v100/events.out.tfevents.1651598448.sanchit--v100.42221.0 index 
d5f7f665039d561b8f2128cf5e36f78e0289af0e..8f6ad981d3d735a1a41065ee306d601bcd7dee7a 100644 --- a/runs/May03_17-16-03_sanchit--v100/events.out.tfevents.1651598448.sanchit--v100.42221.0 +++ b/runs/May03_17-16-03_sanchit--v100/events.out.tfevents.1651598448.sanchit--v100.42221.0 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:012338f317ae3f084c2160a5716c65f5bf844c76b8cb43abff36a7c367ee1dfa -size 797661 +oid sha256:41a5bb21fe94ff5b65d5398a079e0c6f5f0208e558c94a1b5518d85c06ee3eed +size 876480 diff --git a/runs/May04_09-50-47_sanchit--v100/1651657899.856504/events.out.tfevents.1651657899.sanchit--v100.49135.1 b/runs/May04_09-50-47_sanchit--v100/1651657899.856504/events.out.tfevents.1651657899.sanchit--v100.49135.1 new file mode 100644 index 0000000000000000000000000000000000000000..f77e901fc06772d4208a013dca1123d4ff320794 --- /dev/null +++ b/runs/May04_09-50-47_sanchit--v100/1651657899.856504/events.out.tfevents.1651657899.sanchit--v100.49135.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f3b61599794bdf22f306f159d9525d6122da5432c3849ecf9145f030cbad78 +size 5184 diff --git a/runs/May04_09-50-47_sanchit--v100/events.out.tfevents.1651657899.sanchit--v100.49135.0 b/runs/May04_09-50-47_sanchit--v100/events.out.tfevents.1651657899.sanchit--v100.49135.0 new file mode 100644 index 0000000000000000000000000000000000000000..a9c961033513c291fb011ea1677147271447c4d4 --- /dev/null +++ b/runs/May04_09-50-47_sanchit--v100/events.out.tfevents.1651657899.sanchit--v100.49135.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8729bf55db84d47c6384d97886e8c5cb8b099578f3ee1d58539419ba476ff9 +size 36633 diff --git a/runs/May04_13-30-49_sanchit--v100/1651674089.1303275/events.out.tfevents.1651674089.sanchit--v100.50430.1 b/runs/May04_13-30-49_sanchit--v100/1651674089.1303275/events.out.tfevents.1651674089.sanchit--v100.50430.1 new file mode 100644 index 0000000000000000000000000000000000000000..6bb5316c2df653dc95daa88f9c9f7114c98bbe18 --- /dev/null +++ b/runs/May04_13-30-49_sanchit--v100/1651674089.1303275/events.out.tfevents.1651674089.sanchit--v100.50430.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a097a8d05cba5849f68e86c37e864f3de4696713080887d6d222fb6c05a13197 +size 5184 diff --git a/runs/May04_13-30-49_sanchit--v100/events.out.tfevents.1651674089.sanchit--v100.50430.0 b/runs/May04_13-30-49_sanchit--v100/events.out.tfevents.1651674089.sanchit--v100.50430.0 new file mode 100644 index 0000000000000000000000000000000000000000..bfe79927aa7e8c01c429a67a14d01492b3c69352 --- /dev/null +++ b/runs/May04_13-30-49_sanchit--v100/events.out.tfevents.1651674089.sanchit--v100.50430.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce28cd4ad04b2a746016eff65528d34dbc7d310b0f44953856160b055fd56a0 +size 88291 diff --git a/training_args.bin b/training_args.bin index 4f0acc794ce998a81ebb1af30bee39f22de11517..501e9efa19e00f5d36dd224fec1c6beac304eb68 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c92db708d094fc9c984268e3892bb61b788af2f22d46c8a70029d68f2645d771 +oid sha256:9b7921c29322d3f359e19c998450e84ce2ab0e8ef35aa086e1fa26e4a6b15e94 size 3247 diff --git a/wandb/debug-cli.log b/wandb/debug-cli.log new file mode 100644 index 0000000000000000000000000000000000000000..f0c79ad6b81fc9adfe65b587ff5a6ddfa82489d6 --- /dev/null +++ b/wandb/debug-cli.log @@ -0,0 +1,26 @@ +2022-05-04 13:30:45 INFO Running runs: [] +2022-05-04 13:30:45 INFO 
Agent received command: run +2022-05-04 13:30:45 INFO Agent starting run with config: + eval_split_name: test + eval_steps: 500 + evaluation_strategy: steps + generation_max_length: 40 + generation_num_beams: 1 + gradient_accumulation_steps: 8 + greater_is_better: True + hidden_dropout: 0.17305159310134854 + language: fr.en + learning_rate: 0.00012335092351490598 + logging_steps: 1 + max_duration_in_seconds: 20 + metric_for_best_model: bleu + model_name_or_path: ./ + num_train_epochs: 3 + output_dir: ./ + per_device_eval_batch_size: 8 + per_device_train_batch_size: 8 + save_steps: 500 + task: covost2 + warmup_steps: 500 +2022-05-04 13:30:45 INFO About to run command: python3 run_xtreme_s.py --overwrite_output_dir --freeze_feature_encoder --gradient_checkpointing --predict_with_generate --fp16 --group_by_length --do_train --do_eval --load_best_model_at_end --push_to_hub --use_auth_token --eval_split_name=test --eval_steps=500 --evaluation_strategy=steps --generation_max_length=40 --generation_num_beams=1 --gradient_accumulation_steps=8 --greater_is_better=True --hidden_dropout=0.17305159310134854 --language=fr.en --learning_rate=0.00012335092351490598 --logging_steps=1 --max_duration_in_seconds=20 --metric_for_best_model=bleu --model_name_or_path=./ --num_train_epochs=3 --output_dir=./ --per_device_eval_batch_size=8 --per_device_train_batch_size=8 --save_steps=500 --task=covost2 --warmup_steps=500 +2022-05-04 13:30:50 INFO Running runs: ['w4rlzz90'] diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log index b93e4c402efe9a183d4c60353f86e615dfef7955..61f5cde5e35f8c9937a8eaa6949c7714eb5865ad 120000 --- a/wandb/debug-internal.log +++ b/wandb/debug-internal.log @@ -1 +1 @@ -run-20220503_172048-zotxt8wa/logs/debug-internal.log \ No newline at end of file +run-20220504_142129-w4rlzz90/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log index 76361e581554b7d526ba4f00419a2c6f48b1201b..169da42504931e054b69052739e0092cb0db9d86 120000 --- a/wandb/debug.log +++ b/wandb/debug.log @@ -1 +1 @@ -run-20220503_172048-zotxt8wa/logs/debug.log \ No newline at end of file +run-20220504_142129-w4rlzz90/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run index ac4ad7f9041ea4869206c55c2db8917c51cd0fdf..30626ac6b38d43c2126fccc3ed15690ca9930e90 120000 --- a/wandb/latest-run +++ b/wandb/latest-run @@ -1 +1 @@ -run-20220503_172048-zotxt8wa \ No newline at end of file +run-20220504_142129-w4rlzz90 \ No newline at end of file diff --git a/wandb/run-20220503_172048-zotxt8wa/files/output.log b/wandb/run-20220503_172048-zotxt8wa/files/output.log index 56506fa818b2ebd7542e89248951e6b6b11a4528..2b0a822b70336e4bf4d4ff55aca8280ae03accab 100644 --- a/wandb/run-20220503_172048-zotxt8wa/files/output.log +++ b/wandb/run-20220503_172048-zotxt8wa/files/output.log @@ -104277,3 +104277,10398 @@ To disable this warning, you can either: huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... 
+To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+ 26%|█████████████████▏ | 5001/19440 [15:00:21<4612:05:52, 1149.91s/it]
+{'loss': 7.2353, 'learning_rate': 0.00027188682804108984, 'epoch': 0.77}
+ 26%|█████████████████▊ | 5002/19440 [15:00:26<3233:45:00, 806.31s/it]
+{'loss': 7.0968, 'learning_rate': 0.00027186800583077204, 'epoch': 0.77}
+ 26%|█████████████████▊ | 5003/19440 [15:00:30<2268:43:48, 565.73s/it]
+{'loss': 6.9315, 'learning_rate': 0.0002718491836204542, 'epoch': 0.77}
+ 26%|█████████████████▊ | 5004/19440 [15:00:35<1593:08:35, 397.29s/it]
+{'loss': 7.062, 'learning_rate': 0.00027183036141013633, 'epoch': 0.77}
+ 26%|█████████████████▊ | 5005/19440 [15:00:39<1120:08:00, 279.35s/it]
+{'loss': 6.9241, 'learning_rate': 0.0002718115391998185, 'epoch': 0.77}
+ 26%|██████████████████ | 5006/19440 [15:00:43<788:57:41, 196.78s/it]
+{'loss': 6.9087, 'learning_rate': 0.0002717927169895007, 'epoch': 0.77}
+ 26%|██████████████████ | 5007/19440 [15:00:47<557:12:46, 138.98s/it]
+{'loss': 6.8207, 'learning_rate': 0.0002717738947791828, 'epoch': 0.77}
+ 26%|██████████████████▎ | 5008/19440 [15:00:51<394:53:10, 98.50s/it]
+{'loss': 6.7627, 'learning_rate': 0.000271755072568865, 'epoch': 0.77}
+ 26%|██████████████████▎ | 5009/19440 [15:00:55<281:07:33, 70.13s/it]
+{'loss': 6.8625, 'learning_rate': 0.0002717362503585472, 'epoch': 0.77}
+ 26%|██████████████████▎ | 5010/19440 [15:00:59<201:38:37, 50.31s/it]
+{'loss': 6.5945, 'learning_rate': 0.00027171742814822937, 'epoch': 0.77}
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5011/19440 [15:01:03<145:52:43, 36.40s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7398, 'learning_rate': 0.0002716986059379115, 'epoch': 0.77} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5012/19440 [15:01:07<106:38:05, 26.61s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.6961, 'learning_rate': 0.00027167978372759366, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5013/19440 [15:01:11<79:49:59, 19.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.6652, 'learning_rate': 0.00027166096151727586, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5014/19440 [15:01:15<60:33:57, 15.11s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7652, 'learning_rate': 0.000271642139306958, 'epoch': 0.77} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5015/19440 [15:01:19<46:57:19, 11.72s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3861, 'learning_rate': 0.0002716233170966402, 'epoch': 0.77} + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5016/19440 [15:01:23<37:19:53, 9.32s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5017/19440 [15:01:26<30:51:32, 7.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5883, 'learning_rate': 0.00027160449488632235, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5018/19440 [15:01:30<25:58:05, 6.48s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.6158, 'learning_rate': 0.00027158567267600455, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3952, 'learning_rate': 0.0002715668504656867, 'epoch': 0.77} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5019/19440 [15:01:34<22:28:07, 5.61s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3773, 'learning_rate': 0.00027154802825536884, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5020/19440 [15:01:37<19:56:37, 4.98s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5021/19440 [15:01:41<18:08:13, 4.53s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.4866, 'learning_rate': 0.00027152920604505104, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5022/19440 [15:01:44<17:00:07, 4.25s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.4064, 'learning_rate': 0.0002715103838347332, 'epoch': 0.77} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2864, 'learning_rate': 0.0002714915616244154, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5023/19440 [15:01:48<16:10:09, 4.04s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3696, 'learning_rate': 0.00027147273941409753, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5024/19440 [15:01:51<15:32:52, 3.88s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 6.1424, 'learning_rate': 0.0002714539172037797, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5025/19440 [15:01:55<15:35:30, 3.89s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.537, 'learning_rate': 0.0002714350949934618, 'epoch': 0.78} + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5026/19440 [15:01:59<15:05:49, 3.77s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5027/19440 [15:02:02<14:36:26, 3.65s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5337, 'learning_rate': 0.000271416272783144, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2552, 'learning_rate': 0.0002713974505728262, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5028/19440 [15:02:05<14:16:03, 3.56s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5029/19440 [15:02:09<13:53:29, 3.47s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5029/19440 [15:02:09<13:53:29, 3.47s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2578, 'learning_rate': 0.00027135980615219056, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5030/19440 [15:02:12<13:35:11, 3.39s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 6.2183, 'learning_rate': 0.0002713409839418727, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5031/19440 [15:02:15<13:14:22, 3.31s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.026, 'learning_rate': 0.00027132216173155485, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5032/19440 [15:02:18<12:46:16, 3.19s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5033/19440 [15:02:21<12:28:23, 3.12s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5033/19440 [15:02:21<12:28:23, 3.12s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0845, 'learning_rate': 0.0002712845173109192, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5034/19440 [15:02:24<12:13:46, 3.06s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5035/19440 [15:02:27<11:58:46, 2.99s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0071, 'learning_rate': 0.00027126569510060134, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0906, 'learning_rate': 0.00027124687289028354, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5036/19440 [15:02:29<11:48:20, 2.95s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5037/19440 [15:02:32<11:38:26, 2.91s/it]
+{'loss': 5.9582, 'learning_rate': 0.00027122805067996574, 'epoch': 0.78}
+{'loss': 5.7155, 'learning_rate': 0.0002712092284696479, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5038/19440 [15:02:36<12:01:05, 3.00s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5039/19440 [15:02:38<11:45:29, 2.94s/it]
+{'loss': 6.0514, 'learning_rate': 0.00027119040625933003, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5040/19440 [15:02:41<11:26:40, 2.86s/it]
+{'loss': 5.8325, 'learning_rate': 0.0002711715840490122, 'epoch': 0.78}
+{'loss': 5.4197, 'learning_rate': 0.0002711527618386944, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5041/19440 [15:02:44<11:12:41, 2.80s/it]
+{'loss': 5.8506, 'learning_rate': 0.0002711339396283765, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5042/19440 [15:02:46<11:02:54, 2.76s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5043/19440 [15:02:49<10:51:30, 2.72s/it]
+{'loss': 5.5539, 'learning_rate': 0.0002711151174180587, 'epoch': 0.78}
+{'loss': 5.4672, 'learning_rate': 0.0002710962952077409, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5044/19440 [15:02:52<10:46:07, 2.69s/it]
+{'loss': 5.215, 'learning_rate': 0.00027107747299742307, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5045/19440 [15:02:54<10:31:13, 2.63s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5046/19440 [15:02:57<10:19:30, 2.58s/it]
+{'loss': 5.0267, 'learning_rate': 0.0002710586507871052, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5047/19440 [15:02:59<10:07:30, 2.53s/it]
+{'loss': 4.9159, 'learning_rate': 0.00027103982857678736, 'epoch': 0.78}
+{'loss': 5.0468, 'learning_rate': 0.00027102100636646956, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5048/19440 [15:03:01<9:59:55, 2.50s/it]
+{'loss': 4.6901, 'learning_rate': 0.0002710021841561517, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5049/19440 [15:03:04<9:50:30, 2.46s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5050/19440 [15:03:07<10:10:04, 2.54s/it]
+{'loss': 4.6805, 'learning_rate': 0.0002709833619458339, 'epoch': 0.78}
+{'loss': 7.2864, 'learning_rate': 0.00027096453973551605, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5051/19440 [15:03:11<12:49:46, 3.21s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5052/19440 [15:03:16<14:09:45, 3.54s/it]
+{'loss': 6.9982, 'learning_rate': 0.0002709268953148804, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5053/19440 [15:03:20<14:50:14, 3.71s/it]
+{'loss': 7.098, 'learning_rate': 0.00027090807310456254, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5054/19440 [15:03:24<15:07:15, 3.78s/it]
+{'loss': 6.9643, 'learning_rate': 0.00027088925089424474, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5055/19440 [15:03:27<15:10:35, 3.80s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5056/19440 [15:03:31<15:11:55, 3.80s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5057/19440 [15:03:35<15:21:01, 3.84s/it]
+{'loss': 6.9384, 'learning_rate': 0.0002708516064736091, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5058/19440 [15:03:39<15:14:43, 3.82s/it]
+{'loss': 6.9776, 'learning_rate': 0.00027083278426329123, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5059/19440 [15:03:43<15:07:40, 3.79s/it]
+{'loss': 6.9302, 'learning_rate': 0.0002708139620529734, 'epoch': 0.78}
+{'loss': 6.7927, 'learning_rate': 0.0002707951398426555, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5060/19440 [15:03:46<14:58:16, 3.75s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5061/19440 [15:03:50<14:47:52, 3.70s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5062/19440 [15:03:54<14:40:03, 3.67s/it]
+{'loss': 6.9061, 'learning_rate': 0.0002707574954220199, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5063/19440 [15:03:58<15:00:07, 3.76s/it]
+{'loss': 6.782, 'learning_rate': 0.00027073867321170206, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5064/19440 [15:04:01<14:47:02, 3.70s/it]
+{'loss': 6.6194, 'learning_rate': 0.00027071985100138426, 'epoch': 0.78}
+{'loss': 6.5265, 'learning_rate': 0.0002707010287910664, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5065/19440 [15:04:05<14:30:18, 3.63s/it]
+{'loss': 6.6959, 'learning_rate': 0.00027068220658074856, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5066/19440 [15:04:08<14:13:57, 3.56s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5067/19440 [15:04:11<13:58:35, 3.50s/it]
+{'loss': 6.7507, 'learning_rate': 0.0002706633843704307, 'epoch': 0.78}
+{'loss': 6.546, 'learning_rate': 0.0002706445621601129, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5068/19440 [15:04:15<13:45:36, 3.45s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5069/19440 [15:04:18<13:31:15, 3.39s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5070/19440 [15:04:21<13:18:39, 3.33s/it]
+{'loss': 6.5103, 'learning_rate': 0.00027060691773947724, 'epoch': 0.78}
+{'loss': 6.5082, 'learning_rate': 0.00027058809552915944, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5071/19440 [15:04:24<13:08:54, 3.29s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5072/19440 [15:04:27<12:58:39, 3.25s/it]
+{'loss': 6.6161, 'learning_rate': 0.0002705692733188416, 'epoch': 0.78}
+{'loss': 6.5065, 'learning_rate': 0.00027055045110852374, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5073/19440 [15:04:31<12:49:33, 3.21s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5074/19440 [15:04:34<12:53:39, 3.23s/it]
+{'loss': 6.2262, 'learning_rate': 0.0002705316288982059, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5075/19440 [15:04:37<13:14:55, 3.32s/it]
+{'loss': 6.3847, 'learning_rate': 0.0002705128066878881, 'epoch': 0.78}
+{'loss': 6.232, 'learning_rate': 0.0002704939844775702, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5076/19440 [15:04:40<13:02:25, 3.27s/it]
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5077/19440 [15:04:43<12:44:18, 3.19s/it]
+{'loss': 6.238, 'learning_rate': 0.0002704751622672524, 'epoch': 0.78}
+{'loss': 6.1653, 'learning_rate': 0.00027045634005693457, 'epoch': 0.78}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5078/19440 [15:04:47<12:31:03, 3.14s/it]
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5079/19440 [15:04:49<12:16:48, 3.08s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3818, 'learning_rate': 0.0002704375178466167, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2608, 'learning_rate': 0.0002704186956362989, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5080/19440 [15:04:52<12:08:11, 3.04s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5081/19440 [15:04:55<11:57:25, 3.00s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2612, 'learning_rate': 0.00027039987342598106, 'epoch': 0.78} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2312, 'learning_rate': 0.00027038105121566326, 'epoch': 0.78} + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5082/19440 [15:04:58<11:49:40, 2.97s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.1847, 'learning_rate': 0.0002703622290053454, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5083/19440 [15:05:01<11:44:40, 2.94s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5084/19440 [15:05:04<11:38:58, 2.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9134, 'learning_rate': 0.0002703434067950276, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.0898, 'learning_rate': 0.00027032458458470975, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5085/19440 [15:05:07<11:31:42, 2.89s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5086/19440 [15:05:10<11:24:41, 2.86s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.1434, 'learning_rate': 0.0002703057623743919, 'epoch': 0.78} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8231, 'learning_rate': 0.0002702869401640741, 'epoch': 0.78} + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5087/19440 [15:05:12<11:15:29, 2.82s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5088/19440 [15:05:16<11:38:49, 2.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9953, 'learning_rate': 0.00027026811795375624, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5089/19440 [15:05:18<11:27:16, 2.87s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5089/19440 [15:05:18<11:27:16, 2.87s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.7648, 'learning_rate': 0.0002702304735331206, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5090/19440 [15:05:21<11:13:46, 2.82s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5091/19440 [15:05:24<11:03:27, 2.77s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.4722, 'learning_rate': 0.0002702116513228028, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5092/19440 [15:05:26<10:55:08, 2.74s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.7025, 'learning_rate': 0.00027019282911248493, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.6785, 'learning_rate': 0.0002701740069021671, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5093/19440 [15:05:29<10:45:02, 2.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6593, 'learning_rate': 0.0002701551846918492, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5094/19440 [15:05:31<10:37:09, 2.66s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5095/19440 [15:05:34<10:28:05, 2.63s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.5417, 'learning_rate': 0.0002701363624815314, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5096/19440 [15:05:36<10:20:06, 2.59s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5096/19440 [15:05:36<10:20:06, 2.59s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.3834, 'learning_rate': 0.00027009871806089577, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5097/19440 [15:05:39<10:12:23, 2.56s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.8378, 'learning_rate': 0.00027007989585057797, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5098/19440 [15:05:41<10:01:00, 2.51s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5099/19440 [15:05:44<9:47:33, 2.46s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.586, 'learning_rate': 0.0002700610736402601, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5100/19440 [15:05:46<10:03:00, 2.52s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5100/19440 [15:05:46<10:03:00, 2.52s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1218, 'learning_rate': 0.0002700234292196244, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5101/19440 [15:05:51<12:35:00, 3.16s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.1967, 'learning_rate': 0.0002700046070093066, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5102/19440 [15:05:55<13:48:48, 3.47s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.1704, 'learning_rate': 0.00026998578479898875, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5103/19440 [15:05:59<14:33:52, 3.66s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.9559, 'learning_rate': 0.00026996696258867095, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5104/19440 [15:06:03<14:53:11, 3.74s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.075, 'learning_rate': 0.0002699481403783531, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5105/19440 [15:06:07<14:59:57, 3.77s/it]
[the two messages above are re-emitted before every training step; those duplicates and the per-step tqdm refreshes for steps 5105-5146/19440 (26%, elapsed 15:06:07 -> 15:08:15, 3.77 -> 2.58 s/it) are condensed to the trainer log entries below]
+{'loss': 6.9212, 'learning_rate': 0.00026992931816803524, 'epoch': 0.79}
+{'loss': 7.056, 'learning_rate': 0.00026991049595771744, 'epoch': 0.79}
+{'loss': 6.7743, 'learning_rate': 0.0002698916737473996, 'epoch': 0.79}
+{'loss': 6.8302, 'learning_rate': 0.0002698728515370818, 'epoch': 0.79}
+{'loss': 6.8825, 'learning_rate': 0.0002698540293267639, 'epoch': 0.79}
+{'loss': 6.7007, 'learning_rate': 0.0002698352071164461, 'epoch': 0.79}
+{'loss': 6.7302, 'learning_rate': 0.0002697975626958104, 'epoch': 0.79}
+{'loss': 6.7476, 'learning_rate': 0.0002697787404854926, 'epoch': 0.79}
+{'loss': 6.6933, 'learning_rate': 0.00026975991827517476, 'epoch': 0.79}
+{'loss': 6.8151, 'learning_rate': 0.00026974109606485696, 'epoch': 0.79}
+{'loss': 6.7924, 'learning_rate': 0.0002697222738545391, 'epoch': 0.79}
+{'loss': 6.4826, 'learning_rate': 0.00026968462943390345, 'epoch': 0.79}
+{'loss': 6.6077, 'learning_rate': 0.0002696658072235856, 'epoch': 0.79}
+{'loss': 6.5277, 'learning_rate': 0.00026964698501326774, 'epoch': 0.79}
+{'loss': 6.5403, 'learning_rate': 0.00026962816280294994, 'epoch': 0.79}
+{'loss': 6.5362, 'learning_rate': 0.00026960934059263214, 'epoch': 0.79}
+{'loss': 6.3673, 'learning_rate': 0.0002695905183823143, 'epoch': 0.79}
+{'loss': 6.0833, 'learning_rate': 0.00026955287396167863, 'epoch': 0.79}
+{'loss': 6.5061, 'learning_rate': 0.0002695340517513608, 'epoch': 0.79}
+{'loss': 6.5111, 'learning_rate': 0.0002695152295410429, 'epoch': 0.79}
+{'loss': 6.2214, 'learning_rate': 0.0002694964073307251, 'epoch': 0.79}
+{'loss': 6.3069, 'learning_rate': 0.0002694775851204073, 'epoch': 0.79}
+{'loss': 6.3688, 'learning_rate': 0.00026945876291008947, 'epoch': 0.79}
+{'loss': 6.1463, 'learning_rate': 0.0002694399406997716, 'epoch': 0.79}
+{'loss': 6.0467, 'learning_rate': 0.00026942111848945376, 'epoch': 0.79}
+{'loss': 6.1139, 'learning_rate': 0.00026940229627913596, 'epoch': 0.79}
+{'loss': 6.0835, 'learning_rate': 0.0002693834740688181, 'epoch': 0.79}
+{'loss': 5.744, 'learning_rate': 0.00026934582964818245, 'epoch': 0.79}
+{'loss': 5.7023, 'learning_rate': 0.00026932700743786465, 'epoch': 0.79}
+{'loss': 5.9167, 'learning_rate': 0.0002693081852275468, 'epoch': 0.79}
+{'loss': 5.8569, 'learning_rate': 0.00026928936301722894, 'epoch': 0.79}
+{'loss': 5.5421, 'learning_rate': 0.00026927054080691114, 'epoch': 0.79}
+{'loss': 5.8725, 'learning_rate': 0.0002692517185965933, 'epoch': 0.79}
+{'loss': 5.4727, 'learning_rate': 0.0002692328963862755, 'epoch': 0.79}
+{'loss': 5.5511, 'learning_rate': 0.00026921407417595763, 'epoch': 0.79}
+{'loss': 5.1365, 'learning_rate': 0.0002691952519656398, 'epoch': 0.79}
+{'loss': 4.9456, 'learning_rate': 0.00026917642975532197, 'epoch': 0.79}
+ 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5146/19440 [15:08:15<10:13:40, 2.58s/it]
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5147/19440 [15:08:18<10:03:55, 2.54s/it] + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5147/19440 [15:08:18<10:03:55, 2.54s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.9672, 'learning_rate': 0.0002691387853346863, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5148/19440 [15:08:20<9:52:02, 2.49s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7631, 'learning_rate': 0.00026911996312436846, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5149/19440 [15:08:22<9:41:06, 2.44s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.4286, 'learning_rate': 0.00026910114091405066, 'epoch': 0.79} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5150/19440 [15:08:27<11:54:28, 3.00s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5151/19440 [15:08:31<13:52:27, 3.50s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.358, 'learning_rate': 0.0002690823187037328, 'epoch': 0.79} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5152/19440 [15:08:36<14:48:38, 3.73s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.3346, 'learning_rate': 0.000269063496493415, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5153/19440 [15:08:40<15:21:35, 3.87s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5153/19440 [15:08:40<15:21:35, 3.87s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5154/19440 [15:08:44<15:30:21, 3.91s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5154/19440 [15:08:44<15:30:21, 3.91s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5155/19440 [15:08:48<15:35:03, 3.93s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.1015, 'learning_rate': 0.00026900702986246144, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5156/19440 [15:08:52<15:26:26, 3.89s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0299, 'learning_rate': 0.00026898820765214364, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5157/19440 [15:08:55<15:30:28, 3.91s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0661, 'learning_rate': 0.00026896938544182584, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5158/19440 [15:08:59<15:30:02, 3.91s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.7095, 'learning_rate': 0.000268950563231508, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0557, 'learning_rate': 0.0002689317410211902, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5159/19440 [15:09:03<15:18:52, 3.86s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8326, 'learning_rate': 0.00026891291881087233, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5160/19440 [15:09:07<15:03:48, 3.80s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5161/19440 [15:09:10<14:49:10, 3.74s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5161/19440 [15:09:10<14:49:10, 3.74s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5162/19440 [15:09:14<14:34:28, 3.67s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6858, 'learning_rate': 0.0002688752743902366, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5163/19440 [15:09:18<14:51:19, 3.75s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.7326, 'learning_rate': 0.0002688564521799188, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6955, 'learning_rate': 0.00026883762996960097, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5164/19440 [15:09:21<14:39:51, 3.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.4379, 'learning_rate': 0.00026881880775928317, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5165/19440 [15:09:25<14:28:56, 3.65s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5166/19440 [15:09:28<14:14:59, 3.59s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5166/19440 [15:09:28<14:14:59, 3.59s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5167/19440 [15:09:32<14:00:55, 3.54s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.659, 'learning_rate': 0.00026878116333864746, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6712, 'learning_rate': 0.00026876234112832966, 'epoch': 0.8} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5168/19440 [15:09:35<13:42:04, 3.46s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5169/19440 [15:09:38<13:28:37, 3.40s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3781, 'learning_rate': 0.0002687435189180118, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5170/19440 [15:09:42<13:27:41, 3.40s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.453, 'learning_rate': 0.000268724696707694, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 6.3003, 'learning_rate': 0.00026870587449737615, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5171/19440 [15:09:45<13:16:34, 3.35s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5172/19440 [15:09:48<13:09:58, 3.32s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3899, 'learning_rate': 0.00026868705228705835, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+[wandb output.log, condensed: training steps 5173–5214 of 19440 (27%, epoch 0.8) at roughly 2.5–3.9 s/it. The logged loss drifts from 6.53 down to 4.39, jumps back to 7.32 around step 5201, then settles near 6.6–6.8 while the learning rate decays linearly from 2.6867e-4 to 2.6790e-4. Every step also repeats the same two messages: "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." and "Could not estimate the number of tokens of the input, floating-point operations will not be computed".]
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5296, 'learning_rate': 0.0002678776972433915, 'epoch': 0.8} + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5215/19440 [15:12:06<14:12:32, 3.60s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.7207, 'learning_rate': 0.00026785887503307366, 'epoch': 0.8} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5216/19440 [15:12:10<14:02:45, 3.55s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6075, 'learning_rate': 0.00026784005282275586, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5217/19440 [15:12:13<13:52:57, 3.51s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.483, 'learning_rate': 0.00026782123061243806, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5218/19440 [15:12:16<13:40:11, 3.46s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.4297, 'learning_rate': 0.0002678024084021202, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5219/19440 [15:12:20<13:31:28, 3.42s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5220/19440 [15:12:23<13:20:17, 3.38s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5642, 'learning_rate': 0.00026778358619180235, 'epoch': 0.81} +{'loss': 6.4773, 'learning_rate': 0.0002677647639814845, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5221/19440 [15:12:26<13:08:54, 3.33s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3278, 'learning_rate': 0.0002677459417711667, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5222/19440 [15:12:29<12:58:59, 3.29s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5225, 'learning_rate': 0.00026772711956084884, 'epoch': 0.81} + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5223/19440 [15:12:32<12:47:25, 3.24s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2409, 'learning_rate': 0.00026770829735053104, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5224/19440 [15:12:36<12:43:15, 3.22s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3446, 'learning_rate': 0.00026768947514021324, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5225/19440 [15:12:39<13:04:20, 3.31s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.577, 'learning_rate': 0.0002676706529298954, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5226/19440 [15:12:42<12:52:57, 3.26s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2596, 'learning_rate': 0.00026765183071957753, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5227/19440 [15:12:45<12:39:39, 3.21s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1596, 'learning_rate': 0.0002676330085092597, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5228/19440 [15:12:48<12:28:56, 3.16s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.0848, 'learning_rate': 0.0002676141862989419, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5229/19440 [15:12:51<12:19:49, 3.12s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1654, 'learning_rate': 0.000267595364088624, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5230/19440 [15:12:54<12:09:59, 3.08s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3121, 'learning_rate': 0.0002675765418783062, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5231/19440 [15:12:57<12:01:58, 3.05s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5232/19440 [15:13:00<11:54:38, 3.02s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.1355, 'learning_rate': 0.00026755771966798837, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2811, 'learning_rate': 0.00026753889745767057, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5233/19440 [15:13:03<12:01:58, 3.05s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5234/19440 [15:13:06<11:55:14, 3.02s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2358, 'learning_rate': 0.0002675200752473527, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 5.8805, 'learning_rate': 0.00026750125303703486, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5235/19440 [15:13:09<11:46:07, 2.98s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.0596, 'learning_rate': 0.00026748243082671706, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5236/19440 [15:13:12<11:37:01, 2.94s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.7796, 'learning_rate': 0.0002674636086163992, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5237/19440 [15:13:15<11:31:58, 2.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.6424, 'learning_rate': 0.0002674447864060814, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5238/19440 [15:13:18<11:52:26, 3.01s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.6315, 'learning_rate': 0.00026742596419576355, 'epoch': 0.81} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5239/19440 [15:13:21<11:43:45, 2.97s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8122, 'learning_rate': 0.00026740714198544575, 'epoch': 0.81} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5240/19440 [15:13:24<11:29:59, 2.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+(every training step in this range also logged the same two messages: "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." and "Could not estimate the number of tokens of the input, floating-point operations will not be computed")
+ 27%| 5241/19440 [15:13:27<11:16:54, 2.86s/it]
+{'loss': 5.7493, 'learning_rate': 0.0002673883197751279, 'epoch': 0.81}
+{'loss': 5.7384, 'learning_rate': 0.00026736949756481004, 'epoch': 0.81}
+ 27%| 5242/19440 [15:13:29<11:04:31, 2.81s/it]
+{'loss': 5.4551, 'learning_rate': 0.00026735067535449224, 'epoch': 0.81}
+ 27%| 5243/19440 [15:13:32<10:53:45, 2.76s/it]
+ 27%| 5244/19440 [15:13:35<10:45:10, 2.73s/it]
+{'loss': 5.5409, 'learning_rate': 0.0002673318531441744, 'epoch': 0.81}
+{'loss': 5.3528, 'learning_rate': 0.0002673130309338566, 'epoch': 0.81}
+ 27%| 5245/19440 [15:13:37<10:34:53, 2.68s/it]
+{'loss': 5.3772, 'learning_rate': 0.00026729420872353873, 'epoch': 0.81}
+ 27%| 5246/19440 [15:13:40<10:36:19, 2.69s/it]
+{'loss': 5.2534, 'learning_rate': 0.0002672753865132209, 'epoch': 0.81}
+ 27%| 5247/19440 [15:13:42<10:25:54, 2.65s/it]
+ 27%| 5248/19440 [15:13:45<10:13:43, 2.59s/it]
+{'loss': 5.0206, 'learning_rate': 0.000267256564302903, 'epoch': 0.81}
+{'loss': 4.7432, 'learning_rate': 0.0002672377420925852, 'epoch': 0.81}
+ 27%| 5249/19440 [15:13:47<9:59:14, 2.53s/it]
+{'loss': 4.6664, 'learning_rate': 0.00026721891988226737, 'epoch': 0.81}
+ 27%| 5250/19440 [15:13:50<10:14:39, 2.60s/it]
+{'loss': 7.1088, 'learning_rate': 0.00026720009767194957, 'epoch': 0.81}
+ 27%| 5251/19440 [15:13:55<12:47:11, 3.24s/it]
+{'loss': 7.1781, 'learning_rate': 0.00026718127546163176, 'epoch': 0.81}
+ 27%| 5252/19440 [15:13:59<13:56:15, 3.54s/it]
+{'loss': 7.0285, 'learning_rate': 0.0002671624532513139, 'epoch': 0.81}
+ 27%| 5253/19440 [15:14:03<14:31:27, 3.69s/it]
+{'loss': 7.1721, 'learning_rate': 0.00026714363104099606, 'epoch': 0.81}
+ 27%| 5254/19440 [15:14:07<14:52:36, 3.78s/it]
+{'loss': 6.8896, 'learning_rate': 0.0002671248088306782, 'epoch': 0.81}
+ 27%| 5255/19440 [15:14:11<15:00:05, 3.81s/it]
+ 27%| 5256/19440 [15:14:15<15:04:02, 3.82s/it]
+{'loss': 6.8152, 'learning_rate': 0.00026708716441004255, 'epoch': 0.81}
+ 27%| 5257/19440 [15:14:19<15:10:03, 3.85s/it]
+{'loss': 6.8803, 'learning_rate': 0.00026706834219972475, 'epoch': 0.81}
+ 27%| 5258/19440 [15:14:22<15:03:13, 3.82s/it]
+{'loss': 6.9613, 'learning_rate': 0.0002670495199894069, 'epoch': 0.81}
+ 27%| 5259/19440 [15:14:26<14:50:52, 3.77s/it]
+{'loss': 6.8748, 'learning_rate': 0.0002670306977790891, 'epoch': 0.81}
+ 27%| 5260/19440 [15:14:30<14:41:16, 3.73s/it]
+{'loss': 6.7768, 'learning_rate': 0.00026701187556877124, 'epoch': 0.81}
+ 27%| 5261/19440 [15:14:33<14:28:23, 3.67s/it]
+{'loss': 6.8197, 'learning_rate': 0.0002669930533584534, 'epoch': 0.81}
+ 27%| 5262/19440 [15:14:37<14:15:00, 3.62s/it]
+ 27%| 5263/19440 [15:14:41<14:33:35, 3.70s/it]
+{'loss': 6.6697, 'learning_rate': 0.0002669742311481356, 'epoch': 0.81}
+{'loss': 6.8138, 'learning_rate': 0.0002669554089378177, 'epoch': 0.81}
+ 27%| 5264/19440 [15:14:44<14:18:57, 3.64s/it]
+{'loss': 6.7171, 'learning_rate': 0.0002669365867274999, 'epoch': 0.81}
+ 27%| 5265/19440 [15:14:48<14:03:10, 3.57s/it]
+{'loss': 6.6396, 'learning_rate': 0.00026691776451718207, 'epoch': 0.81}
+ 27%| 5266/19440 [15:14:51<13:47:49, 3.50s/it]
+{'loss': 6.5342, 'learning_rate': 0.00026689894230686427, 'epoch': 0.81}
+ 27%| 5267/19440 [15:14:54<13:37:50, 3.46s/it]
+ 27%| 5268/19440 [15:14:58<13:23:57, 3.40s/it]
+{'loss': 6.5004, 'learning_rate': 0.00026686129788622856, 'epoch': 0.81}
+ 27%| 5269/19440 [15:15:01<13:13:40, 3.36s/it]
+{'loss': 6.5368, 'learning_rate': 0.00026684247567591076, 'epoch': 0.81}
+ 27%| 5270/19440 [15:15:04<13:05:43, 3.33s/it]
+{'loss': 6.6659, 'learning_rate': 0.0002668236534655929, 'epoch': 0.81}
+ 27%| 5271/19440 [15:15:07<12:54:19, 3.28s/it]
+{'loss': 6.2801, 'learning_rate': 0.0002668048312552751, 'epoch': 0.81}
+ 27%| 5272/19440 [15:15:10<12:48:31, 3.25s/it]
+ 27%| 5273/19440 [15:15:14<12:37:16, 3.21s/it]
+{'loss': 6.6029, 'learning_rate': 0.00026678600904495725, 'epoch': 0.81}
+{'loss': 6.4412, 'learning_rate': 0.0002667671868346394, 'epoch': 0.81}
+ 27%| 5274/19440 [15:15:17<12:28:42, 3.17s/it]
+{'loss': 6.4423, 'learning_rate': 0.00026674836462432154, 'epoch': 0.81}
+ 27%| 5275/19440 [15:15:20<12:51:55, 3.27s/it]
+{'loss': 6.0734, 'learning_rate': 0.00026672954241400374, 'epoch': 0.81}
+ 27%| 5276/19440 [15:15:23<12:40:33, 3.22s/it]
+{'loss': 6.2705, 'learning_rate': 0.00026671072020368594, 'epoch': 0.81}
+ 27%| 5277/19440 [15:15:26<12:31:42, 3.18s/it]
+{'loss': 6.3784, 'learning_rate': 0.0002666918979933681, 'epoch': 0.81}
+ 27%| 5278/19440 [15:15:29<12:18:10, 3.13s/it]
+{'loss': 6.436, 'learning_rate': 0.0002666730757830503, 'epoch': 0.81}
+ 27%| 5279/19440 [15:15:32<12:08:22, 3.09s/it]
+{'loss': 6.1877, 'learning_rate': 0.00026665425357273243, 'epoch': 0.81}
+ 27%| 5280/19440 [15:15:35<12:03:20, 3.06s/it]
+{'loss': 6.0595, 'learning_rate': 0.0002666354313624146, 'epoch': 0.81}
+ 27%| 5281/19440 [15:15:38<11:52:32, 3.02s/it]
+{'loss': 6.2734, 'learning_rate': 0.0002666166091520967, 'epoch': 0.82}
+ 27%| 5282/19440 [15:15:41<11:43:52, 2.98s/it]
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5283/19440 [15:15:44<11:34:24, 2.94s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5283/19440 [15:15:44<11:34:24, 2.94s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9483, 'learning_rate': 0.00026657896473146107, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5284/19440 [15:15:47<11:30:12, 2.93s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.0144, 'learning_rate': 0.00026656014252114327, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5285/19440 [15:15:50<11:25:25, 2.91s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.111, 'learning_rate': 0.00026654132031082547, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5286/19440 [15:15:53<11:17:44, 2.87s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.8899, 'learning_rate': 0.0002665224981005076, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5287/19440 [15:15:55<11:16:01, 2.87s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9695, 'learning_rate': 0.00026650367589018976, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5288/19440 [15:15:59<11:41:09, 2.97s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9581, 'learning_rate': 0.0002664848536798719, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5289/19440 [15:16:01<11:30:18, 2.93s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5290/19440 [15:16:04<11:18:10, 2.88s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5290/19440 [15:16:04<11:18:10, 2.88s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 5.8847, 'learning_rate': 0.00026644720925923625, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5291/19440 [15:16:07<11:06:25, 2.83s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6392, 'learning_rate': 0.00026642838704891845, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5292/19440 [15:16:10<10:56:59, 2.79s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5293/19440 [15:16:12<10:46:01, 2.74s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.5232, 'learning_rate': 0.0002664095648386006, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.5409, 'learning_rate': 0.0002663907426282828, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5294/19440 [15:16:15<10:33:38, 2.69s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.2597, 'learning_rate': 0.00026637192041796494, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5295/19440 [15:16:17<10:27:23, 2.66s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.1682, 'learning_rate': 0.0002663530982076471, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5296/19440 [15:16:20<10:17:04, 2.62s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5297/19440 [15:16:22<10:07:23, 2.58s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5297/19440 [15:16:22<10:07:23, 2.58s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.7347, 'learning_rate': 0.0002663154537870114, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5298/19440 [15:16:25<9:57:33, 2.54s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.635, 'learning_rate': 0.0002662966315766936, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. 
Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5299/19440 [15:16:27<9:48:37, 2.50s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.5734, 'learning_rate': 0.00026627780936637577, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5300/19440 [15:16:30<10:01:02, 2.55s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.3118, 'learning_rate': 0.0002662589871560579, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5301/19440 [15:16:35<12:50:01, 3.27s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.0103, 'learning_rate': 0.00026624016494574006, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5302/19440 [15:16:39<14:04:11, 3.58s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.995, 'learning_rate': 0.00026622134273542226, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5303/19440 [15:16:43<14:38:02, 3.73s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.118, 'learning_rate': 0.00026620252052510446, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5304/19440 [15:16:47<14:52:51, 3.79s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.1194, 'learning_rate': 0.0002661836983147866, 'epoch': 0.82} + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5305/19440 [15:16:51<14:56:57, 3.81s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5306/19440 [15:16:55<14:56:29, 3.81s/it] + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5306/19440 [15:16:55<14:56:29, 3.81s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5307/19440 [15:16:59<15:01:10, 3.83s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.7802, 'learning_rate': 0.00026614605389415095, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5308/19440 [15:17:02<14:53:51, 3.80s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9615, 'learning_rate': 0.0002661272316838331, 'epoch': 0.82} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.8188, 'learning_rate': 0.00026610840947351524, 'epoch': 0.82} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+ 27%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5309/19440 [15:17:06<14:46:10, 3.76s/it]
+{'loss': 6.8733, 'learning_rate': 0.00026608958726319744, 'epoch': 0.82}
+{'loss': 6.8261, 'learning_rate': 0.00026607076505287964, 'epoch': 0.82}
+{'loss': 6.7105, 'learning_rate': 0.0002660519428425618, 'epoch': 0.82}
+{'loss': 6.7052, 'learning_rate': 0.000266033120632244, 'epoch': 0.82}
+{'loss': 6.6806, 'learning_rate': 0.00026601429842192613, 'epoch': 0.82}
+{'loss': 6.6674, 'learning_rate': 0.0002659954762116083, 'epoch': 0.82}
+{'loss': 6.5874, 'learning_rate': 0.0002659766540012904, 'epoch': 0.82}
+{'loss': 6.69, 'learning_rate': 0.0002659578317909726, 'epoch': 0.82}
+{'loss': 6.5523, 'learning_rate': 0.00026593900958065477, 'epoch': 0.82}
+{'loss': 6.4107, 'learning_rate': 0.00026592018737033697, 'epoch': 0.82}
+{'loss': 6.3817, 'learning_rate': 0.00026590136516001917, 'epoch': 0.82}
+{'loss': 6.3907, 'learning_rate': 0.0002658825429497013, 'epoch': 0.82}
+{'loss': 6.4387, 'learning_rate': 0.00026586372073938346, 'epoch': 0.82}
+{'loss': 6.3166, 'learning_rate': 0.0002658448985290656, 'epoch': 0.82}
+{'loss': 6.4322, 'learning_rate': 0.0002658260763187478, 'epoch': 0.82}
+{'loss': 6.2278, 'learning_rate': 0.00026580725410842995, 'epoch': 0.82}
+{'loss': 6.391, 'learning_rate': 0.00026578843189811215, 'epoch': 0.82}
+{'loss': 6.3891, 'learning_rate': 0.0002657696096877943, 'epoch': 0.82}
+{'loss': 6.2199, 'learning_rate': 0.00026575078747747644, 'epoch': 0.82}
+{'loss': 6.1468, 'learning_rate': 0.00026573196526715864, 'epoch': 0.82}
+{'loss': 6.0826, 'learning_rate': 0.0002657131430568408, 'epoch': 0.82}
+{'loss': 6.1323, 'learning_rate': 0.000265694320846523, 'epoch': 0.82}
+{'loss': 6.0305, 'learning_rate': 0.00026567549863620513, 'epoch': 0.82}
+{'loss': 6.3133, 'learning_rate': 0.00026565667642588733, 'epoch': 0.82}
+{'loss': 6.2524, 'learning_rate': 0.00026563785421556947, 'epoch': 0.82}
+{'loss': 6.0932, 'learning_rate': 0.0002656190320052516, 'epoch': 0.82}
+{'loss': 6.0982, 'learning_rate': 0.00026560020979493376, 'epoch': 0.82}
+{'loss': 6.0795, 'learning_rate': 0.00026558138758461596, 'epoch': 0.82}
+{'loss': 6.0823, 'learning_rate': 0.00026556256537429816, 'epoch': 0.82}
+{'loss': 5.832, 'learning_rate': 0.0002655437431639803, 'epoch': 0.82}
+{'loss': 5.8042, 'learning_rate': 0.0002655249209536625, 'epoch': 0.82}
+{'loss': 5.6653, 'learning_rate': 0.00026550609874334465, 'epoch': 0.82}
+{'loss': 5.6142, 'learning_rate': 0.0002654872765330268, 'epoch': 0.82}
+{'loss': 5.5594, 'learning_rate': 0.00026546845432270894, 'epoch': 0.82}
+{'loss': 5.3073, 'learning_rate': 0.0002654308099020733, 'epoch': 0.82}
+{'loss': 4.9735, 'learning_rate': 0.0002654119876917555, 'epoch': 0.82}
+{'loss': 5.0256, 'learning_rate': 0.0002653931654814377, 'epoch': 0.83}
+{'loss': 4.9181, 'learning_rate': 0.00026537434327111983, 'epoch': 0.83}
+{'loss': 4.9471, 'learning_rate': 0.000265355521060802, 'epoch': 0.83}
+{'loss': 4.6875, 'learning_rate': 0.0002653366988504841, 'epoch': 0.83}
+ 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5350/19440 [15:19:11<10:00:52, 2.56s/it]
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5351/19440 [15:19:16<12:30:20, 3.20s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.3718, 'learning_rate': 0.0002653178766401663, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 7.321, 'learning_rate': 0.00026529905442984847, 'epoch': 0.83} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5352/19440 [15:19:20<13:45:08, 3.51s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5353/19440 [15:19:24<14:24:44, 3.68s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5353/19440 [15:19:24<14:24:44, 3.68s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5354/19440 [15:19:28<14:43:20, 3.76s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5354/19440 [15:19:28<14:43:20, 3.76s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5355/19440 [15:19:32<14:48:06, 3.78s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 7.1694, 'learning_rate': 0.00026524258779889496, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5356/19440 [15:19:36<15:01:39, 3.84s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8816, 'learning_rate': 0.00026522376558857716, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5357/19440 [15:19:40<15:02:48, 3.85s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.9627, 'learning_rate': 0.0002652049433782593, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.9357, 'learning_rate': 0.0002651861211679415, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5358/19440 [15:19:43<14:49:20, 3.79s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 6.9143, 'learning_rate': 0.00026516729895762365, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5359/19440 [15:19:47<14:41:03, 3.75s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8367, 'learning_rate': 0.00026514847674730585, 'epoch': 0.83} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5360/19440 [15:19:50<14:28:42, 3.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5361/19440 [15:19:54<14:20:24, 3.67s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5368, 'learning_rate': 0.000265129654536988, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5362/19440 [15:19:58<14:10:09, 3.62s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.8248, 'learning_rate': 0.00026511083232667014, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6102, 'learning_rate': 0.00026509201011635234, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5363/19440 [15:20:01<14:27:24, 3.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.6859, 'learning_rate': 0.0002650731879060345, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5364/19440 [15:20:05<14:11:07, 3.63s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5365/19440 [15:20:08<13:55:16, 3.56s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5644, 'learning_rate': 0.0002650543656957167, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 5366/19440 [15:20:12<13:41:04, 3.50s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.7175, 'learning_rate': 0.00026503554348539883, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.6483, 'learning_rate': 0.00026501672127508103, 'epoch': 0.83} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5367/19440 [15:20:15<13:26:04, 3.44s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5368/19440 [15:20:18<13:10:59, 3.37s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5963, 'learning_rate': 0.0002649978990647632, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5012, 'learning_rate': 0.0002649790768544453, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5369/19440 [15:20:21<12:56:58, 3.31s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5370/19440 [15:20:25<12:51:24, 3.29s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3413, 'learning_rate': 0.00026496025464412746, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.4024, 'learning_rate': 0.00026494143243380966, 'epoch': 0.83} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5371/19440 [15:20:28<12:41:51, 3.25s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5372/19440 [15:20:31<12:36:22, 3.23s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5372/19440 [15:20:31<12:36:22, 3.23s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5373/19440 [15:20:34<12:31:58, 3.21s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3628, 'learning_rate': 0.000264903788013174, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.5333, 'learning_rate': 0.0002648849658028562, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5374/19440 [15:20:37<12:21:51, 3.16s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5375/19440 [15:20:41<12:43:42, 3.26s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5375/19440 [15:20:41<12:43:42, 3.26s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 5376/19440 [15:20:44<12:33:01, 3.21s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2416, 'learning_rate': 0.0002648473213822205, 'epoch': 0.83} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
[these two warnings repeat around every training step for the remainder of the log]
+ 28% | 5377/19440 [15:20:47<12:17:53, 3.15s/it]
+ 28% | 5378/19440 [15:20:50<12:06:03, 3.10s/it]
+{'loss': 6.3078, 'learning_rate': 0.00026480967696158484, 'epoch': 0.83}
+ 28% | 5379/19440 [15:20:53<11:55:37, 3.05s/it]
+{'loss': 6.1589, 'learning_rate': 0.000264790854751267, 'epoch': 0.83}
+ 28% | 5380/19440 [15:20:56<11:51:25, 3.04s/it]
+{'loss': 6.3578, 'learning_rate': 0.0002647720325409492, 'epoch': 0.83}
+ 28% | 5381/19440 [15:20:59<11:42:26, 3.00s/it]
+{'loss': 6.1386, 'learning_rate': 0.00026475321033063133, 'epoch': 0.83}
+{'loss': 6.0363, 'learning_rate': 0.0002647343881203135, 'epoch': 0.83}
+ 28% | 5382/19440 [15:21:01<11:34:57, 2.97s/it]
+ 28% | 5383/19440 [15:21:04<11:28:19, 2.94s/it]
+{'loss': 6.234, 'learning_rate': 0.0002647155659099957, 'epoch': 0.83}
+{'loss': 5.9729, 'learning_rate': 0.0002646967436996778, 'epoch': 0.83}
+ 28% | 5384/19440 [15:21:07<11:22:54, 2.92s/it]
+ 28% | 5385/19440 [15:21:10<11:15:20, 2.88s/it]
+{'loss': 6.1542, 'learning_rate': 0.00026467792148936, 'epoch': 0.83}
+ 28% | 5386/19440 [15:21:13<11:09:15, 2.86s/it]
+{'loss': 6.0412, 'learning_rate': 0.00026465909927904217, 'epoch': 0.83}
+{'loss': 6.2245, 'learning_rate': 0.00026464027706872437, 'epoch': 0.83}
+ 28% | 5387/19440 [15:21:16<11:02:23, 2.83s/it]
+ 28% | 5388/19440 [15:21:19<11:26:00, 2.93s/it]
+{'loss': 5.9029, 'learning_rate': 0.0002646214548584065, 'epoch': 0.83}
+{'loss': 5.9255, 'learning_rate': 0.00026460263264808866, 'epoch': 0.83}
+ 28% | 5389/19440 [15:21:21<11:14:15, 2.88s/it]
+ 28% | 5390/19440 [15:21:24<11:00:21, 2.82s/it]
+{'loss': 5.6013, 'learning_rate': 0.00026458381043777086, 'epoch': 0.83}
+ 28% | 5391/19440 [15:21:27<10:48:38, 2.77s/it]
+{'loss': 5.7456, 'learning_rate': 0.000264564988227453, 'epoch': 0.83}
+{'loss': 5.6665, 'learning_rate': 0.0002645461660171352, 'epoch': 0.83}
+ 28% | 5392/19440 [15:21:29<10:40:00, 2.73s/it]
+ 28% | 5393/19440 [15:21:32<10:31:12, 2.70s/it]
+{'loss': 5.8873, 'learning_rate': 0.00026452734380681735, 'epoch': 0.83}
+ 28% | 5394/19440 [15:21:35<10:25:49, 2.67s/it]
+{'loss': 5.6572, 'learning_rate': 0.00026450852159649955, 'epoch': 0.83}
+{'loss': 5.2032, 'learning_rate': 0.0002644896993861817, 'epoch': 0.83}
+ 28% | 5395/19440 [15:21:37<10:15:58, 2.63s/it]
+{'loss': 5.1896, 'learning_rate': 0.00026447087717586384, 'epoch': 0.83}
+ 28% | 5396/19440 [15:21:40<10:05:10, 2.59s/it]
+ 28% | 5397/19440 [15:21:42<9:55:27, 2.54s/it]
+{'loss': 5.1594, 'learning_rate': 0.000264452054965546, 'epoch': 0.83}
+ 28% | 5398/19440 [15:21:45<9:46:32, 2.51s/it]
+{'loss': 5.0671, 'learning_rate': 0.0002644332327552282, 'epoch': 0.83}
+ 28% | 5399/19440 [15:21:47<9:39:14, 2.48s/it]
+{'loss': 4.9863, 'learning_rate': 0.0002644144105449104, 'epoch': 0.83}
+{'loss': 4.6278, 'learning_rate': 0.00026439558833459253, 'epoch': 0.83}
+ 28% | 5400/19440 [15:21:50<9:55:24, 2.54s/it]
+ 28% | 5401/19440 [15:21:54<12:29:50, 3.20s/it]
+{'loss': 7.3243, 'learning_rate': 0.00026437676612427473, 'epoch': 0.83}
+ 28% | 5402/19440 [15:21:59<13:43:27, 3.52s/it]
+{'loss': 7.3223, 'learning_rate': 0.0002643579439139569, 'epoch': 0.83}
+ 28% | 5403/19440 [15:22:03<14:22:18, 3.69s/it]
+{'loss': 7.0393, 'learning_rate': 0.000264339121703639, 'epoch': 0.83}
+ 28% | 5404/19440 [15:22:07<14:39:07, 3.76s/it]
+{'loss': 7.0474, 'learning_rate': 0.00026432029949332117, 'epoch': 0.83}
+ 28% | 5405/19440 [15:22:11<14:46:00, 3.79s/it]
+{'loss': 7.0454, 'learning_rate': 0.00026430147728300336, 'epoch': 0.83}
+ 28% | 5406/19440 [15:22:14<14:50:29, 3.81s/it]
+{'loss': 6.8599, 'learning_rate': 0.00026428265507268556, 'epoch': 0.83}
+ 28% | 5407/19440 [15:22:18<14:52:53, 3.82s/it]
+{'loss': 6.7652, 'learning_rate': 0.0002642638328623677, 'epoch': 0.83}
+{'loss': 6.7348, 'learning_rate': 0.00026424501065204985, 'epoch': 0.83}
+ 28% | 5408/19440 [15:22:22<14:42:36, 3.77s/it]
+ 28% | 5409/19440 [15:22:26<14:36:28, 3.75s/it]
+ 28% | 5410/19440 [15:22:29<14:30:10, 3.72s/it]
+{'loss': 6.9013, 'learning_rate': 0.0002642073662314142, 'epoch': 0.83}
+ 28% | 5411/19440 [15:22:33<14:19:57, 3.68s/it]
+{'loss': 6.8167, 'learning_rate': 0.00026418854402109635, 'epoch': 0.83}
+ 28% | 5412/19440 [15:22:37<14:23:26, 3.69s/it]
+{'loss': 6.9145, 'learning_rate': 0.00026416972181077854, 'epoch': 0.84}
+ 28% | 5413/19440 [15:22:40<14:37:59, 3.76s/it]
+{'loss': 6.8572, 'learning_rate': 0.0002641508996004607, 'epoch': 0.84}
+{'loss': 6.7758, 'learning_rate': 0.0002641320773901429, 'epoch': 0.84}
+ 28% | 5414/19440 [15:22:44<14:18:56, 3.67s/it]
+ 28% | 5415/19440 [15:22:47<13:59:53, 3.59s/it]
+{'loss': 6.5918, 'learning_rate': 0.00026411325517982503, 'epoch': 0.84}
+ 28% | 5416/19440 [15:22:51<13:44:21, 3.53s/it]
+{'loss': 6.6816, 'learning_rate': 0.0002640944329695072, 'epoch': 0.84}
+{'loss': 6.5758, 'learning_rate': 0.0002640756107591894, 'epoch': 0.84}
+ 28% | 5417/19440 [15:22:54<13:31:53, 3.47s/it]
+ 28% | 5418/19440 [15:22:57<13:15:00, 3.40s/it]
+{'loss': 6.7446, 'learning_rate': 0.0002640567885488715, 'epoch': 0.84}
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5419/19440 [15:23:01<13:04:18, 3.36s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5982, 'learning_rate': 0.0002640379663385537, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5420/19440 [15:23:04<12:56:37, 3.32s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.596, 'learning_rate': 0.00026401914412823587, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5421/19440 [15:23:07<12:49:44, 3.29s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.5991, 'learning_rate': 0.00026400032191791807, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.2699, 'learning_rate': 0.0002639814997076002, 'epoch': 0.84} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5422/19440 [15:23:10<12:47:08, 3.28s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5423/19440 [15:23:13<12:39:16, 3.25s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.29, 'learning_rate': 0.00026396267749728236, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.4969, 'learning_rate': 0.00026394385528696456, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5424/19440 [15:23:17<12:33:08, 3.22s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5425/19440 [15:23:20<12:54:35, 3.32s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5425/19440 [15:23:20<12:54:35, 3.32s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5426/19440 [15:23:23<12:41:49, 3.26s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.2905, 'learning_rate': 0.0002639062108663289, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5427/19440 [15:23:26<12:28:27, 3.20s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5427/19440 [15:23:26<12:28:27, 3.20s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5428/19440 [15:23:29<12:15:21, 3.15s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.3262, 'learning_rate': 0.00026386856644569325, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5429/19440 [15:23:32<12:06:40, 3.11s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5429/19440 [15:23:32<12:06:40, 3.11s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5430/19440 [15:23:35<11:56:31, 3.07s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.9985, 'learning_rate': 0.00026383092202505754, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5431/19440 [15:23:38<11:48:29, 3.03s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.3218, 'learning_rate': 0.0002638120998147397, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5432/19440 [15:23:42<12:18:21, 3.16s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 6.1871, 'learning_rate': 0.0002637932776044219, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5433/19440 [15:23:45<12:03:07, 3.10s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.0757, 'learning_rate': 0.0002637744553941041, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5434/19440 [15:23:48<11:48:21, 3.03s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9613, 'learning_rate': 0.00026375563318378623, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5435/19440 [15:23:50<11:35:55, 2.98s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9311, 'learning_rate': 0.0002637368109734684, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5436/19440 [15:23:53<11:28:08, 2.95s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.9537, 'learning_rate': 0.0002637179887631505, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5437/19440 [15:23:56<11:19:42, 2.91s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5437/19440 [15:23:56<11:19:42, 2.91s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5438/19440 [15:23:59<11:42:09, 3.01s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.8016, 'learning_rate': 0.00026368034434251487, 'epoch': 0.84} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5439/19440 [15:24:02<11:31:07, 2.96s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5439/19440 [15:24:02<11:31:07, 2.96s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5440/19440 [15:24:05<11:16:26, 2.90s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.8277, 'learning_rate': 0.0002636426999218792, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5441/19440 [15:24:08<11:03:36, 2.84s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6631, 'learning_rate': 0.0002636238777115614, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7839, 'learning_rate': 0.00026360505550124356, 'epoch': 0.84} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5442/19440 [15:24:10<10:55:41, 2.81s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.5369, 'learning_rate': 0.0002635862332909257, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5443/19440 [15:24:13<10:44:55, 2.76s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5444/19440 [15:24:16<10:37:02, 2.73s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.6975, 'learning_rate': 0.0002635674110806079, 'epoch': 0.84} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed
+`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
[the two messages above are printed again around every training step below; only the progress and loss records are kept, with the garbled progress-bar glyphs omitted]
+ 28%| 5445/19440 [15:24:18<10:26:04, 2.68s/it]
+{'loss': 5.2611, 'learning_rate': 0.00026354858887029005, 'epoch': 0.84}
+ 28%| 5446/19440 [15:24:21<10:16:05, 2.64s/it]
+{'loss': 5.6138, 'learning_rate': 0.00026352976665997225, 'epoch': 0.84}
+ 28%| 5447/19440 [15:24:23<10:08:14, 2.61s/it]
+{'loss': 5.0781, 'learning_rate': 0.0002635109444496544, 'epoch': 0.84}
+ 28%| 5448/19440 [15:24:26<9:58:28, 2.57s/it]
+{'loss': 4.8467, 'learning_rate': 0.0002634921222393366, 'epoch': 0.84}
+ 28%| 5449/19440 [15:24:28<9:48:22, 2.52s/it]
+{'loss': 4.9404, 'learning_rate': 0.00026347330002901874, 'epoch': 0.84}
+ 28%| 5450/19440 [15:24:31<10:00:31, 2.58s/it]
+{'loss': 4.4603, 'learning_rate': 0.0002634544778187009, 'epoch': 0.84}
+{'loss': 7.5232, 'learning_rate': 0.0002634356556083831, 'epoch': 0.84}
+ 28%| 5451/19440 [15:24:36<12:32:10, 3.23s/it]
+{'loss': 7.1705, 'learning_rate': 0.0002634168333980652, 'epoch': 0.84}
+ 28%| 5452/19440 [15:24:40<13:46:32, 3.55s/it]
+{'loss': 7.1179, 'learning_rate': 0.0002633980111877474, 'epoch': 0.84}
+ 28%| 5453/19440 [15:24:44<14:26:27, 3.72s/it]
+{'loss': 7.0453, 'learning_rate': 0.00026337918897742957, 'epoch': 0.84}
+ 28%| 5454/19440 [15:24:48<14:45:07, 3.80s/it]
+{'loss': 7.055, 'learning_rate': 0.00026336036676711177, 'epoch': 0.84}
+ 28%| 5455/19440 [15:24:52<14:48:27, 3.81s/it]
+{'loss': 6.9114, 'learning_rate': 0.0002633415445567939, 'epoch': 0.84}
+ 28%| 5456/19440 [15:24:56<14:51:23, 3.82s/it]
+{'loss': 6.6926, 'learning_rate': 0.00026332272234647606, 'epoch': 0.84}
+ 28%| 5457/19440 [15:25:00<14:55:29, 3.84s/it]
+{'loss': 6.8101, 'learning_rate': 0.00026330390013615826, 'epoch': 0.84}
+ 28%| 5458/19440 [15:25:03<14:45:33, 3.80s/it]
+ 28%| 5459/19440 [15:25:07<14:36:04, 3.76s/it]
+{'loss': 6.5921, 'learning_rate': 0.0002632662557155226, 'epoch': 0.84}
+ 28%| 5460/19440 [15:25:11<14:25:51, 3.72s/it]
+{'loss': 6.8884, 'learning_rate': 0.00026324743350520475, 'epoch': 0.84}
+ 28%| 5461/19440 [15:25:14<14:13:46, 3.66s/it]
+{'loss': 6.6955, 'learning_rate': 0.0002632286112948869, 'epoch': 0.84}
+ 28%| 5462/19440 [15:25:18<14:05:36, 3.63s/it]
+{'loss': 6.6144, 'learning_rate': 0.00026320978908456904, 'epoch': 0.84}
+ 28%| 5463/19440 [15:25:22<14:25:15, 3.71s/it]
+{'loss': 6.5977, 'learning_rate': 0.00026319096687425124, 'epoch': 0.84}
+ 28%| 5464/19440 [15:25:25<14:11:29, 3.66s/it]
+{'loss': 6.736, 'learning_rate': 0.0002631721446639334, 'epoch': 0.84}
+ 28%| 5465/19440 [15:25:29<13:52:28, 3.57s/it]
+{'loss': 6.4369, 'learning_rate': 0.0002631533224536156, 'epoch': 0.84}
+ 28%| 5466/19440 [15:25:32<13:47:46, 3.55s/it]
+ 28%| 5467/19440 [15:25:35<13:32:32, 3.49s/it]
+{'loss': 6.6088, 'learning_rate': 0.0002631345002432978, 'epoch': 0.84}
+{'loss': 6.2617, 'learning_rate': 0.00026311567803297993, 'epoch': 0.84}
+ 28%| 5468/19440 [15:25:39<13:17:03, 3.42s/it]
+{'loss': 6.4902, 'learning_rate': 0.0002630968558226621, 'epoch': 0.84}
+ 28%| 5469/19440 [15:25:42<13:04:29, 3.37s/it]
+{'loss': 6.4919, 'learning_rate': 0.0002630780336123442, 'epoch': 0.84}
+ 28%| 5470/19440 [15:25:45<12:56:45, 3.34s/it]
+{'loss': 6.4632, 'learning_rate': 0.0002630592114020264, 'epoch': 0.84}
+ 28%| 5471/19440 [15:25:48<12:46:00, 3.29s/it]
+{'loss': 6.3952, 'learning_rate': 0.00026304038919170857, 'epoch': 0.84}
+ 28%| 5472/19440 [15:25:52<12:38:45, 3.26s/it]
+{'loss': 6.3476, 'learning_rate': 0.00026302156698139077, 'epoch': 0.84}
+ 28%| 5473/19440 [15:25:55<12:34:18, 3.24s/it]
+ 28%| 5474/19440 [15:25:58<12:26:17, 3.21s/it]
+ 28%| 5475/19440 [15:26:02<12:50:22, 3.31s/it]
+{'loss': 6.2908, 'learning_rate': 0.0002629839225607551, 'epoch': 0.84}
+{'loss': 6.3333, 'learning_rate': 0.00026296510035043726, 'epoch': 0.84}
+ 28%| 5476/19440 [15:26:05<12:40:22, 3.27s/it]
+ 28%| 5477/19440 [15:26:08<12:24:18, 3.20s/it]
+{'loss': 6.2369, 'learning_rate': 0.0002629462781401194, 'epoch': 0.85}
+{'loss': 6.0442, 'learning_rate': 0.0002629274559298016, 'epoch': 0.85}
+ 28%| 5478/19440 [15:26:11<12:12:01, 3.15s/it]
+ 28%| 5479/19440 [15:26:14<12:04:32, 3.11s/it]
+{'loss': 6.407, 'learning_rate': 0.00026290863371948375, 'epoch': 0.85}
+{'loss': 6.263, 'learning_rate': 0.00026288981150916595, 'epoch': 0.85}
+ 28%| 5480/19440 [15:26:17<11:56:28, 3.08s/it]
+ 28%| 5481/19440 [15:26:20<11:48:51, 3.05s/it]
+{'loss': 6.0313, 'learning_rate': 0.0002628709892988481, 'epoch': 0.85}
+{'loss': 6.2339, 'learning_rate': 0.0002628521670885303, 'epoch': 0.85}
+ 28%| 5482/19440 [15:26:23<11:38:46, 3.00s/it]
+ 28%| 5483/19440 [15:26:26<11:32:41, 2.98s/it]
+{'loss': 5.9356, 'learning_rate': 0.00026283334487821244, 'epoch': 0.85}
+{'loss': 6.0939, 'learning_rate': 0.0002628145226678946, 'epoch': 0.85}
+ 28%| 5484/19440 [15:26:28<11:23:25, 2.94s/it]
+{'loss': 5.9858, 'learning_rate': 0.0002627957004575768, 'epoch': 0.85}
+ 28%| 5485/19440 [15:26:31<11:15:19, 2.90s/it]
+{'loss': 5.9592, 'learning_rate': 0.0002627768782472589, 'epoch': 0.85}
+ 28%| 5486/19440 [15:26:34<11:10:38, 2.88s/it]
+{'loss': 5.91, 'learning_rate': 0.0002627580560369411, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5487/19440 [15:26:37<11:03:37, 2.85s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5488/19440 [15:26:40<11:28:15, 2.96s/it] + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5488/19440 [15:26:40<11:28:15, 2.96s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+{'loss': 5.529, 'learning_rate': 0.0002627204116163054, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5489/19440 [15:26:43<11:18:01, 2.92s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 6.1441, 'learning_rate': 0.00026270158940598756, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5490/19440 [15:26:46<11:03:07, 2.85s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.7983, 'learning_rate': 0.00026268276719566976, 'epoch': 0.85} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5491/19440 [15:26:48<10:49:54, 2.80s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.39, 'learning_rate': 0.0002626639449853519, 'epoch': 0.85} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5492/19440 [15:26:51<10:38:25, 2.75s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.3332, 'learning_rate': 0.0002626451227750341, 'epoch': 0.85} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5493/19440 [15:26:53<10:28:44, 2.70s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5494/19440 [15:26:56<10:19:02, 2.66s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.5261, 'learning_rate': 0.0002626263005647163, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 5.531, 'learning_rate': 0.00026260747835439845, 'epoch': 0.85} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5495/19440 [15:26:59<10:09:33, 2.62s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.9314, 'learning_rate': 0.0002625886561440806, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5496/19440 [15:27:01<10:01:15, 2.59s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 5.0433, 'learning_rate': 0.00026256983393376274, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5497/19440 [15:27:04<9:55:42, 2.56s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5498/19440 [15:27:06<9:46:10, 2.52s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +{'loss': 4.6725, 'learning_rate': 0.00026255101172344494, 'epoch': 0.85} +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... 
+Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.859, 'learning_rate': 0.0002625321895131271, 'epoch': 0.85} + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5499/19440 [15:27:08<9:33:45, 2.47s/it]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed +{'loss': 4.3866, 'learning_rate': 0.0002625133673028093, 'epoch': 0.85} +`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`... +Could not estimate the number of tokens of the input, floating-point operations will not be computed + 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 5500/19440 [15:27:11<9:49:54, 2.54s/it]The following columns in the evaluation set don't have a corresponding argument in `SpeechEncoderDecoderModel.forward` and have been ignored: length, lang. If length, lang are not expected by `SpeechEncoderDecoderModel.forward`, you can safely ignore this message. 
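The two warnings that dominate this log come from enabling gradient checkpointing while the decoder config still has `use_cache=True`: the key/value cache cannot be used during checkpointed training, so the Trainer overrides the flag and logs the message on every forward pass. The accompanying token-count message just means the Trainer cannot estimate FLOPs for audio inputs that carry `input_values` rather than `input_ids`, and it does not affect training. A minimal sketch of switching the cache off up front instead, as a hypothetical helper that is not part of run_xtreme_s.py and assumes a SpeechEncoderDecoderModel like the one being fine-tuned here:

# Hedged sketch: silence the per-step `use_cache` warning by disabling the KV cache
# before training, since it cannot be combined with gradient checkpointing anyway.
from transformers import SpeechEncoderDecoderModel  # model class named in the log above

def prepare_for_gradient_checkpointing(model: SpeechEncoderDecoderModel) -> SpeechEncoderDecoderModel:
    model.gradient_checkpointing_enable()           # standard PreTrainedModel API
    model.config.use_cache = False                  # top-level flag checked during training
    if getattr(model.config, "decoder", None) is not None:
        model.config.decoder.use_cache = False      # SpeechEncoderDecoderConfig nests the decoder config
    return model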
+***** Running Evaluation *****
+  Num examples = 14760
+  Batch size = 4
[blank per-batch progress output from the evaluation loop elided]
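With 14760 evaluation examples and a batch size of 4, the loop runs 14760 / 4 = 3690 steps, matching the 3690/3690 progress bar that follows. The checkpoint written immediately afterwards then fails with `OSError: [Errno 28] No space left on device` while torch.save is writing the optimizer state. One way to make such runs more robust is to cap how many checkpoints are kept via `save_total_limit`; the sketch below is illustrative only, and its argument values are assumptions rather than the settings used for this run:

# Hedged sketch: rotate old checkpoints so the multi-GB model and optimizer files
# written at each save do not eventually fill the disk.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./",
    per_device_eval_batch_size=4,  # matches "Batch size = 4" above
    save_steps=500,                # assumed cadence; the log shows a checkpoint at step 5500
    save_total_limit=2,            # keep only the two most recent checkpoints on disk
)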
+100%|█████████████████████████████████████████████████████████████████████████| 3690/3690 [1:01:30<00:00, 1.04it/s]
+
+Configuration saved in ./checkpoint-5500/config.json
+Model weights saved in ./checkpoint-5500/pytorch_model.bin
+Feature extractor saved in ./checkpoint-5500/preprocessor_config.json
+Traceback (most recent call last):
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/serialization.py", line 379, in save
+    _save(obj, opened_zipfile, pickle_module, pickle_protocol)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/serialization.py", line 499, in _save
+    zip_file.write_record(name, storage.data_ptr(), num_bytes)
+OSError: [Errno 28] No space left on device
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/run_xtreme_s.py", line 947, in <module>
+    main()
+  File "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/run_xtreme_s.py", line 874, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+  File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1524, in train
+    self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
+  File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1655, in _maybe_log_save_evaluate
+    self._save_checkpoint(model, trial, metrics=metrics)
+  File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1757, in _save_checkpoint
+    torch.save(self.optimizer.state_dict(), os.path.join(output_dir, OPTIMIZER_NAME))
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/serialization.py", line 380, in save
+    return
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/torch/serialization.py", line 259, in __exit__
+    self.file_like.write_end_of_file()
+RuntimeError: [enforce fail at inline_container.cc:300] .
unexpected pos 2888813184 vs 2888813072 \ No newline at end of file diff --git a/wandb/run-20220503_172048-zotxt8wa/files/wandb-summary.json b/wandb/run-20220503_172048-zotxt8wa/files/wandb-summary.json index 1bd3b70f41a27bf1cbeb368e3d68ac1dfea85054..60390efb1aebb1f4322068973d1c1b4a8d9b7e29 100644 --- a/wandb/run-20220503_172048-zotxt8wa/files/wandb-summary.json +++ b/wandb/run-20220503_172048-zotxt8wa/files/wandb-summary.json @@ -1 +1 @@ -{"train/loss": 4.673, "train/learning_rate": 0.0002719244724617255, "train/epoch": 0.77, "train/global_step": 5000, "_runtime": 53868, "_timestamp": 1651652316, "_step": 5009, "gradients/decoder.model.decoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1022.0], "bins": [-358.7091979980469, -353.0705261230469, -347.4318542480469, -341.793212890625, -336.154541015625, -330.515869140625, -324.877197265625, -319.238525390625, -313.599853515625, -307.961181640625, -302.322509765625, -296.683837890625, -291.0451965332031, -285.4065246582031, -279.7678527832031, -274.1291809082031, -268.49053955078125, -262.85186767578125, -257.21319580078125, -251.5745391845703, -245.9358673095703, -240.29721069335938, -234.65853881835938, -229.01986694335938, -223.38119506835938, -217.74252319335938, -212.10386657714844, -206.46519470214844, -200.82652282714844, -195.1878662109375, -189.5491943359375, -183.9105224609375, -178.27188110351562, -172.63320922851562, -166.9945526123047, -161.3558807373047, -155.7172088623047, -150.07855224609375, -144.43988037109375, -138.80120849609375, -133.1625518798828, -127.52388763427734, -121.88521575927734, -116.24655151367188, -110.6078872680664, -104.96922302246094, -99.33055114746094, -93.69188690185547, -88.05321502685547, -82.41455078125, -76.77587890625, -71.13721466064453, -65.49855041503906, -59.85988235473633, -54.221214294433594, -48.582550048828125, -42.943878173828125, -37.30521011352539, -31.666545867919922, -26.027877807617188, -20.389211654663086, -14.750545501708984, -9.11187744140625, -3.4732131958007812, 2.165454387664795]}, "gradients/decoder.model.decoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 0.0, 4.0, 7.0, 7.0, 8.0, 10.0, 19.0, 17.0, 22.0, 17.0, 26.0, 41.0, 52.0, 46.0, 53.0, 61.0, 50.0, 52.0, 52.0, 65.0, 46.0, 49.0, 53.0, 42.0, 43.0, 46.0, 26.0, 25.0, 19.0, 12.0, 9.0, 5.0, 4.0, 7.0, 4.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-14.737998962402344, -14.262447357177734, -13.786895751953125, -13.311344146728516, -12.835792541503906, -12.360240936279297, -11.884689331054688, -11.409137725830078, -10.933586120605469, -10.45803451538086, -9.98248291015625, -9.50693130493164, -9.031379699707031, -8.555828094482422, -8.080276489257812, -7.604724884033203, -7.129173755645752, -6.653622150421143, -6.178070545196533, -5.702518939971924, -5.2269673347473145, -4.751416206359863, -4.275864601135254, -3.8003127574920654, -3.324761152267456, -2.8492095470428467, -2.3736579418182373, -1.8981064558029175, -1.422554850578308, -0.9470033645629883, -0.4714517593383789, 0.004099845886230469, 0.47965145111083984, 0.9552030563354492, 1.4307546615600586, 
1.9063061475753784, 2.3818578720092773, 2.8574092388153076, 3.332960844039917, 3.8085124492645264, 4.284064292907715, 4.759615898132324, 5.235167503356934, 5.710719108581543, 6.186270713806152, 6.661822319030762, 7.137373924255371, 7.6129255294799805, 8.088476181030273, 8.564027786254883, 9.039579391479492, 9.515130996704102, 9.990682601928711, 10.46623420715332, 10.94178581237793, 11.417337417602539, 11.892889022827148, 12.368440628051758, 12.843992233276367, 13.319543838500977, 13.795095443725586, 14.270647048950195, 14.746198654174805, 15.221750259399414, 15.697301864624023]}, "gradients/decoder.model.decoder.layers.11.fc2.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 4.0, 8.0, 11.0, 14.0, 28.0, 21.0, 36.0, 56.0, 63.0, 94.0, 116.0, 181.0, 204.0, 301.0, 435.0, 519.0, 824.0, 1148.0, 1894.0, 3690.0, 8863.0, 25768.0, 4107578.0, 24822.0, 8383.0, 3490.0, 1869.0, 1054.0, 733.0, 530.0, 388.0, 306.0, 205.0, 175.0, 106.0, 83.0, 63.0, 54.0, 40.0, 31.0, 28.0, 17.0, 11.0, 12.0, 6.0, 7.0, 6.0, 5.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0], "bins": [-4.8359375, -4.67724609375, -4.5185546875, -4.35986328125, -4.201171875, -4.04248046875, -3.8837890625, -3.72509765625, -3.56640625, -3.40771484375, -3.2490234375, -3.09033203125, -2.931640625, -2.77294921875, -2.6142578125, -2.45556640625, -2.296875, -2.13818359375, -1.9794921875, -1.82080078125, -1.662109375, -1.50341796875, -1.3447265625, -1.18603515625, -1.02734375, -0.86865234375, -0.7099609375, -0.55126953125, -0.392578125, -0.23388671875, -0.0751953125, 0.08349609375, 0.2421875, 0.40087890625, 0.5595703125, 0.71826171875, 0.876953125, 1.03564453125, 1.1943359375, 1.35302734375, 1.51171875, 1.67041015625, 1.8291015625, 1.98779296875, 2.146484375, 2.30517578125, 2.4638671875, 2.62255859375, 2.78125, 2.93994140625, 3.0986328125, 3.25732421875, 3.416015625, 3.57470703125, 3.7333984375, 3.89208984375, 4.05078125, 4.20947265625, 4.3681640625, 4.52685546875, 4.685546875, 4.84423828125, 5.0029296875, 5.16162109375, 5.3203125]}, "gradients/decoder.model.decoder.layers.11.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 1.0, 5.0, 8.0, 8.0, 11.0, 16.0, 15.0, 16.0, 31.0, 38.0, 49.0, 48.0, 56.0, 67.0, 48.0, 60.0, 64.0, 86.0, 57.0, 50.0, 51.0, 50.0, 37.0, 29.0, 25.0, 22.0, 14.0, 13.0, 6.0, 9.0, 6.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.2890625, -8.9915771484375, -8.694091796875, -8.3966064453125, -8.09912109375, -7.8016357421875, -7.504150390625, -7.2066650390625, -6.9091796875, -6.6116943359375, -6.314208984375, -6.0167236328125, -5.71923828125, -5.4217529296875, -5.124267578125, -4.8267822265625, -4.529296875, -4.2318115234375, -3.934326171875, -3.6368408203125, -3.33935546875, -3.0418701171875, -2.744384765625, -2.4468994140625, -2.1494140625, -1.8519287109375, -1.554443359375, -1.2569580078125, -0.95947265625, -0.6619873046875, -0.364501953125, -0.0670166015625, 0.23046875, 0.5279541015625, 0.825439453125, 1.1229248046875, 1.42041015625, 1.7178955078125, 2.015380859375, 2.3128662109375, 2.6103515625, 2.9078369140625, 3.205322265625, 3.5028076171875, 3.80029296875, 4.0977783203125, 4.395263671875, 4.6927490234375, 4.990234375, 5.2877197265625, 5.585205078125, 5.8826904296875, 6.18017578125, 6.4776611328125, 6.775146484375, 7.0726318359375, 7.3701171875, 7.6676025390625, 7.965087890625, 8.2625732421875, 8.56005859375, 8.8575439453125, 9.155029296875, 9.4525146484375, 9.75]}, 
"gradients/decoder.model.decoder.layers.11.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 7.0, 8.0, 13.0, 81.0, 4194068.0, 51.0, 14.0, 6.0, 5.0, 8.0, 7.0, 1.0, 4.0, 2.0, 0.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-207.875, -200.314453125, -192.75390625, -185.193359375, -177.6328125, -170.072265625, -162.51171875, -154.951171875, -147.390625, -139.830078125, -132.26953125, -124.708984375, -117.1484375, -109.587890625, -102.02734375, -94.466796875, -86.90625, -79.345703125, -71.78515625, -64.224609375, -56.6640625, -49.103515625, -41.54296875, -33.982421875, -26.421875, -18.861328125, -11.30078125, -3.740234375, 3.8203125, 11.380859375, 18.94140625, 26.501953125, 34.0625, 41.623046875, 49.18359375, 56.744140625, 64.3046875, 71.865234375, 79.42578125, 86.986328125, 94.546875, 102.107421875, 109.66796875, 117.228515625, 124.7890625, 132.349609375, 139.91015625, 147.470703125, 155.03125, 162.591796875, 170.15234375, 177.712890625, 185.2734375, 192.833984375, 200.39453125, 207.955078125, 215.515625, 223.076171875, 230.63671875, 238.197265625, 245.7578125, 253.318359375, 260.87890625, 268.439453125, 276.0]}, "gradients/decoder.model.decoder.layers.11.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 7.0, 11.0, 28.0, 3808.0, 161.0, 13.0, 8.0, 4.0, 7.0, 8.0, 4.0, 1.0, 5.0, 0.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.654296875, -2.559326171875, -2.46435546875, -2.369384765625, -2.2744140625, -2.179443359375, -2.08447265625, -1.989501953125, -1.89453125, -1.799560546875, -1.70458984375, -1.609619140625, -1.5146484375, -1.419677734375, -1.32470703125, -1.229736328125, -1.134765625, -1.039794921875, -0.94482421875, -0.849853515625, -0.7548828125, -0.659912109375, -0.56494140625, -0.469970703125, -0.375, -0.280029296875, -0.18505859375, -0.090087890625, 0.0048828125, 0.099853515625, 0.19482421875, 0.289794921875, 0.384765625, 0.479736328125, 0.57470703125, 0.669677734375, 0.7646484375, 0.859619140625, 0.95458984375, 1.049560546875, 1.14453125, 1.239501953125, 1.33447265625, 1.429443359375, 1.5244140625, 1.619384765625, 1.71435546875, 1.809326171875, 1.904296875, 1.999267578125, 2.09423828125, 2.189208984375, 2.2841796875, 2.379150390625, 2.47412109375, 2.569091796875, 2.6640625, 2.759033203125, 2.85400390625, 2.948974609375, 3.0439453125, 3.138916015625, 3.23388671875, 3.328857421875, 3.423828125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 7.0, 43.0, 213.0, 652.0, 96.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.891059398651123, -6.073329925537109, -5.2555999755859375, -4.437870025634766, -3.620140552520752, -2.80241060256958, -1.9846811294555664, -1.1669516563415527, -0.34922170639038086, 0.4685080051422119, 1.2862377166748047, 2.1039674282073975, 2.9216971397399902, 3.739427089691162, 4.557156562805176, 5.3748860359191895, 6.192615985870361, 
7.010345458984375, 7.828075408935547, 8.645805358886719, 9.46353530883789, 10.281265258789062, 11.098994255065918, 11.916723251342773, 12.734453201293945, 13.552183151245117, 14.369913101196289, 15.187642097473145, 16.00537109375, 16.823101043701172, 17.640830993652344, 18.458560943603516, 19.276290893554688, 20.09402084350586, 20.91175079345703, 21.729480743408203, 22.547210693359375, 23.364940643310547, 24.182668685913086, 25.000398635864258, 25.81812858581543, 26.6358585357666, 27.453588485717773, 28.271318435668945, 29.089046478271484, 29.906776428222656, 30.724506378173828, 31.542236328125, 32.35996627807617, 33.177696228027344, 33.995426177978516, 34.81315612792969, 35.63088607788086, 36.44861602783203, 37.2663459777832, 38.084075927734375, 38.90180206298828, 39.71953201293945, 40.537261962890625, 41.3549919128418, 42.17272186279297, 42.99045181274414, 43.80818176269531, 44.62590789794922, 45.443641662597656]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 4.0, 1.0, 1.0, 7.0, 7.0, 10.0, 6.0, 6.0, 13.0, 19.0, 22.0, 28.0, 22.0, 37.0, 45.0, 42.0, 48.0, 49.0, 61.0, 50.0, 53.0, 47.0, 57.0, 53.0, 52.0, 41.0, 50.0, 27.0, 33.0, 29.0, 18.0, 17.0, 12.0, 14.0, 13.0, 5.0, 2.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.765387535095215, -8.506000518798828, -8.246613502502441, -7.9872260093688965, -7.727838516235352, -7.468451499938965, -7.209064483642578, -6.949676990509033, -6.690289497375488, -6.430902481079102, -6.171514987945557, -5.91212797164917, -5.652740478515625, -5.393353462219238, -5.133966445922852, -4.874578952789307, -4.61519193649292, -4.355804920196533, -4.096417427062988, -3.8370304107666016, -3.5776429176330566, -3.31825590133667, -3.058868646621704, -2.7994813919067383, -2.5400941371917725, -2.2807068824768066, -2.021319627761841, -1.7619324922561646, -1.5025452375411987, -1.243157982826233, -0.9837708473205566, -0.7243835926055908, -0.464996337890625, -0.20560911297798157, 0.053778111934661865, 0.3131653070449829, 0.5725525617599487, 0.8319398164749146, 1.0913269519805908, 1.3507142066955566, 1.6101014614105225, 1.8694887161254883, 2.128875970840454, 2.38826322555542, 2.6476502418518066, 2.9070377349853516, 3.1664247512817383, 3.425812005996704, 3.68519926071167, 3.9445865154266357, 4.203973770141602, 4.463360786437988, 4.722748279571533, 4.98213529586792, 5.241522789001465, 5.500909805297852, 5.760296821594238, 6.019683837890625, 6.27907133102417, 6.538458347320557, 6.797845840454102, 7.057232856750488, 7.316619873046875, 7.57600736618042, 7.835394859313965]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 7.0, 5.0, 12.0, 4.0, 10.0, 13.0, 17.0, 30.0, 30.0, 59.0, 73.0, 103.0, 142.0, 255.0, 398.0, 609.0, 1102.0, 1918.0, 3680.0, 7986.0, 24569.0, 757908.0, 216116.0, 18928.0, 7063.0, 3308.0, 1605.0, 959.0, 558.0, 362.0, 226.0, 146.0, 105.0, 66.0, 47.0, 39.0, 20.0, 20.0, 20.0, 6.0, 7.0, 9.0, 8.0, 3.0, 4.0, 2.0, 0.0, 0.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.99560546875, -0.9623184204101562, -0.9290313720703125, -0.8957443237304688, -0.862457275390625, -0.8291702270507812, -0.7958831787109375, -0.7625961303710938, -0.72930908203125, -0.6960220336914062, -0.6627349853515625, -0.6294479370117188, -0.596160888671875, -0.5628738403320312, -0.5295867919921875, -0.49629974365234375, -0.4630126953125, 
-0.42972564697265625, -0.3964385986328125, -0.36315155029296875, -0.329864501953125, -0.29657745361328125, -0.2632904052734375, -0.23000335693359375, -0.19671630859375, -0.16342926025390625, -0.1301422119140625, -0.09685516357421875, -0.063568115234375, -0.03028106689453125, 0.0030059814453125, 0.03629302978515625, 0.069580078125, 0.10286712646484375, 0.1361541748046875, 0.16944122314453125, 0.202728271484375, 0.23601531982421875, 0.2693023681640625, 0.30258941650390625, 0.33587646484375, 0.36916351318359375, 0.4024505615234375, 0.43573760986328125, 0.469024658203125, 0.5023117065429688, 0.5355987548828125, 0.5688858032226562, 0.6021728515625, 0.6354598999023438, 0.6687469482421875, 0.7020339965820312, 0.735321044921875, 0.7686080932617188, 0.8018951416015625, 0.8351821899414062, 0.86846923828125, 0.9017562866210938, 0.9350433349609375, 0.9683303833007812, 1.001617431640625, 1.0349044799804688, 1.0681915283203125, 1.1014785766601562, 1.134765625]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 10.0, 6.0, 13.0, 10.0, 13.0, 20.0, 29.0, 32.0, 47.0, 57.0, 49.0, 68.0, 76.0, 65.0, 86.0, 65.0, 71.0, 55.0, 44.0, 54.0, 33.0, 29.0, 20.0, 14.0, 13.0, 5.0, 3.0, 3.0, 6.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.21484375, -4.090240478515625, -3.96563720703125, -3.841033935546875, -3.7164306640625, -3.591827392578125, -3.46722412109375, -3.342620849609375, -3.218017578125, -3.093414306640625, -2.96881103515625, -2.844207763671875, -2.7196044921875, -2.595001220703125, -2.47039794921875, -2.345794677734375, -2.22119140625, -2.096588134765625, -1.97198486328125, -1.847381591796875, -1.7227783203125, -1.598175048828125, -1.47357177734375, -1.348968505859375, -1.224365234375, -1.099761962890625, -0.97515869140625, -0.850555419921875, -0.7259521484375, -0.601348876953125, -0.47674560546875, -0.352142333984375, -0.2275390625, -0.102935791015625, 0.02166748046875, 0.146270751953125, 0.2708740234375, 0.395477294921875, 0.52008056640625, 0.644683837890625, 0.769287109375, 0.893890380859375, 1.01849365234375, 1.143096923828125, 1.2677001953125, 1.392303466796875, 1.51690673828125, 1.641510009765625, 1.76611328125, 1.890716552734375, 2.01531982421875, 2.139923095703125, 2.2645263671875, 2.389129638671875, 2.51373291015625, 2.638336181640625, 2.762939453125, 2.887542724609375, 3.01214599609375, 3.136749267578125, 3.2613525390625, 3.385955810546875, 3.51055908203125, 3.635162353515625, 3.759765625]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 9.0, 3.0, 14.0, 14.0, 12.0, 11.0, 23.0, 30.0, 39.0, 55.0, 55.0, 70.0, 73.0, 111.0, 154.0, 222.0, 481.0, 1517.0, 5852.0, 38663.0, 798905.0, 181952.0, 15306.0, 2996.0, 867.0, 365.0, 182.0, 137.0, 95.0, 72.0, 71.0, 37.0, 46.0, 23.0, 23.0, 19.0, 14.0, 9.0, 12.0, 7.0, 4.0, 2.0, 1.0, 7.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.1651611328125, -0.1594219207763672, -0.15368270874023438, -0.14794349670410156, -0.14220428466796875, -0.13646507263183594, -0.13072586059570312, -0.12498664855957031, -0.1192474365234375, -0.11350822448730469, -0.10776901245117188, -0.10202980041503906, -0.09629058837890625, -0.09055137634277344, -0.08481216430664062, -0.07907295227050781, -0.073333740234375, -0.06759452819824219, -0.061855316162109375, 
-0.05611610412597656, -0.05037689208984375, -0.04463768005371094, -0.038898468017578125, -0.03315925598144531, -0.0274200439453125, -0.021680831909179688, -0.015941619873046875, -0.010202407836914062, -0.00446319580078125, 0.0012760162353515625, 0.007015228271484375, 0.012754440307617188, 0.01849365234375, 0.024232864379882812, 0.029972076416015625, 0.03571128845214844, 0.04145050048828125, 0.04718971252441406, 0.052928924560546875, 0.05866813659667969, 0.0644073486328125, 0.07014656066894531, 0.07588577270507812, 0.08162498474121094, 0.08736419677734375, 0.09310340881347656, 0.09884262084960938, 0.10458183288574219, 0.110321044921875, 0.11606025695800781, 0.12179946899414062, 0.12753868103027344, 0.13327789306640625, 0.13901710510253906, 0.14475631713867188, 0.1504955291748047, 0.1562347412109375, 0.1619739532470703, 0.16771316528320312, 0.17345237731933594, 0.17919158935546875, 0.18493080139160156, 0.19067001342773438, 0.1964092254638672, 0.2021484375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 7.0, 5.0, 11.0, 10.0, 9.0, 12.0, 10.0, 14.0, 13.0, 23.0, 28.0, 28.0, 25.0, 28.0, 23.0, 45.0, 35.0, 45.0, 33.0, 47.0, 62.0, 51.0, 41.0, 39.0, 44.0, 45.0, 41.0, 30.0, 33.0, 28.0, 30.0, 18.0, 20.0, 9.0, 17.0, 12.0, 7.0, 5.0, 10.0, 5.0, 4.0, 1.0, 1.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.32421875, -4.1739501953125, -4.023681640625, -3.8734130859375, -3.72314453125, -3.5728759765625, -3.422607421875, -3.2723388671875, -3.1220703125, -2.9718017578125, -2.821533203125, -2.6712646484375, -2.52099609375, -2.3707275390625, -2.220458984375, -2.0701904296875, -1.919921875, -1.7696533203125, -1.619384765625, -1.4691162109375, -1.31884765625, -1.1685791015625, -1.018310546875, -0.8680419921875, -0.7177734375, -0.5675048828125, -0.417236328125, -0.2669677734375, -0.11669921875, 0.0335693359375, 0.183837890625, 0.3341064453125, 0.484375, 0.6346435546875, 0.784912109375, 0.9351806640625, 1.08544921875, 1.2357177734375, 1.385986328125, 1.5362548828125, 1.6865234375, 1.8367919921875, 1.987060546875, 2.1373291015625, 2.28759765625, 2.4378662109375, 2.588134765625, 2.7384033203125, 2.888671875, 3.0389404296875, 3.189208984375, 3.3394775390625, 3.48974609375, 3.6400146484375, 3.790283203125, 3.9405517578125, 4.0908203125, 4.2410888671875, 4.391357421875, 4.5416259765625, 4.69189453125, 4.8421630859375, 4.992431640625, 5.1427001953125, 5.29296875]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 5.0, 3.0, 5.0, 2.0, 11.0, 12.0, 15.0, 20.0, 14.0, 20.0, 52.0, 42.0, 69.0, 80.0, 128.0, 233.0, 379.0, 809.0, 1694.0, 4239.0, 11897.0, 43736.0, 495883.0, 427450.0, 42264.0, 11658.0, 4167.0, 1785.0, 795.0, 410.0, 213.0, 140.0, 97.0, 60.0, 33.0, 32.0, 32.0, 20.0, 17.0, 9.0, 11.0, 3.0, 7.0, 7.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0003025531768798828, -0.000293094664812088, -0.0002836361527442932, -0.0002741776406764984, -0.0002647191286087036, -0.0002552606165409088, -0.000245802104473114, -0.00023634359240531921, -0.00022688508033752441, -0.00021742656826972961, -0.00020796805620193481, -0.00019850954413414001, -0.00018905103206634521, -0.00017959251999855042, -0.00017013400793075562, -0.00016067549586296082, -0.00015121698379516602, -0.00014175847172737122, -0.00013229995965957642, -0.00012284144759178162, -0.00011338293552398682, -0.00010392442345619202, 
-9.446591138839722e-05, -8.500739932060242e-05, -7.554888725280762e-05, -6.609037518501282e-05, -5.663186311721802e-05, -4.717335104942322e-05, -3.771483898162842e-05, -2.8256326913833618e-05, -1.879781484603882e-05, -9.339302778244019e-06, 1.1920928955078125e-07, 9.577721357345581e-06, 1.903623342514038e-05, 2.849474549293518e-05, 3.795325756072998e-05, 4.741176962852478e-05, 5.687028169631958e-05, 6.632879376411438e-05, 7.578730583190918e-05, 8.524581789970398e-05, 9.470432996749878e-05, 0.00010416284203529358, 0.00011362135410308838, 0.00012307986617088318, 0.00013253837823867798, 0.00014199689030647278, 0.00015145540237426758, 0.00016091391444206238, 0.00017037242650985718, 0.00017983093857765198, 0.00018928945064544678, 0.00019874796271324158, 0.00020820647478103638, 0.00021766498684883118, 0.00022712349891662598, 0.00023658201098442078, 0.0002460405230522156, 0.0002554990351200104, 0.0002649575471878052, 0.0002744160592556, 0.0002838745713233948, 0.0002933330833911896, 0.0003027915954589844]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 8.0, 7.0, 2.0, 8.0, 11.0, 15.0, 18.0, 20.0, 37.0, 62.0, 103.0, 227.0, 199.0, 115.0, 53.0, 31.0, 17.0, 28.0, 7.0, 9.0, 8.0, 10.0, 5.0, 1.0, 4.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.546476364135742e-05, -3.3686868846416473e-05, -3.1908974051475525e-05, -3.0131079256534576e-05, -2.8353184461593628e-05, -2.657528966665268e-05, -2.479739487171173e-05, -2.3019500076770782e-05, -2.1241605281829834e-05, -1.9463710486888885e-05, -1.7685815691947937e-05, -1.590792089700699e-05, -1.413002610206604e-05, -1.2352131307125092e-05, -1.0574236512184143e-05, -8.796341717243195e-06, -7.018446922302246e-06, -5.240552127361298e-06, -3.462657332420349e-06, -1.6847625374794006e-06, 9.313225746154785e-08, 1.8710270524024963e-06, 3.648921847343445e-06, 5.426816642284393e-06, 7.204711437225342e-06, 8.98260623216629e-06, 1.0760501027107239e-05, 1.2538395822048187e-05, 1.4316290616989136e-05, 1.6094185411930084e-05, 1.7872080206871033e-05, 1.964997500181198e-05, 2.142786979675293e-05, 2.3205764591693878e-05, 2.4983659386634827e-05, 2.6761554181575775e-05, 2.8539448976516724e-05, 3.0317343771457672e-05, 3.209523856639862e-05, 3.387313336133957e-05, 3.565102815628052e-05, 3.7428922951221466e-05, 3.9206817746162415e-05, 4.098471254110336e-05, 4.276260733604431e-05, 4.454050213098526e-05, 4.631839692592621e-05, 4.809629172086716e-05, 4.9874186515808105e-05, 5.1652081310749054e-05, 5.342997610569e-05, 5.520787090063095e-05, 5.69857656955719e-05, 5.876366049051285e-05, 6.0541555285453796e-05, 6.231945008039474e-05, 6.40973448753357e-05, 6.587523967027664e-05, 6.765313446521759e-05, 6.943102926015854e-05, 7.120892405509949e-05, 7.298681885004044e-05, 7.476471364498138e-05, 7.654260843992233e-05, 7.832050323486328e-05]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 5.0, 4.0, 5.0, 13.0, 15.0, 14.0, 21.0, 36.0, 51.0, 183.0, 1045565.0, 2426.0, 84.0, 49.0, 25.0, 19.0, 9.0, 11.0, 5.0, 5.0, 5.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00421142578125, -0.004096895456314087, -0.003982365131378174, -0.0038678348064422607, 
-0.0037533044815063477, -0.0036387741565704346, -0.0035242438316345215, -0.0034097135066986084, -0.0032951831817626953, -0.0031806528568267822, -0.003066122531890869, -0.002951592206954956, -0.002837061882019043, -0.00272253155708313, -0.002608001232147217, -0.0024934709072113037, -0.0023789405822753906, -0.0022644102573394775, -0.0021498799324035645, -0.0020353496074676514, -0.0019208192825317383, -0.0018062889575958252, -0.0016917586326599121, -0.001577228307723999, -0.001462697982788086, -0.0013481676578521729, -0.0012336373329162598, -0.0011191070079803467, -0.0010045766830444336, -0.0008900463581085205, -0.0007755160331726074, -0.0006609857082366943, -0.0005464553833007812, -0.00043192505836486816, -0.0003173947334289551, -0.000202864408493042, -8.83340835571289e-05, 2.619624137878418e-05, 0.00014072656631469727, 0.00025525689125061035, 0.00036978721618652344, 0.0004843175411224365, 0.0005988478660583496, 0.0007133781909942627, 0.0008279085159301758, 0.0009424388408660889, 0.001056969165802002, 0.001171499490737915, 0.0012860298156738281, 0.0014005601406097412, 0.0015150904655456543, 0.0016296207904815674, 0.0017441511154174805, 0.0018586814403533936, 0.0019732117652893066, 0.0020877420902252197, 0.002202272415161133, 0.002316802740097046, 0.002431333065032959, 0.002545863389968872, 0.002660393714904785, 0.0027749240398406982, 0.0028894543647766113, 0.0030039846897125244, 0.0031185150146484375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 7.0, 11.0, 11.0, 12.0, 22.0, 31.0, 56.0, 137.0, 405.0, 147.0, 61.0, 36.0, 26.0, 14.0, 9.0, 5.0, 6.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014829635620117188, -0.0001442432403564453, -0.00014019012451171875, -0.0001361370086669922, -0.00013208389282226562, -0.00012803077697753906, -0.0001239776611328125, -0.00011992454528808594, -0.00011587142944335938, -0.00011181831359863281, -0.00010776519775390625, -0.00010371208190917969, -9.965896606445312e-05, -9.560585021972656e-05, -9.1552734375e-05, -8.749961853027344e-05, -8.344650268554688e-05, -7.939338684082031e-05, -7.534027099609375e-05, -7.128715515136719e-05, -6.723403930664062e-05, -6.318092346191406e-05, -5.91278076171875e-05, -5.507469177246094e-05, -5.1021575927734375e-05, -4.696846008300781e-05, -4.291534423828125e-05, -3.886222839355469e-05, -3.4809112548828125e-05, -3.075599670410156e-05, -2.6702880859375e-05, -2.2649765014648438e-05, -1.8596649169921875e-05, -1.4543533325195312e-05, -1.049041748046875e-05, -6.4373016357421875e-06, -2.384185791015625e-06, 1.6689300537109375e-06, 5.7220458984375e-06, 9.775161743164062e-06, 1.3828277587890625e-05, 1.7881393432617188e-05, 2.193450927734375e-05, 2.5987625122070312e-05, 3.0040740966796875e-05, 3.409385681152344e-05, 3.814697265625e-05, 4.220008850097656e-05, 4.6253204345703125e-05, 5.030632019042969e-05, 5.435943603515625e-05, 5.841255187988281e-05, 6.246566772460938e-05, 6.651878356933594e-05, 7.05718994140625e-05, 7.462501525878906e-05, 7.867813110351562e-05, 8.273124694824219e-05, 8.678436279296875e-05, 9.083747863769531e-05, 9.489059448242188e-05, 9.894371032714844e-05, 0.000102996826171875, 0.00010704994201660156, 0.00011110305786132812]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 10.0, 
42.0, 536.0, 418.0, 11.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.748734951019287, -4.930497169494629, -4.112259387969971, -3.2940213680267334, -2.475783586502075, -1.657545566558838, -0.8393077850341797, -0.021070003509521484, 0.7971677780151367, 1.615405559539795, 2.433643341064453, 3.2518813610076904, 4.0701189041137695, 4.888357162475586, 5.706594944000244, 6.524832725524902, 7.3430705070495605, 8.161308288574219, 8.979546546936035, 9.797783851623535, 10.616022109985352, 11.434259414672852, 12.252497673034668, 13.070735931396484, 13.888973236083984, 14.7072114944458, 15.5254487991333, 16.343687057495117, 17.161924362182617, 17.98016357421875, 18.79840087890625, 19.61663818359375, 20.434877395629883, 21.253114700317383, 22.071353912353516, 22.889591217041016, 23.707828521728516, 24.526065826416016, 25.34430503845215, 26.16254234313965, 26.98077964782715, 27.79901695251465, 28.61725616455078, 29.43549346923828, 30.25373077392578, 31.07196807861328, 31.890207290649414, 32.70844268798828, 33.52668380737305, 34.34492111206055, 35.16315841674805, 35.98139572143555, 36.79963684082031, 37.61787414550781, 38.43611145019531, 39.25434875488281, 40.07258605957031, 40.89082336425781, 41.70906066894531, 42.52729797363281, 43.34553909301758, 44.16377639770508, 44.98201370239258, 45.80025100708008, 46.61848831176758]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 6.0, 10.0, 8.0, 12.0, 13.0, 17.0, 31.0, 41.0, 43.0, 47.0, 61.0, 69.0, 73.0, 64.0, 64.0, 78.0, 54.0, 66.0, 61.0, 41.0, 29.0, 34.0, 20.0, 18.0, 13.0, 10.0, 2.0, 5.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-4.346042156219482, -4.228121280670166, -4.11020040512085, -3.992279529571533, -3.874358654022217, -3.7564377784729004, -3.638517141342163, -3.5205962657928467, -3.4026753902435303, -3.284754514694214, -3.1668336391448975, -3.048912763595581, -2.9309921264648438, -2.8130712509155273, -2.695150375366211, -2.5772294998168945, -2.459308624267578, -2.3413877487182617, -2.2234668731689453, -2.105545997619629, -1.987625241279602, -1.8697043657302856, -1.7517836093902588, -1.6338627338409424, -1.515941858291626, -1.3980209827423096, -1.2801001071929932, -1.1621793508529663, -1.04425847530365, -0.9263375997543335, -0.8084167838096619, -0.6904959678649902, -0.5725748538970947, -0.4546540081501007, -0.3367331624031067, -0.21881231665611267, -0.10089147090911865, 0.017029404640197754, 0.13495022058486938, 0.252871036529541, 0.3707919120788574, 0.48871275782585144, 0.6066336035728455, 0.7245544195175171, 0.8424752950668335, 0.9603961706161499, 1.0783169269561768, 1.1962378025054932, 1.3141586780548096, 1.432079553604126, 1.5500004291534424, 1.6679211854934692, 1.7858420610427856, 1.903762936592102, 2.021683692932129, 2.1396045684814453, 2.2575254440307617, 2.375446319580078, 2.4933671951293945, 2.611288070678711, 2.7292089462280273, 2.8471298217773438, 2.965050458908081, 3.0829713344573975, 3.200892210006714]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 4.0, 5.0, 2.0, 4.0, 12.0, 16.0, 58.0, 88.0, 186.0, 438.0, 1361.0, 7268.0, 292156.0, 733491.0, 10812.0, 1729.0, 475.0, 212.0, 111.0, 50.0, 22.0, 23.0, 12.0, 7.0, 3.0, 5.0, 2.0, 3.0, 1.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.9921875, -8.7733154296875, -8.554443359375, -8.3355712890625, -8.11669921875, -7.8978271484375, -7.678955078125, -7.4600830078125, -7.2412109375, -7.0223388671875, -6.803466796875, -6.5845947265625, -6.36572265625, -6.1468505859375, -5.927978515625, -5.7091064453125, -5.490234375, -5.2713623046875, -5.052490234375, -4.8336181640625, -4.61474609375, -4.3958740234375, -4.177001953125, -3.9581298828125, -3.7392578125, -3.5203857421875, -3.301513671875, -3.0826416015625, -2.86376953125, -2.6448974609375, -2.426025390625, -2.2071533203125, -1.98828125, -1.7694091796875, -1.550537109375, -1.3316650390625, -1.11279296875, -0.8939208984375, -0.675048828125, -0.4561767578125, -0.2373046875, -0.0184326171875, 0.200439453125, 0.4193115234375, 0.63818359375, 0.8570556640625, 1.075927734375, 1.2947998046875, 1.513671875, 1.7325439453125, 1.951416015625, 2.1702880859375, 2.38916015625, 2.6080322265625, 2.826904296875, 3.0457763671875, 3.2646484375, 3.4835205078125, 3.702392578125, 3.9212646484375, 4.14013671875, 4.3590087890625, 4.577880859375, 4.7967529296875, 5.015625]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 4.0, 6.0, 4.0, 20.0, 26.0, 36.0, 57.0, 65.0, 67.0, 92.0, 103.0, 101.0, 99.0, 78.0, 72.0, 53.0, 34.0, 34.0, 29.0, 10.0, 5.0, 7.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.408203125, -3.321258544921875, -3.23431396484375, -3.147369384765625, -3.0604248046875, -2.973480224609375, -2.88653564453125, -2.799591064453125, -2.712646484375, -2.625701904296875, -2.53875732421875, -2.451812744140625, -2.3648681640625, -2.277923583984375, -2.19097900390625, -2.104034423828125, -2.01708984375, -1.930145263671875, -1.84320068359375, -1.756256103515625, -1.6693115234375, -1.582366943359375, -1.49542236328125, -1.408477783203125, -1.321533203125, -1.234588623046875, -1.14764404296875, -1.060699462890625, -0.9737548828125, -0.886810302734375, -0.79986572265625, -0.712921142578125, -0.6259765625, -0.539031982421875, -0.45208740234375, -0.365142822265625, -0.2781982421875, -0.191253662109375, -0.10430908203125, -0.017364501953125, 0.069580078125, 0.156524658203125, 0.24346923828125, 0.330413818359375, 0.4173583984375, 0.504302978515625, 0.59124755859375, 0.678192138671875, 0.76513671875, 0.852081298828125, 0.93902587890625, 1.025970458984375, 1.1129150390625, 1.199859619140625, 1.28680419921875, 1.373748779296875, 1.460693359375, 1.547637939453125, 1.63458251953125, 1.721527099609375, 1.8084716796875, 1.895416259765625, 1.98236083984375, 2.069305419921875, 2.15625]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 6.0, 4.0, 3.0, 6.0, 3.0, 4.0, 6.0, 9.0, 4.0, 8.0, 12.0, 19.0, 19.0, 25.0, 28.0, 41.0, 63.0, 84.0, 149.0, 487.0, 1044654.0, 2319.0, 197.0, 112.0, 58.0, 53.0, 34.0, 22.0, 28.0, 21.0, 10.0, 11.0, 5.0, 7.0, 9.0, 4.0, 3.0, 7.0, 6.0, 5.0, 3.0, 4.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-30.1875, -29.171875, -28.15625, -27.140625, -26.125, -25.109375, 
-24.09375, -23.078125, -22.0625, -21.046875, -20.03125, -19.015625, -18.0, -16.984375, -15.96875, -14.953125, -13.9375, -12.921875, -11.90625, -10.890625, -9.875, -8.859375, -7.84375, -6.828125, -5.8125, -4.796875, -3.78125, -2.765625, -1.75, -0.734375, 0.28125, 1.296875, 2.3125, 3.328125, 4.34375, 5.359375, 6.375, 7.390625, 8.40625, 9.421875, 10.4375, 11.453125, 12.46875, 13.484375, 14.5, 15.515625, 16.53125, 17.546875, 18.5625, 19.578125, 20.59375, 21.609375, 22.625, 23.640625, 24.65625, 25.671875, 26.6875, 27.703125, 28.71875, 29.734375, 30.75, 31.765625, 32.78125, 33.796875, 34.8125]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 3.0, 3.0, 5.0, 7.0, 11.0, 4.0, 10.0, 17.0, 17.0, 17.0, 22.0, 28.0, 34.0, 27.0, 37.0, 54.0, 40.0, 53.0, 32.0, 46.0, 56.0, 57.0, 49.0, 41.0, 58.0, 45.0, 39.0, 31.0, 29.0, 14.0, 17.0, 23.0, 20.0, 16.0, 5.0, 2.0, 9.0, 6.0, 1.0, 5.0, 3.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.119140625, -2.048828125, -1.978515625, -1.908203125, -1.837890625, -1.767578125, -1.697265625, -1.626953125, -1.556640625, -1.486328125, -1.416015625, -1.345703125, -1.275390625, -1.205078125, -1.134765625, -1.064453125, -0.994140625, -0.923828125, -0.853515625, -0.783203125, -0.712890625, -0.642578125, -0.572265625, -0.501953125, -0.431640625, -0.361328125, -0.291015625, -0.220703125, -0.150390625, -0.080078125, -0.009765625, 0.060546875, 0.130859375, 0.201171875, 0.271484375, 0.341796875, 0.412109375, 0.482421875, 0.552734375, 0.623046875, 0.693359375, 0.763671875, 0.833984375, 0.904296875, 0.974609375, 1.044921875, 1.115234375, 1.185546875, 1.255859375, 1.326171875, 1.396484375, 1.466796875, 1.537109375, 1.607421875, 1.677734375, 1.748046875, 1.818359375, 1.888671875, 1.958984375, 2.029296875, 2.099609375, 2.169921875, 2.240234375, 2.310546875, 2.380859375]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 7.0, 8.0, 8.0, 6.0, 16.0, 27.0, 103.0, 1047670.0, 622.0, 42.0, 18.0, 14.0, 7.0, 6.0, 7.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-51.03125, -49.6787109375, -48.326171875, -46.9736328125, -45.62109375, -44.2685546875, -42.916015625, -41.5634765625, -40.2109375, -38.8583984375, -37.505859375, -36.1533203125, -34.80078125, -33.4482421875, -32.095703125, -30.7431640625, -29.390625, -28.0380859375, -26.685546875, -25.3330078125, -23.98046875, -22.6279296875, -21.275390625, -19.9228515625, -18.5703125, -17.2177734375, -15.865234375, -14.5126953125, -13.16015625, -11.8076171875, -10.455078125, -9.1025390625, -7.75, -6.3974609375, -5.044921875, -3.6923828125, -2.33984375, -0.9873046875, 0.365234375, 1.7177734375, 3.0703125, 4.4228515625, 5.775390625, 7.1279296875, 8.48046875, 9.8330078125, 11.185546875, 12.5380859375, 13.890625, 15.2431640625, 16.595703125, 17.9482421875, 19.30078125, 20.6533203125, 22.005859375, 23.3583984375, 24.7109375, 26.0634765625, 27.416015625, 28.7685546875, 30.12109375, 31.4736328125, 32.826171875, 34.1787109375, 35.53125]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 0.0, 2.0, 2.0, 7.0, 21.0, 32.0, 190.0, 685.0, 39.0, 25.0, 7.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00054931640625, -0.0004998445510864258, -0.00045037269592285156, -0.00040090084075927734, -0.0003514289855957031, -0.0003019571304321289, -0.0002524852752685547, -0.00020301342010498047, -0.00015354156494140625, -0.00010406970977783203, -5.459785461425781e-05, -5.125999450683594e-06, 4.4345855712890625e-05, 9.381771087646484e-05, 0.00014328956604003906, 0.00019276142120361328, 0.0002422332763671875, 0.0002917051315307617, 0.00034117698669433594, 0.00039064884185791016, 0.0004401206970214844, 0.0004895925521850586, 0.0005390644073486328, 0.000588536262512207, 0.0006380081176757812, 0.0006874799728393555, 0.0007369518280029297, 0.0007864236831665039, 0.0008358955383300781, 0.0008853673934936523, 0.0009348392486572266, 0.0009843111038208008, 0.001033782958984375, 0.0010832548141479492, 0.0011327266693115234, 0.0011821985244750977, 0.0012316703796386719, 0.001281142234802246, 0.0013306140899658203, 0.0013800859451293945, 0.0014295578002929688, 0.001479029655456543, 0.0015285015106201172, 0.0015779733657836914, 0.0016274452209472656, 0.0016769170761108398, 0.001726388931274414, 0.0017758607864379883, 0.0018253326416015625, 0.0018748044967651367, 0.001924276351928711, 0.001973748207092285, 0.0020232200622558594, 0.0020726919174194336, 0.002122163772583008, 0.002171635627746582, 0.0022211074829101562, 0.0022705793380737305, 0.0023200511932373047, 0.002369523048400879, 0.002418994903564453, 0.0024684667587280273, 0.0025179386138916016, 0.0025674104690551758, 0.00261688232421875]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 10.0, 18.0, 21.0, 115.0, 1048240.0, 86.0, 19.0, 25.0, 12.0, 6.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-70.6875, -68.2880859375, -65.888671875, -63.4892578125, -61.08984375, -58.6904296875, -56.291015625, -53.8916015625, -51.4921875, -49.0927734375, -46.693359375, -44.2939453125, -41.89453125, -39.4951171875, -37.095703125, -34.6962890625, -32.296875, -29.8974609375, -27.498046875, -25.0986328125, -22.69921875, -20.2998046875, -17.900390625, -15.5009765625, -13.1015625, -10.7021484375, -8.302734375, -5.9033203125, -3.50390625, -1.1044921875, 1.294921875, 3.6943359375, 6.09375, 8.4931640625, 10.892578125, 13.2919921875, 15.69140625, 18.0908203125, 20.490234375, 22.8896484375, 25.2890625, 27.6884765625, 30.087890625, 32.4873046875, 34.88671875, 37.2861328125, 39.685546875, 42.0849609375, 44.484375, 46.8837890625, 49.283203125, 51.6826171875, 54.08203125, 56.4814453125, 58.880859375, 61.2802734375, 63.6796875, 66.0791015625, 68.478515625, 70.8779296875, 73.27734375, 75.6767578125, 78.076171875, 80.4755859375, 82.875]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 6.0, 13.0, 22.0, 40.0, 714.0, 127.0, 28.0, 19.0, 15.0, 12.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.03125, -2.929931640625, 
-2.82861328125, -2.727294921875, -2.6259765625, -2.524658203125, -2.42333984375, -2.322021484375, -2.220703125, -2.119384765625, -2.01806640625, -1.916748046875, -1.8154296875, -1.714111328125, -1.61279296875, -1.511474609375, -1.41015625, -1.308837890625, -1.20751953125, -1.106201171875, -1.0048828125, -0.903564453125, -0.80224609375, -0.700927734375, -0.599609375, -0.498291015625, -0.39697265625, -0.295654296875, -0.1943359375, -0.093017578125, 0.00830078125, 0.109619140625, 0.2109375, 0.312255859375, 0.41357421875, 0.514892578125, 0.6162109375, 0.717529296875, 0.81884765625, 0.920166015625, 1.021484375, 1.122802734375, 1.22412109375, 1.325439453125, 1.4267578125, 1.528076171875, 1.62939453125, 1.730712890625, 1.83203125, 1.933349609375, 2.03466796875, 2.135986328125, 2.2373046875, 2.338623046875, 2.43994140625, 2.541259765625, 2.642578125, 2.743896484375, 2.84521484375, 2.946533203125, 3.0478515625, 3.149169921875, 3.25048828125, 3.351806640625, 3.453125]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1020.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5012686252593994, 2.1786506175994873, 6.858570098876953, 11.53848934173584, 16.218408584594727, 20.89832878112793, 25.5782470703125, 30.25816535949707, 34.93808364868164, 39.618003845214844, 44.29792022705078, 48.977840423583984, 53.65776062011719, 58.33768081665039, 63.017601013183594, 67.69751739501953, 72.37744140625, 77.05735778808594, 81.7372817993164, 86.41719818115234, 91.09712219238281, 95.77703857421875, 100.45695495605469, 105.13687133789062, 109.8167953491211, 114.49671173095703, 119.1766357421875, 123.85655212402344, 128.53646850585938, 133.21640014648438, 137.8963165283203, 142.57623291015625, 147.25613403320312, 151.93605041503906, 156.615966796875, 161.2958984375, 165.97581481933594, 170.65573120117188, 175.3356475830078, 180.01556396484375, 184.69549560546875, 189.3754119873047, 194.05532836914062, 198.73526000976562, 203.41517639160156, 208.0950927734375, 212.77500915527344, 217.45492553710938, 222.1348419189453, 226.81475830078125, 231.4946746826172, 236.1746063232422, 240.85452270507812, 245.53443908691406, 250.21435546875, 254.89427185058594, 259.5741882324219, 264.2541198730469, 268.93402099609375, 273.61395263671875, 278.2938537597656, 282.9737854003906, 287.6537170410156, 292.3336181640625, 297.0135498046875]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 4.0, 6.0, 12.0, 24.0, 41.0, 68.0, 82.0, 115.0, 123.0, 119.0, 116.0, 101.0, 70.0, 66.0, 27.0, 22.0, 14.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2097671031951904, -2.9125418663024902, -2.615316867828369, -2.318091630935669, -2.0208663940429688, -1.7236413955688477, -1.4264161586761475, -1.1291911602020264, -0.8319659233093262, -0.5347408056259155, -0.2375156283378601, 0.05970954895019531, 0.35693466663360596, 0.6541597843170166, 0.9513850212097168, 1.248610019683838, 1.545835256576538, 1.8430603742599487, 2.1402854919433594, 2.4375107288360596, 2.7347359657287598, 
3.031960964202881, 3.329186201095581, 3.626411199569702, 3.9236364364624023, 4.220861434936523, 4.518086910247803, 4.815311908721924, 5.112536907196045, 5.409762382507324, 5.706987380981445, 6.004212379455566, 6.301438331604004, 6.598663330078125, 6.895888805389404, 7.193113803863525, 7.4903388023376465, 7.787564277648926, 8.084789276123047, 8.382014274597168, 8.679239273071289, 8.97646427154541, 9.273689270019531, 9.570915222167969, 9.86814022064209, 10.165365219116211, 10.462590217590332, 10.759815216064453, 11.05704116821289, 11.354266166687012, 11.651491165161133, 11.94871711730957, 12.245942115783691, 12.543167114257812, 12.840392112731934, 13.137617111206055, 13.434842109680176, 13.732067108154297, 14.029292106628418, 14.326517105102539, 14.623743057250977, 14.920968055725098, 15.218193054199219, 15.51541805267334, 15.812643051147461]}, "gradients/decoder.model.decoder.layers.10.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 5.0, 17.0, 17.0, 34.0, 98.0, 199.0, 431.0, 1520.0, 4186231.0, 4532.0, 662.0, 297.0, 131.0, 63.0, 26.0, 15.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-32.03125, -31.3592529296875, -30.687255859375, -30.0152587890625, -29.34326171875, -28.6712646484375, -27.999267578125, -27.3272705078125, -26.6552734375, -25.9832763671875, -25.311279296875, -24.6392822265625, -23.96728515625, -23.2952880859375, -22.623291015625, -21.9512939453125, -21.279296875, -20.6072998046875, -19.935302734375, -19.2633056640625, -18.59130859375, -17.9193115234375, -17.247314453125, -16.5753173828125, -15.9033203125, -15.2313232421875, -14.559326171875, -13.8873291015625, -13.21533203125, -12.5433349609375, -11.871337890625, -11.1993408203125, -10.52734375, -9.8553466796875, -9.183349609375, -8.5113525390625, -7.83935546875, -7.1673583984375, -6.495361328125, -5.8233642578125, -5.1513671875, -4.4793701171875, -3.807373046875, -3.1353759765625, -2.46337890625, -1.7913818359375, -1.119384765625, -0.4473876953125, 0.224609375, 0.8966064453125, 1.568603515625, 2.2406005859375, 2.91259765625, 3.5845947265625, 4.256591796875, 4.9285888671875, 5.6005859375, 6.2725830078125, 6.944580078125, 7.6165771484375, 8.28857421875, 8.9605712890625, 9.632568359375, 10.3045654296875, 10.9765625]}, "gradients/decoder.model.decoder.layers.10.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 8.0, 11.0, 42.0, 88.0, 139.0, 158.0, 187.0, 175.0, 98.0, 59.0, 23.0, 15.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.826171875, -2.7479248046875, -2.669677734375, -2.5914306640625, -2.51318359375, -2.4349365234375, -2.356689453125, -2.2784423828125, -2.2001953125, -2.1219482421875, -2.043701171875, -1.9654541015625, -1.88720703125, -1.8089599609375, -1.730712890625, -1.6524658203125, -1.57421875, -1.4959716796875, -1.417724609375, -1.3394775390625, -1.26123046875, -1.1829833984375, -1.104736328125, -1.0264892578125, -0.9482421875, -0.8699951171875, -0.791748046875, -0.7135009765625, -0.63525390625, -0.5570068359375, -0.478759765625, -0.4005126953125, -0.322265625, -0.2440185546875, -0.165771484375, -0.0875244140625, -0.00927734375, 0.0689697265625, 0.147216796875, 0.2254638671875, 0.3037109375, 
0.3819580078125, 0.460205078125, 0.5384521484375, 0.61669921875, 0.6949462890625, 0.773193359375, 0.8514404296875, 0.9296875, 1.0079345703125, 1.086181640625, 1.1644287109375, 1.24267578125, 1.3209228515625, 1.399169921875, 1.4774169921875, 1.5556640625, 1.6339111328125, 1.712158203125, 1.7904052734375, 1.86865234375, 1.9468994140625, 2.025146484375, 2.1033935546875, 2.181640625]}, "gradients/decoder.model.decoder.layers.10.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 7.0, 4.0, 6.0, 15.0, 66.0, 4190758.0, 3313.0, 43.0, 12.0, 13.0, 13.0, 7.0, 4.0, 5.0, 1.0, 6.0, 3.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-69.3125, -67.3251953125, -65.337890625, -63.3505859375, -61.36328125, -59.3759765625, -57.388671875, -55.4013671875, -53.4140625, -51.4267578125, -49.439453125, -47.4521484375, -45.46484375, -43.4775390625, -41.490234375, -39.5029296875, -37.515625, -35.5283203125, -33.541015625, -31.5537109375, -29.56640625, -27.5791015625, -25.591796875, -23.6044921875, -21.6171875, -19.6298828125, -17.642578125, -15.6552734375, -13.66796875, -11.6806640625, -9.693359375, -7.7060546875, -5.71875, -3.7314453125, -1.744140625, 0.2431640625, 2.23046875, 4.2177734375, 6.205078125, 8.1923828125, 10.1796875, 12.1669921875, 14.154296875, 16.1416015625, 18.12890625, 20.1162109375, 22.103515625, 24.0908203125, 26.078125, 28.0654296875, 30.052734375, 32.0400390625, 34.02734375, 36.0146484375, 38.001953125, 39.9892578125, 41.9765625, 43.9638671875, 45.951171875, 47.9384765625, 49.92578125, 51.9130859375, 53.900390625, 55.8876953125, 57.875]}, "gradients/decoder.model.decoder.layers.10.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 5.0, 9.0, 314.0, 3673.0, 19.0, 17.0, 12.0, 7.0, 4.0, 4.0, 2.0, 1.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5009765625, -1.458221435546875, -1.41546630859375, -1.372711181640625, -1.3299560546875, -1.287200927734375, -1.24444580078125, -1.201690673828125, -1.158935546875, -1.116180419921875, -1.07342529296875, -1.030670166015625, -0.9879150390625, -0.945159912109375, -0.90240478515625, -0.859649658203125, -0.81689453125, -0.774139404296875, -0.73138427734375, -0.688629150390625, -0.6458740234375, -0.603118896484375, -0.56036376953125, -0.517608642578125, -0.474853515625, -0.432098388671875, -0.38934326171875, -0.346588134765625, -0.3038330078125, -0.261077880859375, -0.21832275390625, -0.175567626953125, -0.1328125, -0.090057373046875, -0.04730224609375, -0.004547119140625, 0.0382080078125, 0.080963134765625, 0.12371826171875, 0.166473388671875, 0.209228515625, 0.251983642578125, 0.29473876953125, 0.337493896484375, 0.3802490234375, 0.423004150390625, 0.46575927734375, 0.508514404296875, 0.55126953125, 0.594024658203125, 0.63677978515625, 0.679534912109375, 0.7222900390625, 0.765045166015625, 0.80780029296875, 0.850555419921875, 0.893310546875, 0.936065673828125, 0.97882080078125, 1.021575927734375, 1.0643310546875, 1.107086181640625, 1.14984130859375, 1.192596435546875, 1.2353515625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 7.0, 5.0, 6.0, 6.0, 10.0, 15.0, 23.0, 44.0, 
66.0, 110.0, 133.0, 189.0, 164.0, 88.0, 69.0, 32.0, 23.0, 5.0, 6.0, 5.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1287455558776855, -1.054998755455017, -0.9812518358230591, -0.9075050354003906, -0.8337581157684326, -0.7600113153457642, -0.6862644553184509, -0.6125175952911377, -0.5387707352638245, -0.46502387523651123, -0.391277015209198, -0.31753018498420715, -0.24378332495689392, -0.1700364649295807, -0.09628963470458984, -0.02254277467727661, 0.05120408535003662, 0.12495093792676926, 0.1986977905035019, 0.27244463562965393, 0.34619149565696716, 0.4199383556842804, 0.49368518590927124, 0.5674320459365845, 0.6411789059638977, 0.7149257659912109, 0.7886726260185242, 0.8624194860458374, 0.9361662864685059, 1.0099132061004639, 1.0836600065231323, 1.1574068069458008, 1.2311537265777588, 1.3049005270004272, 1.3786474466323853, 1.4523942470550537, 1.5261411666870117, 1.5998879671096802, 1.6736347675323486, 1.7473816871643066, 1.8211286067962646, 1.894875407218933, 1.9686223268508911, 2.0423691272735596, 2.1161160469055176, 2.1898629665374756, 2.2636096477508545, 2.3373565673828125, 2.4111032485961914, 2.4848501682281494, 2.5585968494415283, 2.6323437690734863, 2.7060906887054443, 2.7798376083374023, 2.8535842895507812, 2.9273312091827393, 3.0010781288146973, 3.0748250484466553, 3.148571729660034, 3.222318649291992, 3.29606556892395, 3.369812488555908, 3.443559169769287, 3.517306089401245, 3.591053009033203]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 7.0, 4.0, 8.0, 15.0, 19.0, 29.0, 34.0, 59.0, 49.0, 64.0, 70.0, 84.0, 75.0, 110.0, 63.0, 69.0, 54.0, 34.0, 49.0, 32.0, 38.0, 15.0, 12.0, 8.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.7328779697418213, -2.6718125343322754, -2.6107470989227295, -2.5496816635131836, -2.4886162281036377, -2.427550792694092, -2.366485357284546, -2.305419921875, -2.244354486465454, -2.183289051055908, -2.1222236156463623, -2.0611581802368164, -2.0000927448272705, -1.9390273094177246, -1.8779618740081787, -1.8168964385986328, -1.755831003189087, -1.694765567779541, -1.6337001323699951, -1.5726346969604492, -1.5115692615509033, -1.4505038261413574, -1.3894383907318115, -1.3283729553222656, -1.2673074007034302, -1.2062419652938843, -1.1451765298843384, -1.0841110944747925, -1.0230456590652466, -0.9619802236557007, -0.9009147882461548, -0.8398493528366089, -0.778783917427063, -0.7177184820175171, -0.6566530466079712, -0.5955876111984253, -0.5345221757888794, -0.4734567105770111, -0.4123912751674652, -0.3513258397579193, -0.2902604043483734, -0.22919496893882751, -0.16812953352928162, -0.10706408321857452, -0.045998647809028625, 0.015066802501678467, 0.07613223791122437, 0.13719767332077026, 0.19826310873031616, 0.25932854413986206, 0.32039397954940796, 0.38145941495895386, 0.44252485036849976, 0.5035903453826904, 0.5646557807922363, 0.6257212162017822, 0.6867866516113281, 0.747852087020874, 0.8089175224304199, 0.8699829578399658, 0.9310483932495117, 0.9921138286590576, 1.0531792640686035, 1.1142446994781494, 1.1753101348876953]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 
0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 11.0, 16.0, 22.0, 33.0, 59.0, 239.0, 737.0, 3144.0, 57078.0, 977582.0, 7683.0, 1354.0, 357.0, 106.0, 53.0, 19.0, 13.0, 6.0, 3.0, 5.0, 3.0, 9.0, 3.0, 4.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.9873046875, -0.9546356201171875, -0.921966552734375, -0.8892974853515625, -0.85662841796875, -0.8239593505859375, -0.791290283203125, -0.7586212158203125, -0.7259521484375, -0.6932830810546875, -0.660614013671875, -0.6279449462890625, -0.59527587890625, -0.5626068115234375, -0.529937744140625, -0.4972686767578125, -0.464599609375, -0.4319305419921875, -0.399261474609375, -0.3665924072265625, -0.33392333984375, -0.3012542724609375, -0.268585205078125, -0.2359161376953125, -0.2032470703125, -0.1705780029296875, -0.137908935546875, -0.1052398681640625, -0.07257080078125, -0.0399017333984375, -0.007232666015625, 0.0254364013671875, 0.05810546875, 0.0907745361328125, 0.123443603515625, 0.1561126708984375, 0.18878173828125, 0.2214508056640625, 0.254119873046875, 0.2867889404296875, 0.3194580078125, 0.3521270751953125, 0.384796142578125, 0.4174652099609375, 0.45013427734375, 0.4828033447265625, 0.515472412109375, 0.5481414794921875, 0.580810546875, 0.6134796142578125, 0.646148681640625, 0.6788177490234375, 0.71148681640625, 0.7441558837890625, 0.776824951171875, 0.8094940185546875, 0.8421630859375, 0.8748321533203125, 0.907501220703125, 0.9401702880859375, 0.97283935546875, 1.0055084228515625, 1.038177490234375, 1.0708465576171875, 1.103515625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 6.0, 13.0, 25.0, 45.0, 101.0, 142.0, 184.0, 180.0, 138.0, 93.0, 49.0, 24.0, 9.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6826171875, -1.6023712158203125, -1.522125244140625, -1.4418792724609375, -1.36163330078125, -1.2813873291015625, -1.201141357421875, -1.1208953857421875, -1.0406494140625, -0.9604034423828125, -0.880157470703125, -0.7999114990234375, -0.71966552734375, -0.6394195556640625, -0.559173583984375, -0.4789276123046875, -0.398681640625, -0.3184356689453125, -0.238189697265625, -0.1579437255859375, -0.07769775390625, 0.0025482177734375, 0.082794189453125, 0.1630401611328125, 0.2432861328125, 0.3235321044921875, 0.403778076171875, 0.4840240478515625, 0.56427001953125, 0.6445159912109375, 0.724761962890625, 0.8050079345703125, 0.88525390625, 0.9654998779296875, 1.045745849609375, 1.1259918212890625, 1.20623779296875, 1.2864837646484375, 1.366729736328125, 1.4469757080078125, 1.5272216796875, 1.6074676513671875, 1.687713623046875, 1.7679595947265625, 1.84820556640625, 1.9284515380859375, 2.008697509765625, 2.0889434814453125, 2.169189453125, 2.2494354248046875, 2.329681396484375, 2.4099273681640625, 2.49017333984375, 2.5704193115234375, 2.650665283203125, 2.7309112548828125, 2.8111572265625, 2.8914031982421875, 2.971649169921875, 3.0518951416015625, 3.13214111328125, 3.2123870849609375, 3.292633056640625, 3.3728790283203125, 3.453125]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 5.0, 3.0, 6.0, 5.0, 13.0, 17.0, 21.0, 30.0, 39.0, 64.0, 64.0, 99.0, 144.0, 
188.0, 330.0, 833.0, 3540.0, 27723.0, 825544.0, 176072.0, 10602.0, 1883.0, 505.0, 256.0, 139.0, 120.0, 65.0, 69.0, 58.0, 34.0, 24.0, 18.0, 16.0, 15.0, 4.0, 5.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.085693359375, -0.08315753936767578, -0.08062171936035156, -0.07808589935302734, -0.07555007934570312, -0.0730142593383789, -0.07047843933105469, -0.06794261932373047, -0.06540679931640625, -0.06287097930908203, -0.06033515930175781, -0.057799339294433594, -0.055263519287109375, -0.052727699279785156, -0.05019187927246094, -0.04765605926513672, -0.0451202392578125, -0.04258441925048828, -0.04004859924316406, -0.037512779235839844, -0.034976959228515625, -0.032441139221191406, -0.029905319213867188, -0.02736949920654297, -0.02483367919921875, -0.02229785919189453, -0.019762039184570312, -0.017226219177246094, -0.014690399169921875, -0.012154579162597656, -0.009618759155273438, -0.007082939147949219, -0.004547119140625, -0.0020112991333007812, 0.0005245208740234375, 0.0030603408813476562, 0.005596160888671875, 0.008131980895996094, 0.010667800903320312, 0.013203620910644531, 0.01573944091796875, 0.01827526092529297, 0.020811080932617188, 0.023346900939941406, 0.025882720947265625, 0.028418540954589844, 0.030954360961914062, 0.03349018096923828, 0.0360260009765625, 0.03856182098388672, 0.04109764099121094, 0.043633460998535156, 0.046169281005859375, 0.048705101013183594, 0.05124092102050781, 0.05377674102783203, 0.05631256103515625, 0.05884838104248047, 0.06138420104980469, 0.0639200210571289, 0.06645584106445312, 0.06899166107177734, 0.07152748107910156, 0.07406330108642578, 0.07659912109375]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 3.0, 4.0, 5.0, 12.0, 11.0, 13.0, 17.0, 21.0, 28.0, 31.0, 32.0, 39.0, 44.0, 55.0, 45.0, 46.0, 61.0, 51.0, 50.0, 54.0, 39.0, 69.0, 51.0, 26.0, 44.0, 20.0, 32.0, 21.0, 20.0, 15.0, 11.0, 12.0, 14.0, 3.0, 5.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.248046875, -2.1815185546875, -2.114990234375, -2.0484619140625, -1.98193359375, -1.9154052734375, -1.848876953125, -1.7823486328125, -1.7158203125, -1.6492919921875, -1.582763671875, -1.5162353515625, -1.44970703125, -1.3831787109375, -1.316650390625, -1.2501220703125, -1.18359375, -1.1170654296875, -1.050537109375, -0.9840087890625, -0.91748046875, -0.8509521484375, -0.784423828125, -0.7178955078125, -0.6513671875, -0.5848388671875, -0.518310546875, -0.4517822265625, -0.38525390625, -0.3187255859375, -0.252197265625, -0.1856689453125, -0.119140625, -0.0526123046875, 0.013916015625, 0.0804443359375, 0.14697265625, 0.2135009765625, 0.280029296875, 0.3465576171875, 0.4130859375, 0.4796142578125, 0.546142578125, 0.6126708984375, 0.67919921875, 0.7457275390625, 0.812255859375, 0.8787841796875, 0.9453125, 1.0118408203125, 1.078369140625, 1.1448974609375, 1.21142578125, 1.2779541015625, 1.344482421875, 1.4110107421875, 1.4775390625, 1.5440673828125, 1.610595703125, 1.6771240234375, 1.74365234375, 1.8101806640625, 1.876708984375, 1.9432373046875, 2.009765625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 13.0, 12.0, 13.0, 9.0, 25.0, 51.0, 61.0, 97.0, 149.0, 206.0, 280.0, 495.0, 692.0, 1115.0, 1911.0, 3203.0, 5604.0, 10396.0, 20983.0, 46439.0, 125360.0, 541671.0, 175863.0, 60594.0, 25087.0, 12222.0, 6430.0, 
3669.0, 2127.0, 1333.0, 819.0, 503.0, 349.0, 257.0, 175.0, 100.0, 89.0, 48.0, 38.0, 21.0, 19.0, 8.0, 5.0, 11.0, 7.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002467632293701172, -0.00023870915174484253, -0.00023065507411956787, -0.0002226009964942932, -0.00021454691886901855, -0.0002064928412437439, -0.00019843876361846924, -0.00019038468599319458, -0.00018233060836791992, -0.00017427653074264526, -0.0001662224531173706, -0.00015816837549209595, -0.0001501142978668213, -0.00014206022024154663, -0.00013400614261627197, -0.00012595206499099731, -0.00011789798736572266, -0.000109843909740448, -0.00010178983211517334, -9.373575448989868e-05, -8.568167686462402e-05, -7.762759923934937e-05, -6.957352161407471e-05, -6.151944398880005e-05, -5.346536636352539e-05, -4.541128873825073e-05, -3.7357211112976074e-05, -2.9303133487701416e-05, -2.1249055862426758e-05, -1.31949782371521e-05, -5.140900611877441e-06, 2.913177013397217e-06, 1.0967254638671875e-05, 1.9021332263946533e-05, 2.707540988922119e-05, 3.512948751449585e-05, 4.318356513977051e-05, 5.1237642765045166e-05, 5.9291720390319824e-05, 6.734579801559448e-05, 7.539987564086914e-05, 8.34539532661438e-05, 9.150803089141846e-05, 9.956210851669312e-05, 0.00010761618614196777, 0.00011567026376724243, 0.0001237243413925171, 0.00013177841901779175, 0.0001398324966430664, 0.00014788657426834106, 0.00015594065189361572, 0.00016399472951889038, 0.00017204880714416504, 0.0001801028847694397, 0.00018815696239471436, 0.00019621104001998901, 0.00020426511764526367, 0.00021231919527053833, 0.000220373272895813, 0.00022842735052108765, 0.0002364814281463623, 0.00024453550577163696, 0.0002525895833969116, 0.0002606436610221863, 0.00026869773864746094]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 8.0, 2.0, 5.0, 5.0, 7.0, 12.0, 14.0, 18.0, 28.0, 55.0, 111.0, 215.0, 276.0, 84.0, 56.0, 29.0, 17.0, 22.0, 10.0, 6.0, 7.0, 2.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000179290771484375, -0.00017431378364562988, -0.00016933679580688477, -0.00016435980796813965, -0.00015938282012939453, -0.00015440583229064941, -0.0001494288444519043, -0.00014445185661315918, -0.00013947486877441406, -0.00013449788093566895, -0.00012952089309692383, -0.0001245439052581787, -0.0001195669174194336, -0.00011458992958068848, -0.00010961294174194336, -0.00010463595390319824, -9.965896606445312e-05, -9.468197822570801e-05, -8.970499038696289e-05, -8.472800254821777e-05, -7.975101470947266e-05, -7.477402687072754e-05, -6.979703903198242e-05, -6.48200511932373e-05, -5.984306335449219e-05, -5.486607551574707e-05, -4.988908767700195e-05, -4.4912099838256836e-05, -3.993511199951172e-05, -3.49581241607666e-05, -2.9981136322021484e-05, -2.5004148483276367e-05, -2.002716064453125e-05, -1.5050172805786133e-05, -1.0073184967041016e-05, -5.0961971282958984e-06, -1.1920928955078125e-07, 4.857778549194336e-06, 9.834766387939453e-06, 1.481175422668457e-05, 1.9788742065429688e-05, 2.4765729904174805e-05, 2.9742717742919922e-05, 3.471970558166504e-05, 3.9696693420410156e-05, 4.4673681259155273e-05, 4.965066909790039e-05, 5.462765693664551e-05, 5.9604644775390625e-05, 6.458163261413574e-05, 6.955862045288086e-05, 7.453560829162598e-05, 7.95125961303711e-05, 8.448958396911621e-05, 8.946657180786133e-05, 9.444355964660645e-05, 9.942054748535156e-05, 
0.00010439753532409668, 0.0001093745231628418, 0.00011435151100158691, 0.00011932849884033203, 0.00012430548667907715, 0.00012928247451782227, 0.00013425946235656738, 0.0001392364501953125]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 5.0, 0.0, 3.0, 5.0, 4.0, 3.0, 5.0, 8.0, 10.0, 9.0, 14.0, 28.0, 20.0, 41.0, 56.0, 114.0, 295.0, 748.0, 2562.0, 14401.0, 740926.0, 275262.0, 10654.0, 2154.0, 629.0, 269.0, 112.0, 62.0, 36.0, 31.0, 18.0, 21.0, 10.0, 6.0, 6.0, 9.0, 5.0, 6.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0005135536193847656, -0.0004988498985767365, -0.0004841461777687073, -0.0004694424569606781, -0.0004547387361526489, -0.00044003501534461975, -0.0004253312945365906, -0.0004106275737285614, -0.0003959238529205322, -0.00038122013211250305, -0.0003665164113044739, -0.0003518126904964447, -0.00033710896968841553, -0.00032240524888038635, -0.0003077015280723572, -0.000292997807264328, -0.00027829408645629883, -0.00026359036564826965, -0.0002488866448402405, -0.0002341829240322113, -0.00021947920322418213, -0.00020477548241615295, -0.00019007176160812378, -0.0001753680408000946, -0.00016066431999206543, -0.00014596059918403625, -0.00013125687837600708, -0.0001165531575679779, -0.00010184943675994873, -8.714571595191956e-05, -7.244199514389038e-05, -5.7738274335861206e-05, -4.303455352783203e-05, -2.8330832719802856e-05, -1.3627111911773682e-05, 1.0766088962554932e-06, 1.5780329704284668e-05, 3.0484050512313843e-05, 4.518777132034302e-05, 5.989149212837219e-05, 7.459521293640137e-05, 8.929893374443054e-05, 0.00010400265455245972, 0.00011870637536048889, 0.00013341009616851807, 0.00014811381697654724, 0.00016281753778457642, 0.0001775212585926056, 0.00019222497940063477, 0.00020692870020866394, 0.00022163242101669312, 0.0002363361418247223, 0.00025103986263275146, 0.00026574358344078064, 0.0002804473042488098, 0.000295151025056839, 0.00030985474586486816, 0.00032455846667289734, 0.0003392621874809265, 0.0003539659082889557, 0.00036866962909698486, 0.00038337334990501404, 0.0003980770707130432, 0.0004127807915210724, 0.00042748451232910156]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 5.0, 2.0, 6.0, 0.0, 7.0, 8.0, 5.0, 12.0, 10.0, 25.0, 20.0, 19.0, 39.0, 60.0, 64.0, 101.0, 145.0, 114.0, 90.0, 57.0, 46.0, 39.0, 22.0, 19.0, 18.0, 15.0, 10.0, 5.0, 8.0, 4.0, 7.0, 4.0, 7.0, 2.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.5359134674072266e-05, -4.4022686779499054e-05, -4.268623888492584e-05, -4.134979099035263e-05, -4.001334309577942e-05, -3.867689520120621e-05, -3.7340447306632996e-05, -3.6003999412059784e-05, -3.466755151748657e-05, -3.333110362291336e-05, -3.199465572834015e-05, -3.065820783376694e-05, -2.9321759939193726e-05, -2.7985312044620514e-05, -2.6648864150047302e-05, -2.531241625547409e-05, -2.397596836090088e-05, -2.2639520466327667e-05, -2.1303072571754456e-05, -1.9966624677181244e-05, -1.8630176782608032e-05, -1.729372888803482e-05, -1.595728099346161e-05, -1.4620833098888397e-05, -1.3284385204315186e-05, -1.1947937309741974e-05, -1.0611489415168762e-05, -9.27504152059555e-06, -7.938593626022339e-06, -6.602145731449127e-06, -5.2656978368759155e-06, -3.929249942302704e-06, -2.592802047729492e-06, -1.2563541531562805e-06, 8.009374141693115e-08, 
1.4165416359901428e-06, 2.7529895305633545e-06, 4.089437425136566e-06, 5.425885319709778e-06, 6.7623332142829895e-06, 8.098781108856201e-06, 9.435229003429413e-06, 1.0771676898002625e-05, 1.2108124792575836e-05, 1.3444572687149048e-05, 1.478102058172226e-05, 1.611746847629547e-05, 1.7453916370868683e-05, 1.8790364265441895e-05, 2.0126812160015106e-05, 2.1463260054588318e-05, 2.279970794916153e-05, 2.413615584373474e-05, 2.5472603738307953e-05, 2.6809051632881165e-05, 2.8145499527454376e-05, 2.9481947422027588e-05, 3.08183953166008e-05, 3.215484321117401e-05, 3.349129110574722e-05, 3.4827739000320435e-05, 3.6164186894893646e-05, 3.750063478946686e-05, 3.883708268404007e-05, 4.017353057861328e-05]}, "gradients/decoder.model.decoder.layers.10.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 8.0, 8.0, 15.0, 21.0, 34.0, 63.0, 125.0, 241.0, 274.0, 120.0, 60.0, 26.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9748425483703613, -0.9103848934173584, -0.8459272384643555, -0.7814695835113525, -0.7170119285583496, -0.6525542736053467, -0.5880966186523438, -0.5236389636993408, -0.4591813087463379, -0.39472365379333496, -0.33026599884033203, -0.2658083438873291, -0.20135068893432617, -0.13689303398132324, -0.07243537902832031, -0.007977724075317383, 0.05647993087768555, 0.12093758583068848, 0.1853952407836914, 0.24985289573669434, 0.31431055068969727, 0.3787682056427002, 0.4432258605957031, 0.507683515548706, 0.572141170501709, 0.6365988254547119, 0.7010564804077148, 0.7655141353607178, 0.8299717903137207, 0.8944294452667236, 0.9588871002197266, 1.0233447551727295, 1.0878026485443115, 1.1522603034973145, 1.2167179584503174, 1.2811756134033203, 1.3456332683563232, 1.4100909233093262, 1.474548578262329, 1.539006233215332, 1.603463888168335, 1.667921543121338, 1.7323791980743408, 1.7968368530273438, 1.8612945079803467, 1.9257521629333496, 1.9902098178863525, 2.0546674728393555, 2.1191251277923584, 2.1835827827453613, 2.2480404376983643, 2.312498092651367, 2.37695574760437, 2.441413402557373, 2.505871057510376, 2.570328712463379, 2.634786367416382, 2.6992440223693848, 2.7637016773223877, 2.8281593322753906, 2.8926169872283936, 2.9570746421813965, 3.0215322971343994, 3.0859899520874023, 3.1504476070404053]}, "gradients/decoder.model.decoder.layers.10.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 5.0, 7.0, 29.0, 32.0, 52.0, 107.0, 94.0, 124.0, 155.0, 119.0, 91.0, 69.0, 54.0, 36.0, 16.0, 8.0, 7.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9960737228393555, -1.9409973621368408, -1.8859211206436157, -1.8308448791503906, -1.775768518447876, -1.7206921577453613, -1.6656159162521362, -1.6105396747589111, -1.5554633140563965, -1.5003869533538818, -1.4453107118606567, -1.3902344703674316, -1.335158109664917, -1.2800817489624023, -1.2250055074691772, -1.1699292659759521, -1.1148529052734375, -1.0597765445709229, -1.0047003030776978, -0.9496240019798279, -0.894547700881958, -0.8394713997840881, -0.7843950986862183, -0.7293187975883484, -0.6742424964904785, -0.6191661953926086, -0.5640898942947388, -0.5090135931968689, -0.453937292098999, -0.39886099100112915, 
-0.3437846899032593, -0.2887083888053894, -0.23363196849822998, -0.1785556674003601, -0.12347936630249023, -0.06840306520462036, -0.013326764106750488, 0.041749536991119385, 0.09682583808898926, 0.15190213918685913, 0.206978440284729, 0.2620547413825989, 0.31713104248046875, 0.3722073435783386, 0.4272836446762085, 0.48235994577407837, 0.5374362468719482, 0.5925125479698181, 0.647588849067688, 0.7026651501655579, 0.7577414512634277, 0.8128177523612976, 0.8678940534591675, 0.9229703545570374, 0.9780466556549072, 1.0331230163574219, 1.088199257850647, 1.143275499343872, 1.1983518600463867, 1.2534282207489014, 1.3085044622421265, 1.3635807037353516, 1.4186570644378662, 1.4737334251403809, 1.528809666633606]}, "gradients/decoder.model.decoder.layers.10.self_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 5.0, 11.0, 7.0, 18.0, 10.0, 21.0, 26.0, 42.0, 61.0, 127.0, 262.0, 644.0, 1768.0, 5312.0, 21424.0, 139679.0, 753658.0, 101232.0, 17103.0, 4542.0, 1482.0, 565.0, 248.0, 128.0, 53.0, 41.0, 20.0, 15.0, 11.0, 6.0, 13.0, 6.0, 4.0, 5.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1669921875, -1.122833251953125, -1.07867431640625, -1.034515380859375, -0.9903564453125, -0.946197509765625, -0.90203857421875, -0.857879638671875, -0.813720703125, -0.769561767578125, -0.72540283203125, -0.681243896484375, -0.6370849609375, -0.592926025390625, -0.54876708984375, -0.504608154296875, -0.46044921875, -0.416290283203125, -0.37213134765625, -0.327972412109375, -0.2838134765625, -0.239654541015625, -0.19549560546875, -0.151336669921875, -0.107177734375, -0.063018798828125, -0.01885986328125, 0.025299072265625, 0.0694580078125, 0.113616943359375, 0.15777587890625, 0.201934814453125, 0.24609375, 0.290252685546875, 0.33441162109375, 0.378570556640625, 0.4227294921875, 0.466888427734375, 0.51104736328125, 0.555206298828125, 0.599365234375, 0.643524169921875, 0.68768310546875, 0.731842041015625, 0.7760009765625, 0.820159912109375, 0.86431884765625, 0.908477783203125, 0.95263671875, 0.996795654296875, 1.04095458984375, 1.085113525390625, 1.1292724609375, 1.173431396484375, 1.21759033203125, 1.261749267578125, 1.305908203125, 1.350067138671875, 1.39422607421875, 1.438385009765625, 1.4825439453125, 1.526702880859375, 1.57086181640625, 1.615020751953125, 1.6591796875]}, "gradients/decoder.model.decoder.layers.10.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 7.0, 13.0, 8.0, 10.0, 22.0, 17.0, 25.0, 29.0, 33.0, 40.0, 42.0, 39.0, 40.0, 42.0, 55.0, 54.0, 49.0, 46.0, 46.0, 42.0, 49.0, 47.0, 34.0, 34.0, 34.0, 35.0, 26.0, 11.0, 13.0, 11.0, 12.0, 8.0, 6.0, 15.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.1995849609375, -0.194122314453125, -0.18865966796875, -0.183197021484375, -0.177734375, -0.172271728515625, -0.16680908203125, -0.161346435546875, -0.1558837890625, -0.150421142578125, -0.14495849609375, -0.139495849609375, -0.134033203125, -0.128570556640625, -0.12310791015625, -0.117645263671875, -0.1121826171875, -0.106719970703125, -0.10125732421875, -0.095794677734375, -0.09033203125, -0.084869384765625, -0.07940673828125, -0.073944091796875, -0.0684814453125, -0.063018798828125, -0.05755615234375, -0.052093505859375, -0.046630859375, -0.041168212890625, -0.03570556640625, -0.030242919921875, -0.0247802734375, -0.019317626953125, -0.01385498046875, -0.008392333984375, 
-0.0029296875, 0.002532958984375, 0.00799560546875, 0.013458251953125, 0.0189208984375, 0.024383544921875, 0.02984619140625, 0.035308837890625, 0.040771484375, 0.046234130859375, 0.05169677734375, 0.057159423828125, 0.0626220703125, 0.068084716796875, 0.07354736328125, 0.079010009765625, 0.08447265625, 0.089935302734375, 0.09539794921875, 0.100860595703125, 0.1063232421875, 0.111785888671875, 0.11724853515625, 0.122711181640625, 0.128173828125, 0.133636474609375, 0.13909912109375, 0.144561767578125, 0.1500244140625]}, "gradients/decoder.model.decoder.layers.10.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 6.0, 5.0, 5.0, 12.0, 7.0, 4.0, 25.0, 21.0, 36.0, 30.0, 56.0, 53.0, 68.0, 98.0, 123.0, 293.0, 2124.0, 166791.0, 873986.0, 3908.0, 356.0, 136.0, 89.0, 77.0, 48.0, 45.0, 45.0, 27.0, 17.0, 18.0, 21.0, 9.0, 4.0, 6.0, 4.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.09765625, -4.93890380859375, -4.7801513671875, -4.62139892578125, -4.462646484375, -4.30389404296875, -4.1451416015625, -3.98638916015625, -3.82763671875, -3.66888427734375, -3.5101318359375, -3.35137939453125, -3.192626953125, -3.03387451171875, -2.8751220703125, -2.71636962890625, -2.5576171875, -2.39886474609375, -2.2401123046875, -2.08135986328125, -1.922607421875, -1.76385498046875, -1.6051025390625, -1.44635009765625, -1.28759765625, -1.12884521484375, -0.9700927734375, -0.81134033203125, -0.652587890625, -0.49383544921875, -0.3350830078125, -0.17633056640625, -0.017578125, 0.14117431640625, 0.2999267578125, 0.45867919921875, 0.617431640625, 0.77618408203125, 0.9349365234375, 1.09368896484375, 1.25244140625, 1.41119384765625, 1.5699462890625, 1.72869873046875, 1.887451171875, 2.04620361328125, 2.2049560546875, 2.36370849609375, 2.5224609375, 2.68121337890625, 2.8399658203125, 2.99871826171875, 3.157470703125, 3.31622314453125, 3.4749755859375, 3.63372802734375, 3.79248046875, 3.95123291015625, 4.1099853515625, 4.26873779296875, 4.427490234375, 4.58624267578125, 4.7449951171875, 4.90374755859375, 5.0625]}, "gradients/decoder.model.decoder.layers.10.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 1.0, 2.0, 4.0, 5.0, 6.0, 7.0, 0.0, 12.0, 12.0, 14.0, 23.0, 10.0, 17.0, 24.0, 36.0, 28.0, 47.0, 52.0, 66.0, 61.0, 85.0, 68.0, 79.0, 64.0, 52.0, 44.0, 31.0, 29.0, 22.0, 20.0, 19.0, 13.0, 5.0, 14.0, 9.0, 11.0, 7.0, 3.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.7197265625, -0.6972427368164062, -0.6747589111328125, -0.6522750854492188, -0.629791259765625, -0.6073074340820312, -0.5848236083984375, -0.5623397827148438, -0.53985595703125, -0.5173721313476562, -0.4948883056640625, -0.47240447998046875, -0.449920654296875, -0.42743682861328125, -0.4049530029296875, -0.38246917724609375, -0.3599853515625, -0.33750152587890625, -0.3150177001953125, -0.29253387451171875, -0.270050048828125, -0.24756622314453125, -0.2250823974609375, -0.20259857177734375, -0.18011474609375, -0.15763092041015625, -0.1351470947265625, -0.11266326904296875, -0.090179443359375, -0.06769561767578125, -0.0452117919921875, -0.02272796630859375, -0.000244140625, 0.02223968505859375, 0.0447235107421875, 0.06720733642578125, 0.089691162109375, 0.11217498779296875, 0.1346588134765625, 0.15714263916015625, 0.17962646484375, 0.20211029052734375, 0.2245941162109375, 0.24707794189453125, 0.269561767578125, 0.29204559326171875, 0.3145294189453125, 
0.33701324462890625, 0.3594970703125, 0.38198089599609375, 0.4044647216796875, 0.42694854736328125, 0.449432373046875, 0.47191619873046875, 0.4944000244140625, 0.5168838500976562, 0.53936767578125, 0.5618515014648438, 0.5843353271484375, 0.6068191528320312, 0.629302978515625, 0.6517868041992188, 0.6742706298828125, 0.6967544555664062, 0.71923828125]}, "gradients/decoder.model.decoder.layers.10.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 3.0, 4.0, 7.0, 5.0, 4.0, 9.0, 3.0, 4.0, 10.0, 5.0, 9.0, 16.0, 23.0, 32.0, 43.0, 68.0, 86.0, 135.0, 294.0, 519.0, 932.0, 1866.0, 4217.0, 11892.0, 84154.0, 899470.0, 31138.0, 7443.0, 2958.0, 1414.0, 755.0, 386.0, 227.0, 152.0, 79.0, 49.0, 31.0, 22.0, 14.0, 22.0, 13.0, 13.0, 5.0, 1.0, 9.0, 3.0, 5.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0], "bins": [-0.33984375, -0.32932281494140625, -0.3188018798828125, -0.30828094482421875, -0.297760009765625, -0.28723907470703125, -0.2767181396484375, -0.26619720458984375, -0.25567626953125, -0.24515533447265625, -0.2346343994140625, -0.22411346435546875, -0.213592529296875, -0.20307159423828125, -0.1925506591796875, -0.18202972412109375, -0.1715087890625, -0.16098785400390625, -0.1504669189453125, -0.13994598388671875, -0.129425048828125, -0.11890411376953125, -0.1083831787109375, -0.09786224365234375, -0.08734130859375, -0.07682037353515625, -0.0662994384765625, -0.05577850341796875, -0.045257568359375, -0.03473663330078125, -0.0242156982421875, -0.01369476318359375, -0.003173828125, 0.00734710693359375, 0.0178680419921875, 0.02838897705078125, 0.038909912109375, 0.04943084716796875, 0.0599517822265625, 0.07047271728515625, 0.08099365234375, 0.09151458740234375, 0.1020355224609375, 0.11255645751953125, 0.123077392578125, 0.13359832763671875, 0.1441192626953125, 0.15464019775390625, 0.1651611328125, 0.17568206787109375, 0.1862030029296875, 0.19672393798828125, 0.207244873046875, 0.21776580810546875, 0.2282867431640625, 0.23880767822265625, 0.24932861328125, 0.25984954833984375, 0.2703704833984375, 0.28089141845703125, 0.291412353515625, 0.30193328857421875, 0.3124542236328125, 0.32297515869140625, 0.33349609375]}, "gradients/decoder.model.decoder.layers.10.self_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 3.0, 0.0, 8.0, 2.0, 11.0, 26.0, 84.0, 729.0, 70.0, 33.0, 14.0, 3.0, 7.0, 0.0, 3.0, 0.0, 5.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00018334388732910156, -0.00017666444182395935, -0.00016998499631881714, -0.00016330555081367493, -0.00015662610530853271, -0.0001499466598033905, -0.0001432672142982483, -0.00013658776879310608, -0.00012990832328796387, -0.00012322887778282166, -0.00011654943227767944, -0.00010986998677253723, -0.00010319054126739502, -9.651109576225281e-05, -8.98316502571106e-05, -8.315220475196838e-05, -7.647275924682617e-05, -6.979331374168396e-05, -6.311386823654175e-05, -5.6434422731399536e-05, -4.9754977226257324e-05, -4.307553172111511e-05, -3.63960862159729e-05, -2.971664071083069e-05, -2.3037195205688477e-05, -1.6357749700546265e-05, -9.678304195404053e-06, -2.998858690261841e-06, 3.680586814880371e-06, 1.0360032320022583e-05, 1.7039477825164795e-05, 2.3718923330307007e-05, 3.039836883544922e-05, 3.707781434059143e-05, 4.375725984573364e-05, 5.0436705350875854e-05, 5.7116150856018066e-05, 6.379559636116028e-05, 
7.047504186630249e-05, 7.71544873714447e-05, 8.383393287658691e-05, 9.051337838172913e-05, 9.719282388687134e-05, 0.00010387226939201355, 0.00011055171489715576, 0.00011723116040229797, 0.00012391060590744019, 0.0001305900514125824, 0.0001372694969177246, 0.00014394894242286682, 0.00015062838792800903, 0.00015730783343315125, 0.00016398727893829346, 0.00017066672444343567, 0.00017734616994857788, 0.0001840256154537201, 0.0001907050609588623, 0.00019738450646400452, 0.00020406395196914673, 0.00021074339747428894, 0.00021742284297943115, 0.00022410228848457336, 0.00023078173398971558, 0.0002374611794948578, 0.000244140625]}, "gradients/decoder.model.decoder.layers.10.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 0.0, 4.0, 3.0, 4.0, 2.0, 6.0, 5.0, 7.0, 11.0, 4.0, 5.0, 10.0, 28.0, 41.0, 76.0, 205.0, 720.0, 3531.0, 48272.0, 986394.0, 7314.0, 1246.0, 323.0, 141.0, 47.0, 33.0, 21.0, 20.0, 15.0, 15.0, 9.0, 7.0, 7.0, 7.0, 1.0, 3.0, 3.0, 5.0, 3.0, 3.0, 1.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.427001953125, -0.4137306213378906, -0.40045928955078125, -0.3871879577636719, -0.3739166259765625, -0.3606452941894531, -0.34737396240234375, -0.3341026306152344, -0.320831298828125, -0.3075599670410156, -0.29428863525390625, -0.2810173034667969, -0.2677459716796875, -0.2544746398925781, -0.24120330810546875, -0.22793197631835938, -0.21466064453125, -0.20138931274414062, -0.18811798095703125, -0.17484664916992188, -0.1615753173828125, -0.14830398559570312, -0.13503265380859375, -0.12176132202148438, -0.108489990234375, -0.09521865844726562, -0.08194732666015625, -0.06867599487304688, -0.0554046630859375, -0.042133331298828125, -0.02886199951171875, -0.015590667724609375, -0.0023193359375, 0.010951995849609375, 0.02422332763671875, 0.037494659423828125, 0.0507659912109375, 0.06403732299804688, 0.07730865478515625, 0.09057998657226562, 0.103851318359375, 0.11712265014648438, 0.13039398193359375, 0.14366531372070312, 0.1569366455078125, 0.17020797729492188, 0.18347930908203125, 0.19675064086914062, 0.21002197265625, 0.22329330444335938, 0.23656463623046875, 0.24983596801757812, 0.2631072998046875, 0.2763786315917969, 0.28964996337890625, 0.3029212951660156, 0.316192626953125, 0.3294639587402344, 0.34273529052734375, 0.3560066223144531, 0.3692779541015625, 0.3825492858886719, 0.39582061767578125, 0.4090919494628906, 0.42236328125]}, "gradients/decoder.model.decoder.layers.10.self_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 4.0, 2.0, 5.0, 3.0, 4.0, 1.0, 1.0, 3.0, 3.0, 1.0, 5.0, 15.0, 8.0, 24.0, 22.0, 115.0, 513.0, 118.0, 50.0, 10.0, 13.0, 11.0, 10.0, 9.0, 4.0, 3.0, 5.0, 4.0, 4.0, 6.0, 4.0, 5.0, 4.0, 3.0, 3.0, 0.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.06494140625, -0.0626668930053711, -0.06039237976074219, -0.05811786651611328, -0.055843353271484375, -0.05356884002685547, -0.05129432678222656, -0.049019813537597656, -0.04674530029296875, -0.044470787048339844, -0.04219627380371094, -0.03992176055908203, -0.037647247314453125, -0.03537273406982422, -0.03309822082519531, -0.030823707580566406, -0.0285491943359375, -0.026274681091308594, -0.024000167846679688, -0.02172565460205078, -0.019451141357421875, -0.01717662811279297, -0.014902114868164062, -0.012627601623535156, -0.01035308837890625, -0.008078575134277344, -0.0058040618896484375, -0.0035295486450195312, -0.001255035400390625, 
0.0010194778442382812, 0.0032939910888671875, 0.005568504333496094, 0.007843017578125, 0.010117530822753906, 0.012392044067382812, 0.014666557312011719, 0.016941070556640625, 0.01921558380126953, 0.021490097045898438, 0.023764610290527344, 0.02603912353515625, 0.028313636779785156, 0.030588150024414062, 0.03286266326904297, 0.035137176513671875, 0.03741168975830078, 0.03968620300292969, 0.041960716247558594, 0.0442352294921875, 0.046509742736816406, 0.04878425598144531, 0.05105876922607422, 0.053333282470703125, 0.05560779571533203, 0.05788230895996094, 0.060156822204589844, 0.06243133544921875, 0.06470584869384766, 0.06698036193847656, 0.06925487518310547, 0.07152938842773438, 0.07380390167236328, 0.07607841491699219, 0.0783529281616211, 0.08062744140625]}, "gradients/decoder.model.decoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 7.0, 4.0, 9.0, 15.0, 21.0, 51.0, 90.0, 141.0, 191.0, 167.0, 132.0, 70.0, 56.0, 19.0, 17.0, 11.0, 2.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.971177577972412, -1.9078834056854248, -1.8445892333984375, -1.7812949419021606, -1.7180007696151733, -1.654706597328186, -1.5914123058319092, -1.5281181335449219, -1.4648239612579346, -1.4015297889709473, -1.33823561668396, -1.274941325187683, -1.2116471529006958, -1.1483529806137085, -1.0850586891174316, -1.0217645168304443, -0.958470344543457, -0.8951761722564697, -0.8318819403648376, -0.7685877084732056, -0.7052935361862183, -0.641999363899231, -0.5787051320075989, -0.5154109001159668, -0.4521167278289795, -0.3888225257396698, -0.3255283236503601, -0.2622341215610504, -0.19893991947174072, -0.13564571738243103, -0.07235151529312134, -0.009057313203811646, 0.05423688888549805, 0.11753109097480774, 0.18082529306411743, 0.24411949515342712, 0.3074136972427368, 0.3707078993320465, 0.4340021014213562, 0.4972963035106659, 0.5605905055999756, 0.6238846778869629, 0.687178909778595, 0.750473141670227, 0.8137673139572144, 0.8770614862442017, 0.9403557181358337, 1.0036499500274658, 1.0669441223144531, 1.1302382946014404, 1.1935324668884277, 1.2568267583847046, 1.320120930671692, 1.3834151029586792, 1.446709394454956, 1.5100035667419434, 1.5732977390289307, 1.636591911315918, 1.6998860836029053, 1.7631803750991821, 1.8264745473861694, 1.8897687196731567, 1.9530630111694336, 2.016357183456421, 2.079651355743408]}, "gradients/decoder.model.decoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 3.0, 7.0, 5.0, 6.0, 13.0, 12.0, 14.0, 10.0, 24.0, 24.0, 29.0, 37.0, 26.0, 43.0, 28.0, 27.0, 49.0, 41.0, 36.0, 44.0, 47.0, 54.0, 41.0, 40.0, 48.0, 40.0, 36.0, 28.0, 32.0, 22.0, 30.0, 17.0, 22.0, 11.0, 13.0, 14.0, 10.0, 5.0, 6.0, 5.0, 5.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9781044721603394, -0.9401484727859497, -0.9021924734115601, -0.8642364740371704, -0.8262804746627808, -0.7883244752883911, -0.7503684759140015, -0.7124124765396118, -0.6744564771652222, -0.6365004777908325, -0.5985444784164429, -0.5605884790420532, -0.5226324796676636, -0.4846764802932739, -0.4467204809188843, -0.40876448154449463, -0.370808482170105, -0.33285248279571533, -0.2948964834213257, -0.25694048404693604, -0.2189844846725464, -0.18102848529815674, -0.1430724859237671, -0.10511648654937744, -0.06716048717498779, 
-0.029204487800598145, 0.008751511573791504, 0.04670751094818115, 0.0846635103225708, 0.12261950969696045, 0.1605755090713501, 0.19853150844573975, 0.23648738861083984, 0.2744433879852295, 0.31239938735961914, 0.3503553867340088, 0.38831138610839844, 0.4262673854827881, 0.46422338485717773, 0.5021793842315674, 0.540135383605957, 0.5780913829803467, 0.6160473823547363, 0.654003381729126, 0.6919593811035156, 0.7299153804779053, 0.7678713798522949, 0.8058273792266846, 0.8437833786010742, 0.8817393779754639, 0.9196953773498535, 0.9576513767242432, 0.9956073760986328, 1.0335633754730225, 1.071519374847412, 1.1094753742218018, 1.1474313735961914, 1.185387372970581, 1.2233433723449707, 1.2612993717193604, 1.29925537109375, 1.3372113704681396, 1.3751673698425293, 1.413123369216919, 1.4510793685913086]}, "gradients/decoder.model.decoder.layers.9.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 6.0, 8.0, 11.0, 12.0, 15.0, 31.0, 54.0, 82.0, 98.0, 107.0, 179.0, 261.0, 387.0, 544.0, 823.0, 1188.0, 1815.0, 2693.0, 5997.0, 4166315.0, 5372.0, 2660.0, 1768.0, 1169.0, 835.0, 576.0, 392.0, 275.0, 174.0, 136.0, 87.0, 57.0, 35.0, 41.0, 22.0, 16.0, 15.0, 12.0, 12.0, 6.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.884765625, -3.748199462890625, -3.61163330078125, -3.475067138671875, -3.3385009765625, -3.201934814453125, -3.06536865234375, -2.928802490234375, -2.792236328125, -2.655670166015625, -2.51910400390625, -2.382537841796875, -2.2459716796875, -2.109405517578125, -1.97283935546875, -1.836273193359375, -1.69970703125, -1.563140869140625, -1.42657470703125, -1.290008544921875, -1.1534423828125, -1.016876220703125, -0.88031005859375, -0.743743896484375, -0.607177734375, -0.470611572265625, -0.33404541015625, -0.197479248046875, -0.0609130859375, 0.075653076171875, 0.21221923828125, 0.348785400390625, 0.4853515625, 0.621917724609375, 0.75848388671875, 0.895050048828125, 1.0316162109375, 1.168182373046875, 1.30474853515625, 1.441314697265625, 1.577880859375, 1.714447021484375, 1.85101318359375, 1.987579345703125, 2.1241455078125, 2.260711669921875, 2.39727783203125, 2.533843994140625, 2.67041015625, 2.806976318359375, 2.94354248046875, 3.080108642578125, 3.2166748046875, 3.353240966796875, 3.48980712890625, 3.626373291015625, 3.762939453125, 3.899505615234375, 4.03607177734375, 4.172637939453125, 4.3092041015625, 4.445770263671875, 4.58233642578125, 4.718902587890625, 4.85546875]}, "gradients/decoder.model.decoder.layers.9.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 7.0, 7.0, 4.0, 4.0, 9.0, 9.0, 7.0, 8.0, 16.0, 12.0, 22.0, 27.0, 26.0, 31.0, 31.0, 35.0, 23.0, 46.0, 57.0, 46.0, 45.0, 40.0, 41.0, 46.0, 30.0, 44.0, 45.0, 35.0, 39.0, 29.0, 33.0, 24.0, 26.0, 18.0, 15.0, 11.0, 11.0, 11.0, 12.0, 6.0, 4.0, 4.0, 3.0, 2.0, 2.0, 3.0, 4.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.14892578125, -0.1442394256591797, -0.13955307006835938, -0.13486671447753906, -0.13018035888671875, -0.12549400329589844, -0.12080764770507812, -0.11612129211425781, -0.1114349365234375, -0.10674858093261719, -0.10206222534179688, -0.09737586975097656, -0.09268951416015625, -0.08800315856933594, -0.08331680297851562, -0.07863044738769531, -0.073944091796875, -0.06925773620605469, -0.06457138061523438, -0.05988502502441406, -0.05519866943359375, -0.05051231384277344, -0.045825958251953125, -0.04113960266113281, -0.0364532470703125, -0.03176689147949219, -0.027080535888671875, -0.022394180297851562, 
-0.01770782470703125, -0.013021469116210938, -0.008335113525390625, -0.0036487579345703125, 0.00103759765625, 0.0057239532470703125, 0.010410308837890625, 0.015096664428710938, 0.01978302001953125, 0.024469375610351562, 0.029155731201171875, 0.03384208679199219, 0.0385284423828125, 0.04321479797363281, 0.047901153564453125, 0.05258750915527344, 0.05727386474609375, 0.06196022033691406, 0.06664657592773438, 0.07133293151855469, 0.076019287109375, 0.08070564270019531, 0.08539199829101562, 0.09007835388183594, 0.09476470947265625, 0.09945106506347656, 0.10413742065429688, 0.10882377624511719, 0.1135101318359375, 0.11819648742675781, 0.12288284301757812, 0.12756919860839844, 0.13225555419921875, 0.13694190979003906, 0.14162826538085938, 0.1463146209716797, 0.1510009765625]}, "gradients/decoder.model.decoder.layers.9.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 5.0, 16.0, 89.0, 1711.0, 4191644.0, 750.0, 42.0, 15.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-29.90625, -28.921875, -27.9375, -26.953125, -25.96875, -24.984375, -24.0, -23.015625, -22.03125, -21.046875, -20.0625, -19.078125, -18.09375, -17.109375, -16.125, -15.140625, -14.15625, -13.171875, -12.1875, -11.203125, -10.21875, -9.234375, -8.25, -7.265625, -6.28125, -5.296875, -4.3125, -3.328125, -2.34375, -1.359375, -0.375, 0.609375, 1.59375, 2.578125, 3.5625, 4.546875, 5.53125, 6.515625, 7.5, 8.484375, 9.46875, 10.453125, 11.4375, 12.421875, 13.40625, 14.390625, 15.375, 16.359375, 17.34375, 18.328125, 19.3125, 20.296875, 21.28125, 22.265625, 23.25, 24.234375, 25.21875, 26.203125, 27.1875, 28.171875, 29.15625, 30.140625, 31.125, 32.109375, 33.09375]}, "gradients/decoder.model.decoder.layers.9.fc1.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 5.0, 11.0, 3995.0, 48.0, 4.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.95068359375, -0.9137344360351562, -0.8767852783203125, -0.8398361206054688, -0.802886962890625, -0.7659378051757812, -0.7289886474609375, -0.6920394897460938, -0.65509033203125, -0.6181411743164062, -0.5811920166015625, -0.5442428588867188, -0.507293701171875, -0.47034454345703125, -0.4333953857421875, -0.39644622802734375, -0.3594970703125, -0.32254791259765625, -0.2855987548828125, -0.24864959716796875, -0.211700439453125, -0.17475128173828125, -0.1378021240234375, -0.10085296630859375, -0.06390380859375, -0.02695465087890625, 0.0099945068359375, 0.04694366455078125, 0.083892822265625, 0.12084197998046875, 0.1577911376953125, 0.19474029541015625, 0.231689453125, 0.26863861083984375, 0.3055877685546875, 0.34253692626953125, 0.379486083984375, 0.41643524169921875, 0.4533843994140625, 0.49033355712890625, 0.52728271484375, 0.5642318725585938, 0.6011810302734375, 0.6381301879882812, 0.675079345703125, 0.7120285034179688, 0.7489776611328125, 0.7859268188476562, 0.8228759765625, 0.8598251342773438, 0.8967742919921875, 0.9337234497070312, 0.970672607421875, 1.0076217651367188, 1.0445709228515625, 1.0815200805664062, 1.11846923828125, 1.1554183959960938, 1.1923675537109375, 1.2293167114257812, 1.266265869140625, 1.3032150268554688, 
1.3401641845703125, 1.3771133422851562, 1.4140625]}, "gradients/decoder.model.decoder.layers.9.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 6.0, 14.0, 12.0, 24.0, 38.0, 57.0, 94.0, 158.0, 242.0, 144.0, 83.0, 41.0, 28.0, 16.0, 6.0, 5.0, 10.0, 5.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.4417426586151123, -1.40015709400177, -1.3585715293884277, -1.3169859647750854, -1.2754004001617432, -1.2338148355484009, -1.1922292709350586, -1.1506435871124268, -1.109058141708374, -1.0674725770950317, -1.0258870124816895, -0.9843014478683472, -0.9427158832550049, -0.9011303186416626, -0.8595446944236755, -0.8179591298103333, -0.7763735055923462, -0.7347879409790039, -0.6932023763656616, -0.6516168117523193, -0.610031247138977, -0.5684456825256348, -0.5268600583076477, -0.4852744936943054, -0.44368892908096313, -0.40210336446762085, -0.36051779985427856, -0.3189322054386139, -0.2773466408252716, -0.23576107621192932, -0.19417549669742584, -0.15258991718292236, -0.11100435256958008, -0.0694187805056572, -0.027833208441734314, 0.013752363622188568, 0.05533793568611145, 0.09692350029945374, 0.13850907981395721, 0.1800946593284607, 0.22168022394180298, 0.26326578855514526, 0.30485135316848755, 0.3464369475841522, 0.3880225121974945, 0.4296080768108368, 0.47119367122650146, 0.5127792358398438, 0.554364800453186, 0.5959503650665283, 0.6375359296798706, 0.6791214942932129, 0.7207070589065552, 0.7622926235198975, 0.8038782477378845, 0.8454638123512268, 0.8870493769645691, 0.9286349415779114, 0.9702205061912537, 1.0118061304092407, 1.053391695022583, 1.0949772596359253, 1.1365628242492676, 1.1781483888626099, 1.2197339534759521]}, "gradients/decoder.model.decoder.layers.9.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 5.0, 1.0, 5.0, 4.0, 6.0, 4.0, 4.0, 4.0, 8.0, 7.0, 9.0, 12.0, 11.0, 26.0, 15.0, 28.0, 25.0, 39.0, 35.0, 24.0, 37.0, 39.0, 32.0, 35.0, 44.0, 53.0, 41.0, 33.0, 41.0, 53.0, 39.0, 34.0, 38.0, 36.0, 27.0, 27.0, 23.0, 16.0, 20.0, 16.0, 7.0, 5.0, 10.0, 8.0, 4.0, 5.0, 2.0, 4.0, 2.0, 4.0, 7.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39157435297966003, -0.3793319761753082, -0.3670895993709564, -0.3548472225666046, -0.3426048457622528, -0.330362468957901, -0.3181200623512268, -0.305877685546875, -0.2936353087425232, -0.2813929319381714, -0.2691505551338196, -0.2569081783294678, -0.24466580152511597, -0.23242342472076416, -0.22018103301525116, -0.20793865621089935, -0.19569629430770874, -0.18345391750335693, -0.17121154069900513, -0.15896916389465332, -0.1467267870903015, -0.1344844102859497, -0.1222420185804367, -0.1099996417760849, -0.0977572649717331, -0.08551488816738129, -0.07327251136302948, -0.061030127108097076, -0.04878775030374527, -0.03654537349939346, -0.02430298924446106, -0.012060612440109253, 0.00018173456192016602, 0.012424113228917122, 0.024666491895914078, 0.03690887242555618, 0.04915124922990799, 0.061393626034259796, 0.0736360102891922, 0.085878387093544, 0.09812076389789581, 0.11036314070224762, 0.12260551750659943, 0.13484790921211243, 0.14709028601646423, 0.15933266282081604, 0.17157503962516785, 0.18381741642951965, 0.19605979323387146, 0.20830217003822327, 0.22054454684257507, 0.23278692364692688, 0.2450293004512787, 0.2572716772556305, 0.2695140838623047, 0.2817564606666565, 0.2939988374710083, 
0.3062412142753601, 0.3184835910797119, 0.3307259678840637, 0.3429683446884155, 0.35521072149276733, 0.36745309829711914, 0.37969547510147095, 0.39193785190582275]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 5.0, 4.0, 13.0, 18.0, 16.0, 52.0, 121.0, 898.0, 92718.0, 953115.0, 1334.0, 147.0, 45.0, 23.0, 10.0, 12.0, 7.0, 4.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9931640625, -0.964019775390625, -0.93487548828125, -0.905731201171875, -0.8765869140625, -0.847442626953125, -0.81829833984375, -0.789154052734375, -0.760009765625, -0.730865478515625, -0.70172119140625, -0.672576904296875, -0.6434326171875, -0.614288330078125, -0.58514404296875, -0.555999755859375, -0.52685546875, -0.497711181640625, -0.46856689453125, -0.439422607421875, -0.4102783203125, -0.381134033203125, -0.35198974609375, -0.322845458984375, -0.293701171875, -0.264556884765625, -0.23541259765625, -0.206268310546875, -0.1771240234375, -0.147979736328125, -0.11883544921875, -0.089691162109375, -0.060546875, -0.031402587890625, -0.00225830078125, 0.026885986328125, 0.0560302734375, 0.085174560546875, 0.11431884765625, 0.143463134765625, 0.172607421875, 0.201751708984375, 0.23089599609375, 0.260040283203125, 0.2891845703125, 0.318328857421875, 0.34747314453125, 0.376617431640625, 0.40576171875, 0.434906005859375, 0.46405029296875, 0.493194580078125, 0.5223388671875, 0.551483154296875, 0.58062744140625, 0.609771728515625, 0.638916015625, 0.668060302734375, 0.69720458984375, 0.726348876953125, 0.7554931640625, 0.784637451171875, 0.81378173828125, 0.842926025390625, 0.8720703125]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [6.0, 9.0, 19.0, 45.0, 132.0, 226.0, 276.0, 173.0, 90.0, 29.0, 10.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.28759765625, -0.24217987060546875, -0.1967620849609375, -0.15134429931640625, -0.105926513671875, -0.06050872802734375, -0.0150909423828125, 0.03032684326171875, 0.07574462890625, 0.12116241455078125, 0.1665802001953125, 0.21199798583984375, 0.257415771484375, 0.30283355712890625, 0.3482513427734375, 0.39366912841796875, 0.4390869140625, 0.48450469970703125, 0.5299224853515625, 0.5753402709960938, 0.620758056640625, 0.6661758422851562, 0.7115936279296875, 0.7570114135742188, 0.80242919921875, 0.8478469848632812, 0.8932647705078125, 0.9386825561523438, 0.984100341796875, 1.0295181274414062, 1.0749359130859375, 1.1203536987304688, 1.165771484375, 1.2111892700195312, 1.2566070556640625, 1.3020248413085938, 1.347442626953125, 1.3928604125976562, 1.4382781982421875, 1.4836959838867188, 1.52911376953125, 1.5745315551757812, 1.6199493408203125, 1.6653671264648438, 1.710784912109375, 1.7562026977539062, 1.8016204833984375, 1.8470382690429688, 1.8924560546875, 1.9378738403320312, 1.9832916259765625, 2.0287094116210938, 2.074127197265625, 2.1195449829101562, 2.1649627685546875, 2.2103805541992188, 2.25579833984375, 2.3012161254882812, 2.3466339111328125, 2.3920516967773438, 2.437469482421875, 2.4828872680664062, 2.5283050537109375, 2.5737228393554688, 
2.619140625]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 4.0, 6.0, 5.0, 5.0, 7.0, 10.0, 15.0, 21.0, 32.0, 32.0, 42.0, 60.0, 62.0, 77.0, 115.0, 129.0, 185.0, 419.0, 1118.0, 4241.0, 25219.0, 700597.0, 294041.0, 17291.0, 2966.0, 854.0, 327.0, 191.0, 132.0, 80.0, 62.0, 58.0, 45.0, 19.0, 18.0, 16.0, 20.0, 15.0, 6.0, 3.0, 5.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.02581787109375, -0.025078296661376953, -0.024338722229003906, -0.02359914779663086, -0.022859573364257812, -0.022119998931884766, -0.02138042449951172, -0.020640850067138672, -0.019901275634765625, -0.019161701202392578, -0.01842212677001953, -0.017682552337646484, -0.016942977905273438, -0.01620340347290039, -0.015463829040527344, -0.014724254608154297, -0.01398468017578125, -0.013245105743408203, -0.012505531311035156, -0.01176595687866211, -0.011026382446289062, -0.010286808013916016, -0.009547233581542969, -0.008807659149169922, -0.008068084716796875, -0.007328510284423828, -0.006588935852050781, -0.005849361419677734, -0.0051097869873046875, -0.004370212554931641, -0.0036306381225585938, -0.002891063690185547, -0.0021514892578125, -0.0014119148254394531, -0.0006723403930664062, 6.723403930664062e-05, 0.0008068084716796875, 0.0015463829040527344, 0.0022859573364257812, 0.003025531768798828, 0.003765106201171875, 0.004504680633544922, 0.005244255065917969, 0.005983829498291016, 0.0067234039306640625, 0.007462978363037109, 0.008202552795410156, 0.008942127227783203, 0.00968170166015625, 0.010421276092529297, 0.011160850524902344, 0.01190042495727539, 0.012639999389648438, 0.013379573822021484, 0.014119148254394531, 0.014858722686767578, 0.015598297119140625, 0.016337871551513672, 0.01707744598388672, 0.017817020416259766, 0.018556594848632812, 0.01929616928100586, 0.020035743713378906, 0.020775318145751953, 0.021514892578125]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 6.0, 4.0, 7.0, 2.0, 13.0, 15.0, 14.0, 18.0, 19.0, 26.0, 27.0, 19.0, 29.0, 26.0, 38.0, 49.0, 54.0, 44.0, 53.0, 49.0, 55.0, 55.0, 44.0, 49.0, 34.0, 41.0, 40.0, 30.0, 24.0, 28.0, 21.0, 13.0, 12.0, 9.0, 11.0, 9.0, 5.0, 1.0, 1.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.68359375, -0.6640701293945312, -0.6445465087890625, -0.6250228881835938, -0.605499267578125, -0.5859756469726562, -0.5664520263671875, -0.5469284057617188, -0.52740478515625, -0.5078811645507812, -0.4883575439453125, -0.46883392333984375, -0.449310302734375, -0.42978668212890625, -0.4102630615234375, -0.39073944091796875, -0.3712158203125, -0.35169219970703125, -0.3321685791015625, -0.31264495849609375, -0.293121337890625, -0.27359771728515625, -0.2540740966796875, -0.23455047607421875, -0.21502685546875, -0.19550323486328125, -0.1759796142578125, -0.15645599365234375, -0.136932373046875, -0.11740875244140625, -0.0978851318359375, -0.07836151123046875, -0.058837890625, -0.03931427001953125, -0.0197906494140625, -0.00026702880859375, 0.019256591796875, 0.03878021240234375, 0.0583038330078125, 0.07782745361328125, 0.09735107421875, 0.11687469482421875, 0.1363983154296875, 0.15592193603515625, 0.175445556640625, 0.19496917724609375, 0.2144927978515625, 0.23401641845703125, 0.2535400390625, 0.27306365966796875, 0.2925872802734375, 0.31211090087890625, 0.331634521484375, 0.35115814208984375, 
0.3706817626953125, 0.39020538330078125, 0.40972900390625, 0.42925262451171875, 0.4487762451171875, 0.46829986572265625, 0.487823486328125, 0.5073471069335938, 0.5268707275390625, 0.5463943481445312, 0.56591796875]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 12.0, 19.0, 18.0, 31.0, 40.0, 57.0, 84.0, 127.0, 154.0, 230.0, 319.0, 473.0, 678.0, 972.0, 1470.0, 2292.0, 3674.0, 6585.0, 13201.0, 31474.0, 823311.0, 116963.0, 21702.0, 9984.0, 5410.0, 3137.0, 1941.0, 1315.0, 854.0, 602.0, 418.0, 295.0, 235.0, 142.0, 95.0, 65.0, 54.0, 44.0, 23.0, 19.0, 8.0, 6.0, 9.0, 6.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009636878967285156, -0.0009324178099632263, -0.000901147723197937, -0.0008698776364326477, -0.0008386075496673584, -0.0008073374629020691, -0.0007760673761367798, -0.0007447972893714905, -0.0007135272026062012, -0.0006822571158409119, -0.0006509870290756226, -0.0006197169423103333, -0.0005884468555450439, -0.0005571767687797546, -0.0005259066820144653, -0.000494636595249176, -0.0004633665084838867, -0.0004320964217185974, -0.0004008263349533081, -0.0003695562481880188, -0.0003382861614227295, -0.0003070160746574402, -0.0002757459878921509, -0.00024447590112686157, -0.00021320581436157227, -0.00018193572759628296, -0.00015066564083099365, -0.00011939555406570435, -8.812546730041504e-05, -5.685538053512573e-05, -2.5585293769836426e-05, 5.684792995452881e-06, 3.695487976074219e-05, 6.82249665260315e-05, 9.94950532913208e-05, 0.0001307651400566101, 0.00016203522682189941, 0.00019330531358718872, 0.00022457540035247803, 0.00025584548711776733, 0.00028711557388305664, 0.00031838566064834595, 0.00034965574741363525, 0.00038092583417892456, 0.00041219592094421387, 0.0004434660077095032, 0.0004747360944747925, 0.0005060061812400818, 0.0005372762680053711, 0.0005685463547706604, 0.0005998164415359497, 0.000631086528301239, 0.0006623566150665283, 0.0006936267018318176, 0.0007248967885971069, 0.0007561668753623962, 0.0007874369621276855, 0.0008187070488929749, 0.0008499771356582642, 0.0008812472224235535, 0.0009125173091888428, 0.0009437873959541321, 0.0009750574827194214, 0.0010063275694847107, 0.00103759765625]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 7.0, 2.0, 11.0, 9.0, 20.0, 62.0, 433.0, 347.0, 48.0, 17.0, 15.0, 9.0, 7.0, 6.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00020694732666015625, -0.0002018958330154419, -0.00019684433937072754, -0.00019179284572601318, -0.00018674135208129883, -0.00018168985843658447, -0.00017663836479187012, -0.00017158687114715576, -0.0001665353775024414, -0.00016148388385772705, -0.0001564323902130127, -0.00015138089656829834, -0.00014632940292358398, -0.00014127790927886963, -0.00013622641563415527, -0.00013117492198944092, -0.00012612342834472656, -0.00012107193470001221, -0.00011602044105529785, -0.0001109689474105835, -0.00010591745376586914, -0.00010086596012115479, -9.581446647644043e-05, -9.076297283172607e-05, -8.571147918701172e-05, -8.065998554229736e-05, -7.560849189758301e-05, -7.055699825286865e-05, -6.55055046081543e-05, -6.045401096343994e-05, -5.5402517318725586e-05, -5.035102367401123e-05, -4.5299530029296875e-05, -4.024803638458252e-05, 
-3.5196542739868164e-05, -3.014504909515381e-05, -2.5093555450439453e-05, -2.0042061805725098e-05, -1.4990568161010742e-05, -9.939074516296387e-06, -4.887580871582031e-06, 1.6391277313232422e-07, 5.21540641784668e-06, 1.0266900062561035e-05, 1.531839370727539e-05, 2.0369887351989746e-05, 2.54213809967041e-05, 3.0472874641418457e-05, 3.552436828613281e-05, 4.057586193084717e-05, 4.5627355575561523e-05, 5.067884922027588e-05, 5.5730342864990234e-05, 6.078183650970459e-05, 6.583333015441895e-05, 7.08848237991333e-05, 7.593631744384766e-05, 8.098781108856201e-05, 8.603930473327637e-05, 9.109079837799072e-05, 9.614229202270508e-05, 0.00010119378566741943, 0.00010624527931213379, 0.00011129677295684814, 0.0001163482666015625]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 5.0, 2.0, 1.0, 2.0, 6.0, 8.0, 16.0, 29.0, 28.0, 45.0, 49.0, 77.0, 125.0, 194.0, 298.0, 428.0, 712.0, 1091.0, 1712.0, 2877.0, 4837.0, 8957.0, 22674.0, 787448.0, 179026.0, 18418.0, 8097.0, 4406.0, 2600.0, 1589.0, 952.0, 610.0, 432.0, 255.0, 165.0, 113.0, 70.0, 68.0, 43.0, 33.0, 22.0, 12.0, 8.0, 7.0, 2.0, 8.0, 4.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0004401206970214844, -0.000424899160861969, -0.0004096776247024536, -0.00039445608854293823, -0.00037923455238342285, -0.00036401301622390747, -0.0003487914800643921, -0.0003335699439048767, -0.00031834840774536133, -0.00030312687158584595, -0.00028790533542633057, -0.0002726837992668152, -0.0002574622631072998, -0.00024224072694778442, -0.00022701919078826904, -0.00021179765462875366, -0.00019657611846923828, -0.0001813545823097229, -0.00016613304615020752, -0.00015091150999069214, -0.00013568997383117676, -0.00012046843767166138, -0.000105246901512146, -9.002536535263062e-05, -7.480382919311523e-05, -5.9582293033599854e-05, -4.436075687408447e-05, -2.9139220714569092e-05, -1.3917684555053711e-05, 1.30385160446167e-06, 1.652538776397705e-05, 3.174692392349243e-05, 4.696846008300781e-05, 6.21899962425232e-05, 7.741153240203857e-05, 9.263306856155396e-05, 0.00010785460472106934, 0.00012307614088058472, 0.0001382976770401001, 0.00015351921319961548, 0.00016874074935913086, 0.00018396228551864624, 0.00019918382167816162, 0.000214405357837677, 0.00022962689399719238, 0.00024484843015670776, 0.00026006996631622314, 0.0002752915024757385, 0.0002905130386352539, 0.0003057345747947693, 0.00032095611095428467, 0.00033617764711380005, 0.00035139918327331543, 0.0003666207194328308, 0.0003818422555923462, 0.00039706379175186157, 0.00041228532791137695, 0.00042750686407089233, 0.0004427284002304077, 0.0004579499363899231, 0.0004731714725494385, 0.0004883930087089539, 0.0005036145448684692, 0.0005188360810279846, 0.0005340576171875]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 3.0, 2.0, 4.0, 5.0, 3.0, 2.0, 5.0, 6.0, 3.0, 10.0, 12.0, 17.0, 33.0, 60.0, 329.0, 309.0, 87.0, 34.0, 10.0, 11.0, 9.0, 1.0, 5.0, 2.0, 5.0, 2.0, 5.0, 5.0, 3.0, 5.0, 1.0, 4.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00010412931442260742, -0.00010040868073701859, -9.668804705142975e-05, -9.296741336584091e-05, -8.924677968025208e-05, -8.552614599466324e-05, -8.18055123090744e-05, -7.808487862348557e-05, -7.436424493789673e-05, -7.064361125230789e-05, -6.692297756671906e-05, -6.320234388113022e-05, -5.948171019554138e-05, 
-5.5761076509952545e-05, -5.204044282436371e-05, -4.831980913877487e-05, -4.4599175453186035e-05, -4.08785417675972e-05, -3.715790808200836e-05, -3.3437274396419525e-05, -2.971664071083069e-05, -2.5996007025241852e-05, -2.2275373339653015e-05, -1.855473965406418e-05, -1.4834105968475342e-05, -1.1113472282886505e-05, -7.3928385972976685e-06, -3.6722049117088318e-06, 4.842877388000488e-08, 3.7690624594688416e-06, 7.489696145057678e-06, 1.1210329830646515e-05, 1.4930963516235352e-05, 1.8651597201824188e-05, 2.2372230887413025e-05, 2.609286457300186e-05, 2.9813498258590698e-05, 3.3534131944179535e-05, 3.725476562976837e-05, 4.097539931535721e-05, 4.4696033000946045e-05, 4.841666668653488e-05, 5.213730037212372e-05, 5.5857934057712555e-05, 5.957856774330139e-05, 6.329920142889023e-05, 6.701983511447906e-05, 7.07404688000679e-05, 7.446110248565674e-05, 7.818173617124557e-05, 8.190236985683441e-05, 8.562300354242325e-05, 8.934363722801208e-05, 9.306427091360092e-05, 9.678490459918976e-05, 0.0001005055382847786, 0.00010422617197036743, 0.00010794680565595627, 0.0001116674393415451, 0.00011538807302713394, 0.00011910870671272278, 0.00012282934039831161, 0.00012654997408390045, 0.0001302706077694893, 0.00013399124145507812]}, "gradients/decoder.model.decoder.layers.9.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 13.0, 36.0, 483.0, 430.0, 38.0, 11.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9797057509422302, -0.8714054226875305, -0.7631050944328308, -0.6548048257827759, -0.5465044975280762, -0.43820416927337646, -0.32990384101867676, -0.22160351276397705, -0.11330318450927734, -0.005002863705158234, 0.10329745709896088, 0.2115977704524994, 0.3198980987071991, 0.4281983971595764, 0.5364987254142761, 0.6447990536689758, 0.7530993819236755, 0.8613997101783752, 0.969700038433075, 1.0780003070831299, 1.1863006353378296, 1.2946009635925293, 1.402901291847229, 1.5112016201019287, 1.6195019483566284, 1.7278022766113281, 1.8361026048660278, 1.9444029331207275, 2.0527031421661377, 2.161003589630127, 2.269303798675537, 2.3776042461395264, 2.4859044551849365, 2.5942046642303467, 2.702505111694336, 2.810805320739746, 2.9191057682037354, 3.0274059772491455, 3.1357064247131348, 3.244006633758545, 3.352307081222534, 3.4606072902679443, 3.5689077377319336, 3.6772079467773438, 3.785508394241333, 3.893808603286743, 4.002109050750732, 4.110409259796143, 4.218709468841553, 4.327009677886963, 4.435309886932373, 4.543610572814941, 4.651910781860352, 4.760210990905762, 4.868511199951172, 4.97681188583374, 5.08511209487915, 5.1934123039245605, 5.301712512969971, 5.410013198852539, 5.518313407897949, 5.626613616943359, 5.7349138259887695, 5.843214511871338, 5.951514720916748]}, "gradients/decoder.model.decoder.layers.9.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 12.0, 30.0, 114.0, 243.0, 323.0, 196.0, 75.0, 20.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.23198917508125305, -0.1888667792081833, -0.14574438333511353, -0.10262197256088257, -0.059499576687812805, -0.016377180814743042, 
0.026745229959487915, 0.06986761093139648, 0.11299002170562744, 0.1561124175786972, 0.19923481345176697, 0.24235722422599792, 0.2854796051979065, 0.32860201597213745, 0.3717244267463684, 0.414846807718277, 0.45796921849250793, 0.5010915994644165, 0.5442140102386475, 0.5873364210128784, 0.6304588317871094, 0.6735812425613403, 0.7167036533355713, 0.7598260045051575, 0.8029484152793884, 0.8460708260536194, 0.8891932368278503, 0.9323155879974365, 0.9754379987716675, 1.0185604095458984, 1.0616828203201294, 1.1048052310943604, 1.1479276418685913, 1.1910500526428223, 1.2341724634170532, 1.2772948741912842, 1.3204172849655151, 1.363539695739746, 1.4066619873046875, 1.4497843980789185, 1.4929068088531494, 1.5360292196273804, 1.5791516304016113, 1.6222740411758423, 1.6653964519500732, 1.7085187435150146, 1.7516412734985352, 1.7947635650634766, 1.837886095046997, 1.881008505821228, 1.924130916595459, 1.96725332736969, 2.010375738143921, 2.0534980297088623, 2.096620559692383, 2.139742851257324, 2.1828651428222656, 2.225987434387207, 2.2691099643707275, 2.312232255935669, 2.3553547859191895, 2.398477077484131, 2.4415996074676514, 2.4847218990325928, 2.5278444290161133]}, "gradients/decoder.model.decoder.layers.9.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 9.0, 2.0, 7.0, 5.0, 7.0, 9.0, 11.0, 14.0, 9.0, 8.0, 8.0, 12.0, 10.0, 16.0, 10.0, 11.0, 18.0, 40.0, 68.0, 208.0, 2317.0, 30306.0, 959793.0, 51242.0, 3794.0, 339.0, 68.0, 34.0, 23.0, 16.0, 15.0, 7.0, 11.0, 6.0, 7.0, 7.0, 7.0, 14.0, 8.0, 12.0, 7.0, 7.0, 2.0, 5.0, 5.0, 6.0, 2.0, 6.0, 3.0, 3.0, 1.0, 1.0, 2.0], "bins": [-1.2451171875, -1.2080535888671875, -1.170989990234375, -1.1339263916015625, -1.09686279296875, -1.0597991943359375, -1.022735595703125, -0.9856719970703125, -0.9486083984375, -0.9115447998046875, -0.874481201171875, -0.8374176025390625, -0.80035400390625, -0.7632904052734375, -0.726226806640625, -0.6891632080078125, -0.652099609375, -0.6150360107421875, -0.577972412109375, -0.5409088134765625, -0.50384521484375, -0.4667816162109375, -0.429718017578125, -0.3926544189453125, -0.3555908203125, -0.3185272216796875, -0.281463623046875, -0.2444000244140625, -0.20733642578125, -0.1702728271484375, -0.133209228515625, -0.0961456298828125, -0.05908203125, -0.0220184326171875, 0.015045166015625, 0.0521087646484375, 0.08917236328125, 0.1262359619140625, 0.163299560546875, 0.2003631591796875, 0.2374267578125, 0.2744903564453125, 0.311553955078125, 0.3486175537109375, 0.38568115234375, 0.4227447509765625, 0.459808349609375, 0.4968719482421875, 0.533935546875, 0.5709991455078125, 0.608062744140625, 0.6451263427734375, 0.68218994140625, 0.7192535400390625, 0.756317138671875, 0.7933807373046875, 0.8304443359375, 0.8675079345703125, 0.904571533203125, 0.9416351318359375, 0.97869873046875, 1.0157623291015625, 1.052825927734375, 1.0898895263671875, 1.126953125]}, "gradients/decoder.model.decoder.layers.9.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 13.0, 26.0, 80.0, 205.0, 308.0, 239.0, 100.0, 35.0, 13.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0259857177734375, -0.02297234535217285, -0.019958972930908203, -0.016945600509643555, -0.013932228088378906, -0.010918855667114258, -0.00790548324584961, -0.004892110824584961, 
-0.0018787384033203125, 0.001134634017944336, 0.004148006439208984, 0.007161378860473633, 0.010174751281738281, 0.01318812370300293, 0.016201496124267578, 0.019214868545532227, 0.022228240966796875, 0.025241613388061523, 0.028254985809326172, 0.03126835823059082, 0.03428173065185547, 0.03729510307312012, 0.040308475494384766, 0.043321847915649414, 0.04633522033691406, 0.04934859275817871, 0.05236196517944336, 0.05537533760070801, 0.058388710021972656, 0.061402082443237305, 0.06441545486450195, 0.0674288272857666, 0.07044219970703125, 0.0734555721282959, 0.07646894454956055, 0.0794823169708252, 0.08249568939208984, 0.08550906181335449, 0.08852243423461914, 0.09153580665588379, 0.09454917907714844, 0.09756255149841309, 0.10057592391967773, 0.10358929634094238, 0.10660266876220703, 0.10961604118347168, 0.11262941360473633, 0.11564278602600098, 0.11865615844726562, 0.12166953086853027, 0.12468290328979492, 0.12769627571105957, 0.13070964813232422, 0.13372302055358887, 0.13673639297485352, 0.13974976539611816, 0.1427631378173828, 0.14577651023864746, 0.1487898826599121, 0.15180325508117676, 0.1548166275024414, 0.15782999992370605, 0.1608433723449707, 0.16385674476623535, 0.1668701171875]}, "gradients/decoder.model.decoder.layers.9.self_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 6.0, 1.0, 2.0, 3.0, 6.0, 15.0, 18.0, 20.0, 34.0, 39.0, 52.0, 71.0, 126.0, 182.0, 220.0, 341.0, 602.0, 793.0, 1338.0, 2226.0, 3771.0, 6947.0, 13071.0, 28188.0, 77277.0, 465530.0, 328847.0, 65741.0, 25432.0, 12024.0, 6452.0, 3441.0, 2115.0, 1297.0, 750.0, 496.0, 325.0, 234.0, 163.0, 104.0, 71.0, 58.0, 38.0, 26.0, 26.0, 16.0, 9.0, 9.0, 6.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.1197509765625, -0.11561203002929688, -0.11147308349609375, -0.10733413696289062, -0.1031951904296875, -0.09905624389648438, -0.09491729736328125, -0.09077835083007812, -0.086639404296875, -0.08250045776367188, -0.07836151123046875, -0.07422256469726562, -0.0700836181640625, -0.06594467163085938, -0.06180572509765625, -0.057666778564453125, -0.05352783203125, -0.049388885498046875, -0.04524993896484375, -0.041110992431640625, -0.0369720458984375, -0.032833099365234375, -0.02869415283203125, -0.024555206298828125, -0.020416259765625, -0.016277313232421875, -0.01213836669921875, -0.007999420166015625, -0.0038604736328125, 0.000278472900390625, 0.00441741943359375, 0.008556365966796875, 0.0126953125, 0.016834259033203125, 0.02097320556640625, 0.025112152099609375, 0.0292510986328125, 0.033390045166015625, 0.03752899169921875, 0.041667938232421875, 0.045806884765625, 0.049945831298828125, 0.05408477783203125, 0.058223724365234375, 0.0623626708984375, 0.06650161743164062, 0.07064056396484375, 0.07477951049804688, 0.07891845703125, 0.08305740356445312, 0.08719635009765625, 0.09133529663085938, 0.0954742431640625, 0.09961318969726562, 0.10375213623046875, 0.10789108276367188, 0.112030029296875, 0.11616897583007812, 0.12030792236328125, 0.12444686889648438, 0.1285858154296875, 0.13272476196289062, 0.13686370849609375, 0.14100265502929688, 0.1451416015625]}, "gradients/decoder.model.decoder.layers.9.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 9.0, 7.0, 6.0, 4.0, 9.0, 8.0, 12.0, 12.0, 17.0, 16.0, 19.0, 20.0, 22.0, 27.0, 32.0, 47.0, 60.0, 58.0, 61.0, 69.0, 78.0, 65.0, 54.0, 57.0, 35.0, 31.0, 22.0, 13.0, 14.0, 21.0, 14.0, 11.0, 8.0, 6.0, 12.0, 8.0, 7.0, 9.0, 8.0, 0.0, 4.0, 0.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0], 
"bins": [-0.047821044921875, -0.04630231857299805, -0.044783592224121094, -0.04326486587524414, -0.04174613952636719, -0.040227413177490234, -0.03870868682861328, -0.03718996047973633, -0.035671234130859375, -0.03415250778198242, -0.03263378143310547, -0.031115055084228516, -0.029596328735351562, -0.02807760238647461, -0.026558876037597656, -0.025040149688720703, -0.02352142333984375, -0.022002696990966797, -0.020483970642089844, -0.01896524429321289, -0.017446517944335938, -0.015927791595458984, -0.014409065246582031, -0.012890338897705078, -0.011371612548828125, -0.009852886199951172, -0.008334159851074219, -0.006815433502197266, -0.0052967071533203125, -0.0037779808044433594, -0.0022592544555664062, -0.0007405281066894531, 0.0007781982421875, 0.002296924591064453, 0.0038156509399414062, 0.005334377288818359, 0.0068531036376953125, 0.008371829986572266, 0.009890556335449219, 0.011409282684326172, 0.012928009033203125, 0.014446735382080078, 0.01596546173095703, 0.017484188079833984, 0.019002914428710938, 0.02052164077758789, 0.022040367126464844, 0.023559093475341797, 0.02507781982421875, 0.026596546173095703, 0.028115272521972656, 0.02963399887084961, 0.031152725219726562, 0.032671451568603516, 0.03419017791748047, 0.03570890426635742, 0.037227630615234375, 0.03874635696411133, 0.04026508331298828, 0.041783809661865234, 0.04330253601074219, 0.04482126235961914, 0.046339988708496094, 0.04785871505737305, 0.04937744140625]}, "gradients/decoder.model.decoder.layers.9.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 3.0, 8.0, 10.0, 11.0, 13.0, 24.0, 40.0, 54.0, 95.0, 146.0, 219.0, 386.0, 638.0, 1135.0, 2445.0, 8824.0, 990912.0, 35087.0, 4642.0, 1706.0, 818.0, 473.0, 284.0, 191.0, 119.0, 84.0, 54.0, 43.0, 31.0, 10.0, 15.0, 11.0, 8.0, 1.0, 2.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.48828125, -4.32672119140625, -4.1651611328125, -4.00360107421875, -3.842041015625, -3.68048095703125, -3.5189208984375, -3.35736083984375, -3.19580078125, -3.03424072265625, -2.8726806640625, -2.71112060546875, -2.549560546875, -2.38800048828125, -2.2264404296875, -2.06488037109375, -1.9033203125, -1.74176025390625, -1.5802001953125, -1.41864013671875, -1.257080078125, -1.09552001953125, -0.9339599609375, -0.77239990234375, -0.61083984375, -0.44927978515625, -0.2877197265625, -0.12615966796875, 0.035400390625, 0.19696044921875, 0.3585205078125, 0.52008056640625, 0.681640625, 0.84320068359375, 1.0047607421875, 1.16632080078125, 1.327880859375, 1.48944091796875, 1.6510009765625, 1.81256103515625, 1.97412109375, 2.13568115234375, 2.2972412109375, 2.45880126953125, 2.620361328125, 2.78192138671875, 2.9434814453125, 3.10504150390625, 3.2666015625, 3.42816162109375, 3.5897216796875, 3.75128173828125, 3.912841796875, 4.07440185546875, 4.2359619140625, 4.39752197265625, 4.55908203125, 4.72064208984375, 4.8822021484375, 5.04376220703125, 5.205322265625, 5.36688232421875, 5.5284423828125, 5.69000244140625, 5.8515625]}, "gradients/decoder.model.decoder.layers.9.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 7.0, 5.0, 19.0, 663.0, 253.0, 21.0, 8.0, 4.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.001811981201171875, -0.001753494143486023, 
-0.001695007085800171, -0.0016365200281143188, -0.0015780329704284668, -0.0015195459127426147, -0.0014610588550567627, -0.0014025717973709106, -0.0013440847396850586, -0.0012855976819992065, -0.0012271106243133545, -0.0011686235666275024, -0.0011101365089416504, -0.0010516494512557983, -0.0009931623935699463, -0.0009346753358840942, -0.0008761882781982422, -0.0008177012205123901, -0.0007592141628265381, -0.000700727105140686, -0.000642240047454834, -0.0005837529897689819, -0.0005252659320831299, -0.00046677887439727783, -0.0004082918167114258, -0.00034980475902557373, -0.0002913177013397217, -0.00023283064365386963, -0.00017434358596801758, -0.00011585652828216553, -5.7369470596313477e-05, 1.1175870895385742e-06, 5.9604644775390625e-05, 0.00011809170246124268, 0.00017657876014709473, 0.00023506581783294678, 0.00029355287551879883, 0.0003520399332046509, 0.00041052699089050293, 0.000469014048576355, 0.000527501106262207, 0.0005859881639480591, 0.0006444752216339111, 0.0007029622793197632, 0.0007614493370056152, 0.0008199363946914673, 0.0008784234523773193, 0.0009369105100631714, 0.0009953975677490234, 0.0010538846254348755, 0.0011123716831207275, 0.0011708587408065796, 0.0012293457984924316, 0.0012878328561782837, 0.0013463199138641357, 0.0014048069715499878, 0.0014632940292358398, 0.001521781086921692, 0.001580268144607544, 0.001638755202293396, 0.001697242259979248, 0.0017557293176651, 0.0018142163753509521, 0.0018727034330368042, 0.0019311904907226562]}, "gradients/decoder.model.decoder.layers.9.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 5.0, 3.0, 6.0, 15.0, 12.0, 24.0, 34.0, 27.0, 42.0, 63.0, 66.0, 106.0, 140.0, 192.0, 271.0, 405.0, 563.0, 863.0, 1501.0, 2665.0, 5683.0, 15651.0, 779582.0, 213525.0, 14828.0, 5590.0, 2567.0, 1378.0, 838.0, 568.0, 387.0, 264.0, 198.0, 122.0, 88.0, 75.0, 51.0, 41.0, 31.0, 29.0, 18.0, 13.0, 11.0, 4.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.78369140625, -0.759185791015625, -0.73468017578125, -0.710174560546875, -0.6856689453125, -0.661163330078125, -0.63665771484375, -0.612152099609375, -0.587646484375, -0.563140869140625, -0.53863525390625, -0.514129638671875, -0.4896240234375, -0.465118408203125, -0.44061279296875, -0.416107177734375, -0.3916015625, -0.367095947265625, -0.34259033203125, -0.318084716796875, -0.2935791015625, -0.269073486328125, -0.24456787109375, -0.220062255859375, -0.195556640625, -0.171051025390625, -0.14654541015625, -0.122039794921875, -0.0975341796875, -0.073028564453125, -0.04852294921875, -0.024017333984375, 0.00048828125, 0.024993896484375, 0.04949951171875, 0.074005126953125, 0.0985107421875, 0.123016357421875, 0.14752197265625, 0.172027587890625, 0.196533203125, 0.221038818359375, 0.24554443359375, 0.270050048828125, 0.2945556640625, 0.319061279296875, 0.34356689453125, 0.368072509765625, 0.392578125, 0.417083740234375, 0.44158935546875, 0.466094970703125, 0.4906005859375, 0.515106201171875, 0.53961181640625, 0.564117431640625, 0.588623046875, 0.613128662109375, 0.63763427734375, 0.662139892578125, 0.6866455078125, 0.711151123046875, 0.73565673828125, 0.760162353515625, 0.78466796875]}, "gradients/decoder.model.decoder.layers.9.self_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 6.0, 8.0, 11.0, 14.0, 13.0, 33.0, 397.0, 416.0, 22.0, 21.0, 6.0, 10.0, 13.0, 9.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 
0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.417236328125, -0.40419769287109375, -0.3911590576171875, -0.37812042236328125, -0.365081787109375, -0.35204315185546875, -0.3390045166015625, -0.32596588134765625, -0.31292724609375, -0.29988861083984375, -0.2868499755859375, -0.27381134033203125, -0.260772705078125, -0.24773406982421875, -0.2346954345703125, -0.22165679931640625, -0.2086181640625, -0.19557952880859375, -0.1825408935546875, -0.16950225830078125, -0.156463623046875, -0.14342498779296875, -0.1303863525390625, -0.11734771728515625, -0.10430908203125, -0.09127044677734375, -0.0782318115234375, -0.06519317626953125, -0.052154541015625, -0.03911590576171875, -0.0260772705078125, -0.01303863525390625, 0.0, 0.01303863525390625, 0.0260772705078125, 0.03911590576171875, 0.052154541015625, 0.06519317626953125, 0.0782318115234375, 0.09127044677734375, 0.10430908203125, 0.11734771728515625, 0.1303863525390625, 0.14342498779296875, 0.156463623046875, 0.16950225830078125, 0.1825408935546875, 0.19557952880859375, 0.2086181640625, 0.22165679931640625, 0.2346954345703125, 0.24773406982421875, 0.260772705078125, 0.27381134033203125, 0.2868499755859375, 0.29988861083984375, 0.31292724609375, 0.32596588134765625, 0.3390045166015625, 0.35204315185546875, 0.365081787109375, 0.37812042236328125, 0.3911590576171875, 0.40419769287109375, 0.417236328125]}, "gradients/decoder.model.decoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 0.0, 3.0, 4.0, 7.0, 14.0, 21.0, 36.0, 102.0, 437.0, 252.0, 60.0, 30.0, 16.0, 6.0, 5.0, 2.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-18.341251373291016, -17.941007614135742, -17.540761947631836, -17.140518188476562, -16.740272521972656, -16.340028762817383, -15.939783096313477, -15.539538383483887, -15.139293670654297, -14.739048957824707, -14.338804244995117, -13.938559532165527, -13.538314819335938, -13.138070106506348, -12.737825393676758, -12.337580680847168, -11.937335968017578, -11.537091255187988, -11.136846542358398, -10.736601829528809, -10.336357116699219, -9.936112403869629, -9.535867691040039, -9.13562297821045, -8.735379219055176, -8.335134506225586, -7.934889793395996, -7.534645080566406, -7.134400367736816, -6.734155654907227, -6.333910942077637, -5.933666229248047, -5.533421516418457, -5.133176803588867, -4.732932090759277, -4.3326873779296875, -3.9324426651000977, -3.532198190689087, -3.131953477859497, -2.7317087650299072, -2.3314640522003174, -1.9312193393707275, -1.5309746265411377, -1.1307300329208374, -0.7304853200912476, -0.33024072647094727, 0.07000398635864258, 0.4702486991882324, 0.8704934120178223, 1.270738124847412, 1.670982837677002, 2.071227550506592, 2.4714722633361816, 2.8717167377471924, 3.2719614505767822, 3.672206163406372, 4.072450637817383, 4.472695350646973, 4.8729400634765625, 5.273184776306152, 5.673429489135742, 6.073674201965332, 6.473918914794922, 6.874163627624512, 7.274408340454102]}, "gradients/decoder.model.decoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 7.0, 6.0, 9.0, 9.0, 5.0, 7.0, 11.0, 15.0, 12.0, 21.0, 21.0, 21.0, 21.0, 28.0, 36.0, 34.0, 38.0, 40.0, 47.0, 36.0, 24.0, 47.0, 47.0, 45.0, 40.0, 36.0, 34.0, 36.0, 37.0, 24.0, 39.0, 20.0, 33.0, 14.0, 25.0, 17.0, 13.0, 11.0, 12.0, 4.0, 7.0, 6.0, 
1.0, 6.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3505553603172302, -0.33817169070243835, -0.32578805088996887, -0.313404381275177, -0.3010207414627075, -0.28863707184791565, -0.2762534022331238, -0.2638697624206543, -0.2514861226081848, -0.23910246789455414, -0.22671881318092346, -0.2143351435661316, -0.2019515037536621, -0.18956783413887024, -0.17718417942523956, -0.1648005247116089, -0.15241685509681702, -0.14003320038318634, -0.12764954566955566, -0.11526588350534439, -0.10288222879171371, -0.09049857407808304, -0.07811491191387177, -0.06573125720024109, -0.05334760248661041, -0.040963947772979736, -0.02858028933405876, -0.016196630895137787, -0.0038129761815071106, 0.008570678532123566, 0.02095434069633484, 0.033337995409965515, 0.04572165012359619, 0.05810530483722687, 0.07048895955085754, 0.08287262171506882, 0.0952562764286995, 0.10763993114233017, 0.12002359330654144, 0.13240724802017212, 0.1447909027338028, 0.15717455744743347, 0.16955821216106415, 0.18194186687469482, 0.1943255364894867, 0.20670917630195618, 0.21909284591674805, 0.23147650063037872, 0.2438601553440094, 0.25624382495880127, 0.26862746477127075, 0.2810111343860626, 0.2933947741985321, 0.305778443813324, 0.31816208362579346, 0.3305457532405853, 0.3429294228553772, 0.35531309247016907, 0.36769673228263855, 0.3800804018974304, 0.3924640417098999, 0.4048477113246918, 0.41723138093948364, 0.4296150207519531, 0.4419986605644226]}, "gradients/decoder.model.decoder.layers.8.fc2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 4.0, 7.0, 11.0, 8.0, 15.0, 24.0, 24.0, 18.0, 45.0, 78.0, 98.0, 174.0, 250.0, 419.0, 724.0, 1283.0, 2635.0, 5334.0, 12789.0, 36660.0, 4010987.0, 88688.0, 19793.0, 7372.0, 3248.0, 1552.0, 817.0, 453.0, 308.0, 156.0, 114.0, 64.0, 49.0, 24.0, 18.0, 16.0, 8.0, 5.0, 4.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.12109375, -3.9827880859375, -3.844482421875, -3.7061767578125, -3.56787109375, -3.4295654296875, -3.291259765625, -3.1529541015625, -3.0146484375, -2.8763427734375, -2.738037109375, -2.5997314453125, -2.46142578125, -2.3231201171875, -2.184814453125, -2.0465087890625, -1.908203125, -1.7698974609375, -1.631591796875, -1.4932861328125, -1.35498046875, -1.2166748046875, -1.078369140625, -0.9400634765625, -0.8017578125, -0.6634521484375, -0.525146484375, -0.3868408203125, -0.24853515625, -0.1102294921875, 0.028076171875, 0.1663818359375, 0.3046875, 0.4429931640625, 0.581298828125, 0.7196044921875, 0.85791015625, 0.9962158203125, 1.134521484375, 1.2728271484375, 1.4111328125, 1.5494384765625, 1.687744140625, 1.8260498046875, 1.96435546875, 2.1026611328125, 2.240966796875, 2.3792724609375, 2.517578125, 2.6558837890625, 2.794189453125, 2.9324951171875, 3.07080078125, 3.2091064453125, 3.347412109375, 3.4857177734375, 3.6240234375, 3.7623291015625, 3.900634765625, 4.0389404296875, 4.17724609375, 4.3155517578125, 4.453857421875, 4.5921630859375, 4.73046875]}, "gradients/decoder.model.decoder.layers.8.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 5.0, 6.0, 11.0, 10.0, 15.0, 15.0, 27.0, 25.0, 27.0, 40.0, 74.0, 125.0, 232.0, 177.0, 62.0, 34.0, 30.0, 32.0, 17.0, 10.0, 9.0, 5.0, 2.0, 3.0, 1.0, 4.0, 2.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0], "bins": [-0.214111328125, -0.20908164978027344, -0.20405197143554688, -0.1990222930908203, 
-0.19399261474609375, -0.1889629364013672, -0.18393325805664062, -0.17890357971191406, -0.1738739013671875, -0.16884422302246094, -0.16381454467773438, -0.1587848663330078, -0.15375518798828125, -0.1487255096435547, -0.14369583129882812, -0.13866615295410156, -0.133636474609375, -0.12860679626464844, -0.12357711791992188, -0.11854743957519531, -0.11351776123046875, -0.10848808288574219, -0.10345840454101562, -0.09842872619628906, -0.0933990478515625, -0.08836936950683594, -0.08333969116210938, -0.07831001281738281, -0.07328033447265625, -0.06825065612792969, -0.06322097778320312, -0.05819129943847656, -0.05316162109375, -0.04813194274902344, -0.043102264404296875, -0.03807258605957031, -0.03304290771484375, -0.028013229370117188, -0.022983551025390625, -0.017953872680664062, -0.0129241943359375, -0.007894515991210938, -0.002864837646484375, 0.0021648406982421875, 0.00719451904296875, 0.012224197387695312, 0.017253875732421875, 0.022283554077148438, 0.027313232421875, 0.03234291076660156, 0.037372589111328125, 0.04240226745605469, 0.04743194580078125, 0.05246162414550781, 0.057491302490234375, 0.06252098083496094, 0.0675506591796875, 0.07258033752441406, 0.07761001586914062, 0.08263969421386719, 0.08766937255859375, 0.09269905090332031, 0.09772872924804688, 0.10275840759277344, 0.1077880859375]}, "gradients/decoder.model.decoder.layers.8.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 4.0, 7.0, 10.0, 11.0, 14.0, 27.0, 38.0, 44.0, 76.0, 125.0, 230.0, 379.0, 711.0, 1366.0, 2873.0, 6802.0, 17946.0, 64879.0, 3980195.0, 83479.0, 20928.0, 7563.0, 3277.0, 1507.0, 752.0, 419.0, 241.0, 150.0, 91.0, 42.0, 37.0, 19.0, 15.0, 6.0, 4.0, 6.0, 6.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.779296875, -1.7191162109375, -1.658935546875, -1.5987548828125, -1.53857421875, -1.4783935546875, -1.418212890625, -1.3580322265625, -1.2978515625, -1.2376708984375, -1.177490234375, -1.1173095703125, -1.05712890625, -0.9969482421875, -0.936767578125, -0.8765869140625, -0.81640625, -0.7562255859375, -0.696044921875, -0.6358642578125, -0.57568359375, -0.5155029296875, -0.455322265625, -0.3951416015625, -0.3349609375, -0.2747802734375, -0.214599609375, -0.1544189453125, -0.09423828125, -0.0340576171875, 0.026123046875, 0.0863037109375, 0.146484375, 0.2066650390625, 0.266845703125, 0.3270263671875, 0.38720703125, 0.4473876953125, 0.507568359375, 0.5677490234375, 0.6279296875, 0.6881103515625, 0.748291015625, 0.8084716796875, 0.86865234375, 0.9288330078125, 0.989013671875, 1.0491943359375, 1.109375, 1.1695556640625, 1.229736328125, 1.2899169921875, 1.35009765625, 1.4102783203125, 1.470458984375, 1.5306396484375, 1.5908203125, 1.6510009765625, 1.711181640625, 1.7713623046875, 1.83154296875, 1.8917236328125, 1.951904296875, 2.0120849609375, 2.072265625]}, "gradients/decoder.model.decoder.layers.8.fc1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 3.0, 4.0, 4.0, 12.0, 10.0, 6.0, 16.0, 12.0, 29.0, 46.0, 79.0, 2671.0, 993.0, 64.0, 25.0, 24.0, 11.0, 10.0, 8.0, 9.0, 4.0, 6.0, 4.0, 3.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4501953125, -0.43350982666015625, -0.4168243408203125, -0.40013885498046875, -0.383453369140625, -0.36676788330078125, -0.3500823974609375, -0.33339691162109375, -0.31671142578125, -0.30002593994140625, -0.2833404541015625, 
-0.26665496826171875, -0.249969482421875, -0.23328399658203125, -0.2165985107421875, -0.19991302490234375, -0.1832275390625, -0.16654205322265625, -0.1498565673828125, -0.13317108154296875, -0.116485595703125, -0.09980010986328125, -0.0831146240234375, -0.06642913818359375, -0.04974365234375, -0.03305816650390625, -0.0163726806640625, 0.00031280517578125, 0.016998291015625, 0.03368377685546875, 0.0503692626953125, 0.06705474853515625, 0.083740234375, 0.10042572021484375, 0.1171112060546875, 0.13379669189453125, 0.150482177734375, 0.16716766357421875, 0.1838531494140625, 0.20053863525390625, 0.21722412109375, 0.23390960693359375, 0.2505950927734375, 0.26728057861328125, 0.283966064453125, 0.30065155029296875, 0.3173370361328125, 0.33402252197265625, 0.3507080078125, 0.36739349365234375, 0.3840789794921875, 0.40076446533203125, 0.417449951171875, 0.43413543701171875, 0.4508209228515625, 0.46750640869140625, 0.48419189453125, 0.5008773803710938, 0.5175628662109375, 0.5342483520507812, 0.550933837890625, 0.5676193237304688, 0.5843048095703125, 0.6009902954101562, 0.61767578125]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 3.0, 3.0, 3.0, 7.0, 9.0, 20.0, 18.0, 22.0, 48.0, 80.0, 163.0, 231.0, 165.0, 94.0, 59.0, 23.0, 18.0, 21.0, 7.0, 2.0, 2.0, 3.0, 6.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.2225303649902344, -2.1544642448425293, -2.086397886276245, -2.01833176612854, -1.950265645980835, -1.8821994066238403, -1.8141331672668457, -1.7460670471191406, -1.678000807762146, -1.6099345684051514, -1.5418684482574463, -1.4738022089004517, -1.405735969543457, -1.337669849395752, -1.2696036100387573, -1.2015373706817627, -1.1334712505340576, -1.065405011177063, -0.9973388910293579, -0.9292726516723633, -0.8612064719200134, -0.7931402921676636, -0.725074052810669, -0.6570078730583191, -0.5889416933059692, -0.5208755135536194, -0.45280930399894714, -0.3847430944442749, -0.31667691469192505, -0.2486107349395752, -0.18054452538490295, -0.11247831583023071, -0.04441237449645996, 0.023653820157051086, 0.09172001481056213, 0.15978620946407318, 0.22785240411758423, 0.2959185838699341, 0.3639847934246063, 0.43205100297927856, 0.5001171827316284, 0.5681833624839783, 0.6362495422363281, 0.7043157815933228, 0.7723819613456726, 0.8404481410980225, 0.9085143804550171, 0.9765805602073669, 1.0446467399597168, 1.1127129793167114, 1.1807790994644165, 1.2488453388214111, 1.3169114589691162, 1.3849776983261108, 1.4530439376831055, 1.5211100578308105, 1.5891762971878052, 1.6572425365447998, 1.7253086566925049, 1.7933748960494995, 1.8614411354064941, 1.9295072555541992, 1.9975734949111938, 2.0656397342681885, 2.1337058544158936]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 7.0, 4.0, 11.0, 2.0, 9.0, 9.0, 11.0, 15.0, 19.0, 11.0, 25.0, 30.0, 27.0, 27.0, 46.0, 29.0, 46.0, 38.0, 45.0, 50.0, 47.0, 56.0, 42.0, 50.0, 43.0, 46.0, 34.0, 27.0, 33.0, 33.0, 25.0, 17.0, 21.0, 20.0, 9.0, 13.0, 9.0, 5.0, 6.0, 6.0, 2.0, 1.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.4896220266819, -0.4749629497528076, -0.4603038430213928, -0.4456447660923004, -0.4309856593608856, -0.4163265824317932, -0.4016674757003784, -0.387008398771286, -0.3723493218421936, 
-0.3576902449131012, -0.3430311381816864, -0.328372061252594, -0.3137129545211792, -0.2990538775920868, -0.2843948006629944, -0.2697356939315796, -0.2550765872001648, -0.2404174953699112, -0.2257584035396576, -0.21109932661056519, -0.1964402198791504, -0.18178114295005798, -0.16712205111980438, -0.15246295928955078, -0.13780386745929718, -0.12314477562904358, -0.10848568379878998, -0.09382659941911697, -0.07916750758886337, -0.06450841575860977, -0.04984933137893677, -0.035190239548683167, -0.020531147718429565, -0.005872057750821114, 0.008787032216787338, 0.02344612032175064, 0.03810521215200424, 0.05276430398225784, 0.06742338836193085, 0.08208248019218445, 0.09674157202243805, 0.11140066385269165, 0.12605975568294525, 0.14071884751319885, 0.15537792444229126, 0.17003703117370605, 0.18469610810279846, 0.19935519993305206, 0.21401429176330566, 0.22867338359355927, 0.24333247542381287, 0.2579915523529053, 0.27265065908432007, 0.2873097360134125, 0.3019688129425049, 0.3166279196739197, 0.3312870264053345, 0.3459461033344269, 0.3606052100658417, 0.3752642869949341, 0.3899233937263489, 0.4045824706554413, 0.4192415475845337, 0.4339006543159485, 0.4485597312450409]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 2.0, 5.0, 7.0, 2.0, 5.0, 10.0, 9.0, 27.0, 27.0, 42.0, 49.0, 103.0, 173.0, 308.0, 576.0, 1164.0, 3289.0, 11950.0, 75419.0, 816417.0, 116187.0, 15775.0, 4093.0, 1427.0, 633.0, 343.0, 201.0, 114.0, 76.0, 41.0, 23.0, 12.0, 15.0, 9.0, 9.0, 6.0, 7.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2447509765625, -0.23767471313476562, -0.23059844970703125, -0.22352218627929688, -0.2164459228515625, -0.20936965942382812, -0.20229339599609375, -0.19521713256835938, -0.188140869140625, -0.18106460571289062, -0.17398834228515625, -0.16691207885742188, -0.1598358154296875, -0.15275955200195312, -0.14568328857421875, -0.13860702514648438, -0.13153076171875, -0.12445449829101562, -0.11737823486328125, -0.11030197143554688, -0.1032257080078125, -0.09614944458007812, -0.08907318115234375, -0.08199691772460938, -0.074920654296875, -0.06784439086914062, -0.06076812744140625, -0.053691864013671875, -0.0466156005859375, -0.039539337158203125, -0.03246307373046875, -0.025386810302734375, -0.018310546875, -0.011234283447265625, -0.00415802001953125, 0.002918243408203125, 0.0099945068359375, 0.017070770263671875, 0.02414703369140625, 0.031223297119140625, 0.038299560546875, 0.045375823974609375, 0.05245208740234375, 0.059528350830078125, 0.0666046142578125, 0.07368087768554688, 0.08075714111328125, 0.08783340454101562, 0.09490966796875, 0.10198593139648438, 0.10906219482421875, 0.11613845825195312, 0.1232147216796875, 0.13029098510742188, 0.13736724853515625, 0.14444351196289062, 0.151519775390625, 0.15859603881835938, 0.16567230224609375, 0.17274856567382812, 0.1798248291015625, 0.18690109252929688, 0.19397735595703125, 0.20105361938476562, 0.2081298828125]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 2.0, 0.0, 6.0, 8.0, 7.0, 10.0, 11.0, 15.0, 25.0, 32.0, 40.0, 81.0, 66.0, 82.0, 112.0, 102.0, 95.0, 69.0, 64.0, 46.0, 47.0, 28.0, 15.0, 11.0, 14.0, 6.0, 5.0, 7.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.67919921875, -0.6597213745117188, 
-0.6402435302734375, -0.6207656860351562, -0.601287841796875, -0.5818099975585938, -0.5623321533203125, -0.5428543090820312, -0.52337646484375, -0.5038986206054688, -0.4844207763671875, -0.46494293212890625, -0.445465087890625, -0.42598724365234375, -0.4065093994140625, -0.38703155517578125, -0.3675537109375, -0.34807586669921875, -0.3285980224609375, -0.30912017822265625, -0.289642333984375, -0.27016448974609375, -0.2506866455078125, -0.23120880126953125, -0.21173095703125, -0.19225311279296875, -0.1727752685546875, -0.15329742431640625, -0.133819580078125, -0.11434173583984375, -0.0948638916015625, -0.07538604736328125, -0.055908203125, -0.03643035888671875, -0.0169525146484375, 0.00252532958984375, 0.022003173828125, 0.04148101806640625, 0.0609588623046875, 0.08043670654296875, 0.09991455078125, 0.11939239501953125, 0.1388702392578125, 0.15834808349609375, 0.177825927734375, 0.19730377197265625, 0.2167816162109375, 0.23625946044921875, 0.2557373046875, 0.27521514892578125, 0.2946929931640625, 0.31417083740234375, 0.333648681640625, 0.35312652587890625, 0.3726043701171875, 0.39208221435546875, 0.41156005859375, 0.43103790283203125, 0.4505157470703125, 0.46999359130859375, 0.489471435546875, 0.5089492797851562, 0.5284271240234375, 0.5479049682617188, 0.5673828125]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 11.0, 10.0, 15.0, 16.0, 22.0, 34.0, 36.0, 47.0, 52.0, 71.0, 85.0, 100.0, 140.0, 209.0, 442.0, 1292.0, 4995.0, 30104.0, 655255.0, 327229.0, 21928.0, 4117.0, 1083.0, 391.0, 197.0, 135.0, 116.0, 73.0, 75.0, 62.0, 51.0, 38.0, 21.0, 35.0, 16.0, 10.0, 9.0, 7.0, 6.0, 8.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.039031982421875, -0.03788137435913086, -0.03673076629638672, -0.03558015823364258, -0.03442955017089844, -0.0332789421081543, -0.032128334045410156, -0.030977725982666016, -0.029827117919921875, -0.028676509857177734, -0.027525901794433594, -0.026375293731689453, -0.025224685668945312, -0.024074077606201172, -0.02292346954345703, -0.02177286148071289, -0.02062225341796875, -0.01947164535522461, -0.01832103729248047, -0.017170429229736328, -0.016019821166992188, -0.014869213104248047, -0.013718605041503906, -0.012567996978759766, -0.011417388916015625, -0.010266780853271484, -0.009116172790527344, -0.007965564727783203, -0.0068149566650390625, -0.005664348602294922, -0.004513740539550781, -0.0033631324768066406, -0.0022125244140625, -0.0010619163513183594, 8.869171142578125e-05, 0.0012392997741699219, 0.0023899078369140625, 0.003540515899658203, 0.004691123962402344, 0.005841732025146484, 0.006992340087890625, 0.008142948150634766, 0.009293556213378906, 0.010444164276123047, 0.011594772338867188, 0.012745380401611328, 0.013895988464355469, 0.01504659652709961, 0.01619720458984375, 0.01734781265258789, 0.01849842071533203, 0.019649028778076172, 0.020799636840820312, 0.021950244903564453, 0.023100852966308594, 0.024251461029052734, 0.025402069091796875, 0.026552677154541016, 0.027703285217285156, 0.028853893280029297, 0.030004501342773438, 0.031155109405517578, 0.03230571746826172, 0.03345632553100586, 0.03460693359375]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 4.0, 4.0, 10.0, 7.0, 9.0, 10.0, 13.0, 20.0, 15.0, 21.0, 21.0, 23.0, 25.0, 30.0, 32.0, 34.0, 40.0, 41.0, 44.0, 46.0, 45.0, 47.0, 38.0, 39.0, 41.0, 35.0, 
36.0, 35.0, 36.0, 26.0, 26.0, 26.0, 25.0, 16.0, 11.0, 21.0, 12.0, 12.0, 7.0, 7.0, 6.0, 5.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0263671875, -0.9962081909179688, -0.9660491943359375, -0.9358901977539062, -0.905731201171875, -0.8755722045898438, -0.8454132080078125, -0.8152542114257812, -0.78509521484375, -0.7549362182617188, -0.7247772216796875, -0.6946182250976562, -0.664459228515625, -0.6343002319335938, -0.6041412353515625, -0.5739822387695312, -0.5438232421875, -0.5136642456054688, -0.4835052490234375, -0.45334625244140625, -0.423187255859375, -0.39302825927734375, -0.3628692626953125, -0.33271026611328125, -0.30255126953125, -0.27239227294921875, -0.2422332763671875, -0.21207427978515625, -0.181915283203125, -0.15175628662109375, -0.1215972900390625, -0.09143829345703125, -0.061279296875, -0.03112030029296875, -0.0009613037109375, 0.02919769287109375, 0.059356689453125, 0.08951568603515625, 0.1196746826171875, 0.14983367919921875, 0.17999267578125, 0.21015167236328125, 0.2403106689453125, 0.27046966552734375, 0.300628662109375, 0.33078765869140625, 0.3609466552734375, 0.39110565185546875, 0.4212646484375, 0.45142364501953125, 0.4815826416015625, 0.5117416381835938, 0.541900634765625, 0.5720596313476562, 0.6022186279296875, 0.6323776245117188, 0.66253662109375, 0.6926956176757812, 0.7228546142578125, 0.7530136108398438, 0.783172607421875, 0.8133316040039062, 0.8434906005859375, 0.8736495971679688, 0.90380859375]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 6.0, 6.0, 3.0, 4.0, 18.0, 19.0, 28.0, 40.0, 29.0, 76.0, 111.0, 164.0, 254.0, 391.0, 582.0, 985.0, 1525.0, 2574.0, 4457.0, 8591.0, 18664.0, 50872.0, 261823.0, 582027.0, 68497.0, 23596.0, 10189.0, 5383.0, 2884.0, 1760.0, 1068.0, 670.0, 443.0, 283.0, 164.0, 117.0, 88.0, 51.0, 45.0, 20.0, 13.0, 17.0, 10.0, 9.0, 4.0, 0.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00031685829162597656, -0.0003069937229156494, -0.00029712915420532227, -0.0002872645854949951, -0.00027740001678466797, -0.0002675354480743408, -0.00025767087936401367, -0.0002478063106536865, -0.00023794174194335938, -0.00022807717323303223, -0.00021821260452270508, -0.00020834803581237793, -0.00019848346710205078, -0.00018861889839172363, -0.00017875432968139648, -0.00016888976097106934, -0.0001590251922607422, -0.00014916062355041504, -0.0001392960548400879, -0.00012943148612976074, -0.0001195669174194336, -0.00010970234870910645, -9.98377799987793e-05, -8.997321128845215e-05, -8.0108642578125e-05, -7.024407386779785e-05, -6.03795051574707e-05, -5.0514936447143555e-05, -4.0650367736816406e-05, -3.078579902648926e-05, -2.092123031616211e-05, -1.1056661605834961e-05, -1.1920928955078125e-06, 8.672475814819336e-06, 1.8537044525146484e-05, 2.8401613235473633e-05, 3.826618194580078e-05, 4.813075065612793e-05, 5.799531936645508e-05, 6.785988807678223e-05, 7.772445678710938e-05, 8.758902549743652e-05, 9.745359420776367e-05, 0.00010731816291809082, 0.00011718273162841797, 0.00012704730033874512, 0.00013691186904907227, 0.00014677643775939941, 0.00015664100646972656, 0.0001665055751800537, 0.00017637014389038086, 0.000186234712600708, 0.00019609928131103516, 0.0002059638500213623, 0.00021582841873168945, 0.0002256929874420166, 0.00023555755615234375, 0.0002454221248626709, 0.00025528669357299805, 0.0002651512622833252, 0.00027501583099365234, 0.0002848803997039795, 0.00029474496841430664, 0.0003046095371246338, 0.00031447410583496094]}, 
"gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 2.0, 0.0, 3.0, 4.0, 2.0, 3.0, 2.0, 4.0, 6.0, 4.0, 9.0, 14.0, 15.0, 17.0, 31.0, 37.0, 44.0, 69.0, 134.0, 232.0, 126.0, 55.0, 48.0, 35.0, 23.0, 19.0, 17.0, 8.0, 11.0, 9.0, 7.0, 5.0, 3.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.343292236328125e-05, -7.093697786331177e-05, -6.844103336334229e-05, -6.59450888633728e-05, -6.344914436340332e-05, -6.095319986343384e-05, -5.8457255363464355e-05, -5.596131086349487e-05, -5.346536636352539e-05, -5.096942186355591e-05, -4.8473477363586426e-05, -4.597753286361694e-05, -4.348158836364746e-05, -4.098564386367798e-05, -3.8489699363708496e-05, -3.5993754863739014e-05, -3.349781036376953e-05, -3.100186586380005e-05, -2.8505921363830566e-05, -2.6009976863861084e-05, -2.35140323638916e-05, -2.101808786392212e-05, -1.8522143363952637e-05, -1.6026198863983154e-05, -1.3530254364013672e-05, -1.103430986404419e-05, -8.538365364074707e-06, -6.042420864105225e-06, -3.546476364135742e-06, -1.0505318641662598e-06, 1.4454126358032227e-06, 3.941357135772705e-06, 6.4373016357421875e-06, 8.93324613571167e-06, 1.1429190635681152e-05, 1.3925135135650635e-05, 1.6421079635620117e-05, 1.89170241355896e-05, 2.1412968635559082e-05, 2.3908913135528564e-05, 2.6404857635498047e-05, 2.890080213546753e-05, 3.139674663543701e-05, 3.3892691135406494e-05, 3.6388635635375977e-05, 3.888458013534546e-05, 4.138052463531494e-05, 4.3876469135284424e-05, 4.6372413635253906e-05, 4.886835813522339e-05, 5.136430263519287e-05, 5.3860247135162354e-05, 5.6356191635131836e-05, 5.885213613510132e-05, 6.13480806350708e-05, 6.384402513504028e-05, 6.633996963500977e-05, 6.883591413497925e-05, 7.133185863494873e-05, 7.382780313491821e-05, 7.63237476348877e-05, 7.881969213485718e-05, 8.131563663482666e-05, 8.381158113479614e-05, 8.630752563476562e-05]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 8.0, 13.0, 19.0, 15.0, 29.0, 23.0, 34.0, 53.0, 86.0, 111.0, 152.0, 200.0, 279.0, 385.0, 548.0, 884.0, 1305.0, 2157.0, 3641.0, 6765.0, 13920.0, 34915.0, 154434.0, 710744.0, 71514.0, 22668.0, 10106.0, 5164.0, 3009.0, 1804.0, 1063.0, 733.0, 540.0, 342.0, 256.0, 160.0, 110.0, 104.0, 64.0, 48.0, 44.0, 30.0, 19.0, 14.0, 12.0, 9.0, 4.0, 5.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.00022304058074951172, -0.0002161320298910141, -0.00020922347903251648, -0.00020231492817401886, -0.00019540637731552124, -0.00018849782645702362, -0.000181589275598526, -0.00017468072474002838, -0.00016777217388153076, -0.00016086362302303314, -0.00015395507216453552, -0.0001470465213060379, -0.00014013797044754028, -0.00013322941958904266, -0.00012632086873054504, -0.00011941231787204742, -0.0001125037670135498, -0.00010559521615505219, -9.868666529655457e-05, -9.177811443805695e-05, -8.486956357955933e-05, -7.79610127210617e-05, -7.105246186256409e-05, -6.414391100406647e-05, -5.723536014556885e-05, -5.032680928707123e-05, -4.341825842857361e-05, -3.650970757007599e-05, -2.960115671157837e-05, -2.269260585308075e-05, -1.578405499458313e-05, -8.87550413608551e-06, -1.9669532775878906e-06, 4.941597580909729e-06, 1.1850148439407349e-05, 1.8758699297904968e-05, 2.5667250156402588e-05, 3.257580101490021e-05, 3.948435187339783e-05, 4.639290273189545e-05, 5.3301453590393066e-05, 6.0210004448890686e-05, 
6.71185553073883e-05, 7.402710616588593e-05, 8.093565702438354e-05, 8.784420788288116e-05, 9.475275874137878e-05, 0.0001016613095998764, 0.00010856986045837402, 0.00011547841131687164, 0.00012238696217536926, 0.00012929551303386688, 0.0001362040638923645, 0.00014311261475086212, 0.00015002116560935974, 0.00015692971646785736, 0.00016383826732635498, 0.0001707468181848526, 0.00017765536904335022, 0.00018456391990184784, 0.00019147247076034546, 0.00019838102161884308, 0.0002052895724773407, 0.00021219812333583832, 0.00021910667419433594]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 5.0, 7.0, 7.0, 16.0, 11.0, 18.0, 39.0, 47.0, 102.0, 187.0, 215.0, 113.0, 66.0, 48.0, 20.0, 18.0, 15.0, 13.0, 11.0, 11.0, 5.0, 6.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.204843521118164e-05, -5.9767626225948334e-05, -5.748681724071503e-05, -5.520600825548172e-05, -5.292519927024841e-05, -5.0644390285015106e-05, -4.83635812997818e-05, -4.608277231454849e-05, -4.3801963329315186e-05, -4.152115434408188e-05, -3.924034535884857e-05, -3.6959536373615265e-05, -3.467872738838196e-05, -3.239791840314865e-05, -3.0117109417915344e-05, -2.7836300432682037e-05, -2.555549144744873e-05, -2.3274682462215424e-05, -2.0993873476982117e-05, -1.871306449174881e-05, -1.6432255506515503e-05, -1.4151446521282196e-05, -1.187063753604889e-05, -9.589828550815582e-06, -7.309019565582275e-06, -5.0282105803489685e-06, -2.7474015951156616e-06, -4.6659260988235474e-07, 1.8142163753509521e-06, 4.095025360584259e-06, 6.375834345817566e-06, 8.656643331050873e-06, 1.093745231628418e-05, 1.3218261301517487e-05, 1.5499070286750793e-05, 1.77798792719841e-05, 2.0060688257217407e-05, 2.2341497242450714e-05, 2.462230622768402e-05, 2.6903115212917328e-05, 2.9183924198150635e-05, 3.146473318338394e-05, 3.374554216861725e-05, 3.6026351153850555e-05, 3.830716013908386e-05, 4.058796912431717e-05, 4.2868778109550476e-05, 4.514958709478378e-05, 4.743039608001709e-05, 4.97112050652504e-05, 5.1992014050483704e-05, 5.427282303571701e-05, 5.655363202095032e-05, 5.8834441006183624e-05, 6.111524999141693e-05, 6.339605897665024e-05, 6.567686796188354e-05, 6.795767694711685e-05, 7.023848593235016e-05, 7.251929491758347e-05, 7.480010390281677e-05, 7.708091288805008e-05, 7.936172187328339e-05, 8.164253085851669e-05, 8.392333984375e-05]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 7.0, 9.0, 13.0, 12.0, 27.0, 37.0, 112.0, 187.0, 319.0, 142.0, 61.0, 32.0, 13.0, 13.0, 7.0, 3.0, 3.0, 2.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.3092880249023438, -1.2689340114593506, -1.2285799980163574, -1.1882259845733643, -1.147871971130371, -1.1075178384780884, -1.0671638250350952, -1.026809811592102, -0.9864557981491089, -0.9461017847061157, -0.9057477712631226, -0.8653936982154846, -0.8250396847724915, -0.7846856713294983, -0.7443315982818604, -0.7039775848388672, -0.663623571395874, -0.6232695579528809, -0.5829155445098877, -0.5425614714622498, -0.5022074580192566, -0.4618534445762634, -0.4214994013309479, -0.3811453580856323, -0.34079134464263916, -0.300437331199646, 
-0.26008328795433044, -0.2197292596101761, -0.17937523126602173, -0.13902120292186737, -0.09866717457771301, -0.05831313133239746, -0.017958998680114746, 0.022395029664039612, 0.06274905800819397, 0.10310308635234833, 0.14345711469650269, 0.18381114304065704, 0.2241651713848114, 0.26451921463012695, 0.3048732280731201, 0.3452272415161133, 0.38558128476142883, 0.4259353280067444, 0.46628934144973755, 0.5066433548927307, 0.5469974279403687, 0.5873514413833618, 0.627705454826355, 0.6680594682693481, 0.7084134817123413, 0.7487675547599792, 0.7891215682029724, 0.8294755816459656, 0.8698296546936035, 0.9101836681365967, 0.9505376815795898, 0.990891695022583, 1.0312457084655762, 1.0715997219085693, 1.1119537353515625, 1.1523078680038452, 1.1926618814468384, 1.2330158948898315, 1.2733699083328247]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 8.0, 6.0, 14.0, 14.0, 17.0, 22.0, 21.0, 29.0, 42.0, 41.0, 45.0, 59.0, 43.0, 58.0, 68.0, 59.0, 68.0, 52.0, 62.0, 45.0, 50.0, 34.0, 29.0, 25.0, 22.0, 22.0, 9.0, 15.0, 10.0, 3.0, 5.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.18351320922374725, -0.17560678720474243, -0.1677003800868988, -0.15979395806789398, -0.15188753604888916, -0.14398112893104553, -0.1360747069120407, -0.1281682848930359, -0.12026187032461166, -0.11235545575618744, -0.10444903373718262, -0.09654261916875839, -0.08863620460033417, -0.08072978258132935, -0.07282336801290512, -0.0649169534444809, -0.057010531425476074, -0.04910411313176155, -0.04119769483804703, -0.0332912802696228, -0.02538486197590828, -0.017478443682193756, -0.009572029113769531, -0.001665610820055008, 0.006240807473659515, 0.014147224836051464, 0.022053642198443413, 0.029960058629512787, 0.03786647692322731, 0.045772895216941833, 0.05367930978536606, 0.06158572807908058, 0.0694921612739563, 0.07739857584238052, 0.08530499786138535, 0.09321141242980957, 0.10111783444881439, 0.10902424901723862, 0.11693066358566284, 0.12483708560466766, 0.13274350762367249, 0.1406499296426773, 0.14855633676052094, 0.15646275877952576, 0.16436918079853058, 0.1722756028175354, 0.18018200993537903, 0.18808843195438385, 0.19599483907222748, 0.2039012610912323, 0.21180766820907593, 0.21971409022808075, 0.22762051224708557, 0.2355269193649292, 0.24343334138393402, 0.25133976340293884, 0.25924617052078247, 0.2671525776386261, 0.2750590145587921, 0.28296542167663574, 0.29087182879447937, 0.2987782657146454, 0.306684672832489, 0.31459107995033264, 0.32249751687049866]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 7.0, 8.0, 8.0, 15.0, 24.0, 33.0, 27.0, 57.0, 70.0, 99.0, 151.0, 195.0, 290.0, 391.0, 517.0, 737.0, 1127.0, 1698.0, 2573.0, 4020.0, 6323.0, 10543.0, 18153.0, 33346.0, 69105.0, 190380.0, 462406.0, 126500.0, 53098.0, 26938.0, 15238.0, 8699.0, 5424.0, 3366.0, 2131.0, 1410.0, 1018.0, 726.0, 453.0, 329.0, 256.0, 167.0, 148.0, 92.0, 60.0, 50.0, 38.0, 23.0, 30.0, 17.0, 15.0, 13.0, 7.0, 6.0, 4.0, 3.0, 1.0, 2.0], "bins": [-0.2266845703125, -0.21967315673828125, -0.2126617431640625, -0.20565032958984375, -0.198638916015625, -0.19162750244140625, -0.1846160888671875, -0.17760467529296875, -0.17059326171875, -0.16358184814453125, -0.1565704345703125, -0.14955902099609375, -0.142547607421875, -0.13553619384765625, -0.1285247802734375, -0.12151336669921875, -0.114501953125, 
-0.10749053955078125, -0.1004791259765625, -0.09346771240234375, -0.086456298828125, -0.07944488525390625, -0.0724334716796875, -0.06542205810546875, -0.05841064453125, -0.05139923095703125, -0.0443878173828125, -0.03737640380859375, -0.030364990234375, -0.02335357666015625, -0.0163421630859375, -0.00933074951171875, -0.0023193359375, 0.00469207763671875, 0.0117034912109375, 0.01871490478515625, 0.025726318359375, 0.03273773193359375, 0.0397491455078125, 0.04676055908203125, 0.05377197265625, 0.06078338623046875, 0.0677947998046875, 0.07480621337890625, 0.081817626953125, 0.08882904052734375, 0.0958404541015625, 0.10285186767578125, 0.10986328125, 0.11687469482421875, 0.1238861083984375, 0.13089752197265625, 0.137908935546875, 0.14492034912109375, 0.1519317626953125, 0.15894317626953125, 0.16595458984375, 0.17296600341796875, 0.1799774169921875, 0.18698883056640625, 0.194000244140625, 0.20101165771484375, 0.2080230712890625, 0.21503448486328125, 0.2220458984375]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 2.0, 3.0, 6.0, 3.0, 7.0, 9.0, 7.0, 24.0, 18.0, 38.0, 46.0, 60.0, 80.0, 99.0, 88.0, 105.0, 82.0, 74.0, 65.0, 48.0, 29.0, 28.0, 21.0, 18.0, 14.0, 6.0, 7.0, 4.0, 5.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0341796875, -0.03323698043823242, -0.032294273376464844, -0.031351566314697266, -0.030408859252929688, -0.02946615219116211, -0.02852344512939453, -0.027580738067626953, -0.026638031005859375, -0.025695323944091797, -0.02475261688232422, -0.02380990982055664, -0.022867202758789062, -0.021924495697021484, -0.020981788635253906, -0.020039081573486328, -0.01909637451171875, -0.018153667449951172, -0.017210960388183594, -0.016268253326416016, -0.015325546264648438, -0.01438283920288086, -0.013440132141113281, -0.012497425079345703, -0.011554718017578125, -0.010612010955810547, -0.009669303894042969, -0.00872659683227539, -0.0077838897705078125, -0.006841182708740234, -0.005898475646972656, -0.004955768585205078, -0.0040130615234375, -0.003070354461669922, -0.0021276473999023438, -0.0011849403381347656, -0.0002422332763671875, 0.0007004737854003906, 0.0016431808471679688, 0.002585887908935547, 0.003528594970703125, 0.004471302032470703, 0.005414009094238281, 0.006356716156005859, 0.0072994232177734375, 0.008242130279541016, 0.009184837341308594, 0.010127544403076172, 0.01107025146484375, 0.012012958526611328, 0.012955665588378906, 0.013898372650146484, 0.014841079711914062, 0.01578378677368164, 0.01672649383544922, 0.017669200897216797, 0.018611907958984375, 0.019554615020751953, 0.02049732208251953, 0.02144002914428711, 0.022382736206054688, 0.023325443267822266, 0.024268150329589844, 0.025210857391357422, 0.026153564453125]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 3.0, 5.0, 9.0, 4.0, 10.0, 18.0, 21.0, 27.0, 26.0, 46.0, 78.0, 111.0, 121.0, 188.0, 257.0, 318.0, 540.0, 763.0, 1055.0, 1567.0, 2416.0, 3985.0, 6180.0, 10544.0, 18775.0, 36927.0, 83566.0, 276900.0, 395443.0, 109208.0, 44898.0, 22377.0, 12217.0, 7170.0, 4313.0, 2694.0, 1827.0, 1218.0, 823.0, 565.0, 406.0, 264.0, 196.0, 138.0, 99.0, 74.0, 46.0, 43.0, 28.0, 20.0, 16.0, 5.0, 6.0, 5.0, 6.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.096435546875, -0.09343433380126953, -0.09043312072753906, -0.0874319076538086, -0.08443069458007812, -0.08142948150634766, 
-0.07842826843261719, -0.07542705535888672, -0.07242584228515625, -0.06942462921142578, -0.06642341613769531, -0.06342220306396484, -0.060420989990234375, -0.057419776916503906, -0.05441856384277344, -0.05141735076904297, -0.0484161376953125, -0.04541492462158203, -0.04241371154785156, -0.039412498474121094, -0.036411285400390625, -0.033410072326660156, -0.030408859252929688, -0.02740764617919922, -0.02440643310546875, -0.02140522003173828, -0.018404006958007812, -0.015402793884277344, -0.012401580810546875, -0.009400367736816406, -0.0063991546630859375, -0.0033979415893554688, -0.000396728515625, 0.0026044845581054688, 0.0056056976318359375, 0.008606910705566406, 0.011608123779296875, 0.014609336853027344, 0.017610549926757812, 0.02061176300048828, 0.02361297607421875, 0.02661418914794922, 0.029615402221679688, 0.032616615295410156, 0.035617828369140625, 0.038619041442871094, 0.04162025451660156, 0.04462146759033203, 0.0476226806640625, 0.05062389373779297, 0.05362510681152344, 0.056626319885253906, 0.059627532958984375, 0.06262874603271484, 0.06562995910644531, 0.06863117218017578, 0.07163238525390625, 0.07463359832763672, 0.07763481140136719, 0.08063602447509766, 0.08363723754882812, 0.0866384506225586, 0.08963966369628906, 0.09264087677001953, 0.09564208984375]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 6.0, 7.0, 6.0, 11.0, 14.0, 13.0, 15.0, 6.0, 17.0, 19.0, 21.0, 29.0, 32.0, 36.0, 48.0, 55.0, 63.0, 58.0, 54.0, 54.0, 64.0, 54.0, 52.0, 50.0, 36.0, 32.0, 20.0, 22.0, 20.0, 13.0, 12.0, 9.0, 7.0, 8.0, 7.0, 5.0, 6.0, 4.0, 5.0, 2.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.056365966796875, -0.05472755432128906, -0.053089141845703125, -0.05145072937011719, -0.04981231689453125, -0.04817390441894531, -0.046535491943359375, -0.04489707946777344, -0.0432586669921875, -0.04162025451660156, -0.039981842041015625, -0.03834342956542969, -0.03670501708984375, -0.03506660461425781, -0.033428192138671875, -0.03178977966308594, -0.0301513671875, -0.028512954711914062, -0.026874542236328125, -0.025236129760742188, -0.02359771728515625, -0.021959304809570312, -0.020320892333984375, -0.018682479858398438, -0.0170440673828125, -0.015405654907226562, -0.013767242431640625, -0.012128829956054688, -0.01049041748046875, -0.008852005004882812, -0.007213592529296875, -0.0055751800537109375, -0.003936767578125, -0.0022983551025390625, -0.000659942626953125, 0.0009784698486328125, 0.00261688232421875, 0.0042552947998046875, 0.005893707275390625, 0.0075321197509765625, 0.0091705322265625, 0.010808944702148438, 0.012447357177734375, 0.014085769653320312, 0.01572418212890625, 0.017362594604492188, 0.019001007080078125, 0.020639419555664062, 0.02227783203125, 0.023916244506835938, 0.025554656982421875, 0.027193069458007812, 0.02883148193359375, 0.030469894409179688, 0.032108306884765625, 0.03374671936035156, 0.0353851318359375, 0.03702354431152344, 0.038661956787109375, 0.04030036926269531, 0.04193878173828125, 0.04357719421386719, 0.045215606689453125, 0.04685401916503906, 0.048492431640625]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 5.0, 5.0, 2.0, 5.0, 11.0, 6.0, 10.0, 16.0, 19.0, 26.0, 46.0, 57.0, 77.0, 111.0, 210.0, 309.0, 573.0, 1093.0, 2460.0, 9591.0, 1012422.0, 15466.0, 3136.0, 1174.0, 669.0, 398.0, 212.0, 132.0, 91.0, 53.0, 48.0, 34.0, 34.0, 12.0, 15.0, 
9.0, 7.0, 3.0, 7.0, 3.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.90625, -4.7459716796875, -4.585693359375, -4.4254150390625, -4.26513671875, -4.1048583984375, -3.944580078125, -3.7843017578125, -3.6240234375, -3.4637451171875, -3.303466796875, -3.1431884765625, -2.98291015625, -2.8226318359375, -2.662353515625, -2.5020751953125, -2.341796875, -2.1815185546875, -2.021240234375, -1.8609619140625, -1.70068359375, -1.5404052734375, -1.380126953125, -1.2198486328125, -1.0595703125, -0.8992919921875, -0.739013671875, -0.5787353515625, -0.41845703125, -0.2581787109375, -0.097900390625, 0.0623779296875, 0.22265625, 0.3829345703125, 0.543212890625, 0.7034912109375, 0.86376953125, 1.0240478515625, 1.184326171875, 1.3446044921875, 1.5048828125, 1.6651611328125, 1.825439453125, 1.9857177734375, 2.14599609375, 2.3062744140625, 2.466552734375, 2.6268310546875, 2.787109375, 2.9473876953125, 3.107666015625, 3.2679443359375, 3.42822265625, 3.5885009765625, 3.748779296875, 3.9090576171875, 4.0693359375, 4.2296142578125, 4.389892578125, 4.5501708984375, 4.71044921875, 4.8707275390625, 5.031005859375, 5.1912841796875, 5.3515625]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 8.0, 13.0, 56.0, 834.0, 39.0, 13.0, 5.0, 6.0, 3.0, 0.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0013418197631835938, -0.0013050884008407593, -0.0012683570384979248, -0.0012316256761550903, -0.0011948943138122559, -0.0011581629514694214, -0.001121431589126587, -0.0010847002267837524, -0.001047968864440918, -0.0010112375020980835, -0.000974506139755249, -0.0009377747774124146, -0.0009010434150695801, -0.0008643120527267456, -0.0008275806903839111, -0.0007908493280410767, -0.0007541179656982422, -0.0007173866033554077, -0.0006806552410125732, -0.0006439238786697388, -0.0006071925163269043, -0.0005704611539840698, -0.0005337297916412354, -0.0004969984292984009, -0.0004602670669555664, -0.00042353570461273193, -0.00038680434226989746, -0.000350072979927063, -0.0003133416175842285, -0.00027661025524139404, -0.00023987889289855957, -0.0002031475305557251, -0.00016641616821289062, -0.00012968480587005615, -9.295344352722168e-05, -5.622208118438721e-05, -1.9490718841552734e-05, 1.7240643501281738e-05, 5.397200584411621e-05, 9.070336818695068e-05, 0.00012743473052978516, 0.00016416609287261963, 0.0002008974552154541, 0.00023762881755828857, 0.00027436017990112305, 0.0003110915422439575, 0.000347822904586792, 0.00038455426692962646, 0.00042128562927246094, 0.0004580169916152954, 0.0004947483539581299, 0.0005314797163009644, 0.0005682110786437988, 0.0006049424409866333, 0.0006416738033294678, 0.0006784051656723022, 0.0007151365280151367, 0.0007518678903579712, 0.0007885992527008057, 0.0008253306150436401, 0.0008620619773864746, 0.0008987933397293091, 0.0009355247020721436, 0.000972256064414978, 0.0010089874267578125]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 1.0, 3.0, 3.0, 10.0, 7.0, 10.0, 18.0, 22.0, 24.0, 35.0, 46.0, 65.0, 77.0, 91.0, 135.0, 162.0, 256.0, 323.0, 481.0, 729.0, 987.0, 1534.0, 2250.0, 3592.0, 5991.0, 13290.0, 588805.0, 399843.0, 12844.0, 6129.0, 3735.0, 2172.0, 1449.0, 951.0, 659.0, 495.0, 393.0, 270.0, 176.0, 141.0, 
100.0, 87.0, 43.0, 35.0, 28.0, 18.0, 9.0, 12.0, 9.0, 10.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.59423828125, -0.5750656127929688, -0.5558929443359375, -0.5367202758789062, -0.517547607421875, -0.49837493896484375, -0.4792022705078125, -0.46002960205078125, -0.44085693359375, -0.42168426513671875, -0.4025115966796875, -0.38333892822265625, -0.364166259765625, -0.34499359130859375, -0.3258209228515625, -0.30664825439453125, -0.2874755859375, -0.26830291748046875, -0.2491302490234375, -0.22995758056640625, -0.210784912109375, -0.19161224365234375, -0.1724395751953125, -0.15326690673828125, -0.13409423828125, -0.11492156982421875, -0.0957489013671875, -0.07657623291015625, -0.057403564453125, -0.03823089599609375, -0.0190582275390625, 0.00011444091796875, 0.019287109375, 0.03845977783203125, 0.0576324462890625, 0.07680511474609375, 0.095977783203125, 0.11515045166015625, 0.1343231201171875, 0.15349578857421875, 0.17266845703125, 0.19184112548828125, 0.2110137939453125, 0.23018646240234375, 0.249359130859375, 0.26853179931640625, 0.2877044677734375, 0.30687713623046875, 0.3260498046875, 0.34522247314453125, 0.3643951416015625, 0.38356781005859375, 0.402740478515625, 0.42191314697265625, 0.4410858154296875, 0.46025848388671875, 0.47943115234375, 0.49860382080078125, 0.5177764892578125, 0.5369491577148438, 0.556121826171875, 0.5752944946289062, 0.5944671630859375, 0.6136398315429688, 0.6328125]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 3.0, 4.0, 3.0, 5.0, 4.0, 7.0, 11.0, 47.0, 433.0, 382.0, 40.0, 12.0, 7.0, 4.0, 2.0, 4.0, 5.0, 2.0, 6.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.385986328125, -0.3746299743652344, -0.36327362060546875, -0.3519172668457031, -0.3405609130859375, -0.3292045593261719, -0.31784820556640625, -0.3064918518066406, -0.295135498046875, -0.2837791442871094, -0.27242279052734375, -0.2610664367675781, -0.2497100830078125, -0.23835372924804688, -0.22699737548828125, -0.21564102172851562, -0.20428466796875, -0.19292831420898438, -0.18157196044921875, -0.17021560668945312, -0.1588592529296875, -0.14750289916992188, -0.13614654541015625, -0.12479019165039062, -0.113433837890625, -0.10207748413085938, -0.09072113037109375, -0.07936477661132812, -0.0680084228515625, -0.056652069091796875, -0.04529571533203125, -0.033939361572265625, -0.0225830078125, -0.011226654052734375, 0.00012969970703125, 0.011486053466796875, 0.0228424072265625, 0.034198760986328125, 0.04555511474609375, 0.056911468505859375, 0.068267822265625, 0.07962417602539062, 0.09098052978515625, 0.10233688354492188, 0.1136932373046875, 0.12504959106445312, 0.13640594482421875, 0.14776229858398438, 0.15911865234375, 0.17047500610351562, 0.18183135986328125, 0.19318771362304688, 0.2045440673828125, 0.21590042114257812, 0.22725677490234375, 0.23861312866210938, 0.249969482421875, 0.2613258361816406, 0.27268218994140625, 0.2840385437011719, 0.2953948974609375, 0.3067512512207031, 0.31810760498046875, 0.3294639587402344, 0.3408203125]}, "gradients/decoder.model.decoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 0.0, 4.0, 5.0, 8.0, 15.0, 13.0, 38.0, 65.0, 267.0, 395.0, 96.0, 38.0, 22.0, 14.0, 8.0, 9.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.361732482910156, -6.07496976852417, -5.788207530975342, -5.5014448165893555, -5.214682579040527, -4.927919864654541, -4.641157150268555, -4.354394912719727, -4.06763219833374, -3.780869722366333, -3.494107246398926, -3.2073445320129395, -2.9205820560455322, -2.633819580078125, -2.3470568656921387, -2.0602943897247314, -1.7735319137573242, -1.486769437789917, -1.2000068426132202, -0.9132443070411682, -0.6264817714691162, -0.339719295501709, -0.05295670032501221, 0.23380589485168457, 0.5205683708190918, 0.8073309063911438, 1.0940934419631958, 1.3808560371398926, 1.6676185131072998, 1.954380989074707, 2.2411437034606934, 2.5279061794281006, 2.8146677017211914, 3.1014301776885986, 3.388192653656006, 3.674955368041992, 3.9617178440093994, 4.248480319976807, 4.535243034362793, 4.822005271911621, 5.108767986297607, 5.395530700683594, 5.682292938232422, 5.969055652618408, 6.2558183670043945, 6.542580604553223, 6.829343318939209, 7.116106033325195, 7.402868270874023, 7.68963098526001, 7.976393222808838, 8.263155937194824, 8.549918174743652, 8.836681365966797, 9.123443603515625, 9.410205841064453, 9.696968078613281, 9.98373031616211, 10.270493507385254, 10.557255744934082, 10.84401798248291, 11.130781173706055, 11.417543411254883, 11.704305648803711, 11.991068840026855]}, "gradients/decoder.model.decoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 6.0, 9.0, 5.0, 8.0, 8.0, 17.0, 8.0, 10.0, 20.0, 16.0, 19.0, 19.0, 28.0, 21.0, 28.0, 27.0, 26.0, 39.0, 33.0, 39.0, 39.0, 43.0, 44.0, 30.0, 44.0, 29.0, 37.0, 47.0, 27.0, 31.0, 39.0, 37.0, 23.0, 20.0, 15.0, 20.0, 22.0, 8.0, 14.0, 9.0, 11.0, 9.0, 4.0, 6.0, 8.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3854639530181885, -0.37170493602752686, -0.35794588923454285, -0.3441868722438812, -0.3304278254508972, -0.3166688084602356, -0.302909791469574, -0.28915074467658997, -0.27539169788360596, -0.26163268089294434, -0.24787363409996033, -0.2341146171092987, -0.2203555703163147, -0.20659655332565308, -0.19283752143383026, -0.17907848954200745, -0.16531947255134583, -0.151560440659523, -0.1378014087677002, -0.12404238432645798, -0.11028335243463516, -0.09652432054281235, -0.08276529610157013, -0.06900626420974731, -0.0552472323179245, -0.041488200426101685, -0.027729172259569168, -0.013970144093036652, -0.00021111220121383667, 0.013547919690608978, 0.027306944131851196, 0.04106597602367401, 0.05482497811317444, 0.06858401000499725, 0.08234304189682007, 0.09610206633806229, 0.1098610982298851, 0.12362013012170792, 0.13737915456295013, 0.15113818645477295, 0.16489721834659576, 0.17865625023841858, 0.1924152821302414, 0.2061743140220642, 0.21993333101272583, 0.23369237780570984, 0.24745139479637146, 0.26121044158935547, 0.2749694585800171, 0.2887284755706787, 0.3024875223636627, 0.31624653935432434, 0.33000558614730835, 0.34376460313796997, 0.3575236201286316, 0.3712826669216156, 0.3850417137145996, 0.39880073070526123, 0.41255977749824524, 0.42631879448890686, 0.44007784128189087, 0.4538368582725525, 0.4675958752632141, 0.4813549220561981, 0.49511393904685974]}, "gradients/decoder.model.decoder.layers.7.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 5.0, 7.0, 10.0, 17.0, 22.0, 24.0, 43.0, 46.0, 88.0, 101.0, 165.0, 248.0, 378.0, 549.0, 839.0, 1382.0, 2407.0, 4693.0, 10575.0, 35798.0, 4094889.0, 24603.0, 8323.0, 
3811.0, 1989.0, 1154.0, 749.0, 447.0, 289.0, 202.0, 143.0, 88.0, 68.0, 41.0, 34.0, 24.0, 15.0, 16.0, 2.0, 5.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.958984375, -3.824249267578125, -3.68951416015625, -3.554779052734375, -3.4200439453125, -3.285308837890625, -3.15057373046875, -3.015838623046875, -2.881103515625, -2.746368408203125, -2.61163330078125, -2.476898193359375, -2.3421630859375, -2.207427978515625, -2.07269287109375, -1.937957763671875, -1.80322265625, -1.668487548828125, -1.53375244140625, -1.399017333984375, -1.2642822265625, -1.129547119140625, -0.99481201171875, -0.860076904296875, -0.725341796875, -0.590606689453125, -0.45587158203125, -0.321136474609375, -0.1864013671875, -0.051666259765625, 0.08306884765625, 0.217803955078125, 0.3525390625, 0.487274169921875, 0.62200927734375, 0.756744384765625, 0.8914794921875, 1.026214599609375, 1.16094970703125, 1.295684814453125, 1.430419921875, 1.565155029296875, 1.69989013671875, 1.834625244140625, 1.9693603515625, 2.104095458984375, 2.23883056640625, 2.373565673828125, 2.50830078125, 2.643035888671875, 2.77777099609375, 2.912506103515625, 3.0472412109375, 3.181976318359375, 3.31671142578125, 3.451446533203125, 3.586181640625, 3.720916748046875, 3.85565185546875, 3.990386962890625, 4.1251220703125, 4.259857177734375, 4.39459228515625, 4.529327392578125, 4.6640625]}, "gradients/decoder.model.decoder.layers.7.fc2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 3.0, 7.0, 11.0, 13.0, 14.0, 24.0, 27.0, 24.0, 46.0, 98.0, 134.0, 196.0, 134.0, 85.0, 53.0, 30.0, 15.0, 27.0, 11.0, 5.0, 3.0, 9.0, 4.0, 1.0, 5.0, 5.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1685791015625, -0.16304588317871094, -0.15751266479492188, -0.1519794464111328, -0.14644622802734375, -0.1409130096435547, -0.13537979125976562, -0.12984657287597656, -0.1243133544921875, -0.11878013610839844, -0.11324691772460938, -0.10771369934082031, -0.10218048095703125, -0.09664726257324219, -0.09111404418945312, -0.08558082580566406, -0.080047607421875, -0.07451438903808594, -0.06898117065429688, -0.06344795227050781, -0.05791473388671875, -0.05238151550292969, -0.046848297119140625, -0.04131507873535156, -0.0357818603515625, -0.030248641967773438, -0.024715423583984375, -0.019182205200195312, -0.01364898681640625, -0.008115768432617188, -0.002582550048828125, 0.0029506683349609375, 0.00848388671875, 0.014017105102539062, 0.019550323486328125, 0.025083541870117188, 0.03061676025390625, 0.03614997863769531, 0.041683197021484375, 0.04721641540527344, 0.0527496337890625, 0.05828285217285156, 0.06381607055664062, 0.06934928894042969, 0.07488250732421875, 0.08041572570800781, 0.08594894409179688, 0.09148216247558594, 0.097015380859375, 0.10254859924316406, 0.10808181762695312, 0.11361503601074219, 0.11914825439453125, 0.12468147277832031, 0.13021469116210938, 0.13574790954589844, 0.1412811279296875, 0.14681434631347656, 0.15234756469726562, 0.1578807830810547, 0.16341400146484375, 0.1689472198486328, 0.17448043823242188, 0.18001365661621094, 0.185546875]}, "gradients/decoder.model.decoder.layers.7.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 4.0, 2.0, 3.0, 5.0, 6.0, 15.0, 69.0, 406.0, 5185.0, 4175492.0, 12334.0, 618.0, 98.0, 20.0, 12.0, 7.0, 5.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 
0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.87890625, -7.60357666015625, -7.3282470703125, -7.05291748046875, -6.777587890625, -6.50225830078125, -6.2269287109375, -5.95159912109375, -5.67626953125, -5.40093994140625, -5.1256103515625, -4.85028076171875, -4.574951171875, -4.29962158203125, -4.0242919921875, -3.74896240234375, -3.4736328125, -3.19830322265625, -2.9229736328125, -2.64764404296875, -2.372314453125, -2.09698486328125, -1.8216552734375, -1.54632568359375, -1.27099609375, -0.99566650390625, -0.7203369140625, -0.44500732421875, -0.169677734375, 0.10565185546875, 0.3809814453125, 0.65631103515625, 0.931640625, 1.20697021484375, 1.4822998046875, 1.75762939453125, 2.032958984375, 2.30828857421875, 2.5836181640625, 2.85894775390625, 3.13427734375, 3.40960693359375, 3.6849365234375, 3.96026611328125, 4.235595703125, 4.51092529296875, 4.7862548828125, 5.06158447265625, 5.3369140625, 5.61224365234375, 5.8875732421875, 6.16290283203125, 6.438232421875, 6.71356201171875, 6.9888916015625, 7.26422119140625, 7.53955078125, 7.81488037109375, 8.0902099609375, 8.36553955078125, 8.640869140625, 8.91619873046875, 9.1915283203125, 9.46685791015625, 9.7421875]}, "gradients/decoder.model.decoder.layers.7.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 6.0, 7.0, 10.0, 30.0, 104.0, 3747.0, 86.0, 33.0, 12.0, 10.0, 9.0, 6.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58251953125, -0.5620651245117188, -0.5416107177734375, -0.5211563110351562, -0.500701904296875, -0.48024749755859375, -0.4597930908203125, -0.43933868408203125, -0.41888427734375, -0.39842987060546875, -0.3779754638671875, -0.35752105712890625, -0.337066650390625, -0.31661224365234375, -0.2961578369140625, -0.27570343017578125, -0.2552490234375, -0.23479461669921875, -0.2143402099609375, -0.19388580322265625, -0.173431396484375, -0.15297698974609375, -0.1325225830078125, -0.11206817626953125, -0.09161376953125, -0.07115936279296875, -0.0507049560546875, -0.03025054931640625, -0.009796142578125, 0.01065826416015625, 0.0311126708984375, 0.05156707763671875, 0.072021484375, 0.09247589111328125, 0.1129302978515625, 0.13338470458984375, 0.153839111328125, 0.17429351806640625, 0.1947479248046875, 0.21520233154296875, 0.23565673828125, 0.25611114501953125, 0.2765655517578125, 0.29701995849609375, 0.317474365234375, 0.33792877197265625, 0.3583831787109375, 0.37883758544921875, 0.3992919921875, 0.41974639892578125, 0.4402008056640625, 0.46065521240234375, 0.481109619140625, 0.5015640258789062, 0.5220184326171875, 0.5424728393554688, 0.56292724609375, 0.5833816528320312, 0.6038360595703125, 0.6242904663085938, 0.644744873046875, 0.6651992797851562, 0.6856536865234375, 0.7061080932617188, 0.7265625]}, "gradients/decoder.model.decoder.layers.7.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 9.0, 7.0, 13.0, 14.0, 20.0, 22.0, 43.0, 60.0, 75.0, 123.0, 174.0, 127.0, 86.0, 58.0, 50.0, 31.0, 24.0, 17.0, 12.0, 6.0, 11.0, 2.0, 4.0, 6.0, 0.0, 5.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0336763858795166, -1.0061780214309692, -0.9786795973777771, -0.951181173324585, -0.9236828088760376, 
[... raw wandb gradient-histogram data elided for readability. Each key in this span maps one parameter to a "_type": "histogram" entry holding two parallel arrays: values (per-bin counts) and bins (bin edges). The parameters covered here are gradients/decoder.model.decoder.layers.7.* (encoder_attn_layer_norm, encoder_attn.out_proj/v_proj/k_proj/q_proj, self_attn_layer_norm, self_attn.out_proj/v_proj/k_proj/q_proj) and gradients/decoder.model.decoder.layers.6.* (final_layer_norm, fc2, fc1, encoder_attn_layer_norm, encoder_attn.out_proj/v_proj/k_proj/q_proj, self_attn_layer_norm, self_attn.out_proj), each with separate .weight and .bias histograms; a short reading sketch follows. ...]
0.003214120864868164, 0.003483489155769348, 0.0037528574466705322, 0.004022225737571716, 0.0042915940284729, 0.0045609623193740845, 0.0048303306102752686, 0.005099698901176453, 0.005369067192077637, 0.005638435482978821, 0.005907803773880005, 0.006177172064781189, 0.006446540355682373, 0.006715908646583557, 0.006985276937484741, 0.007254645228385925, 0.007524013519287109, 0.0077933818101882935, 0.008062750101089478, 0.008332118391990662, 0.008601486682891846, 0.00887085497379303, 0.009140223264694214, 0.009409591555595398, 0.009678959846496582, 0.009948328137397766, 0.01021769642829895, 0.010487064719200134, 0.010756433010101318, 0.011025801301002502, 0.011295169591903687, 0.01156453788280487, 0.011833906173706055, 0.012103274464607239, 0.012372642755508423, 0.012642011046409607, 0.012911379337310791, 0.013180747628211975, 0.01345011591911316, 0.013719484210014343, 0.013988852500915527, 0.014258220791816711, 0.014527589082717896, 0.01479695737361908, 0.015066325664520264, 0.015335693955421448, 0.015605062246322632, 0.015874430537223816, 0.016143798828125]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 7.0, 10.0, 10.0, 13.0, 32.0, 30.0, 44.0, 50.0, 84.0, 131.0, 178.0, 251.0, 352.0, 541.0, 870.0, 1347.0, 2005.0, 3322.0, 5529.0, 9540.0, 17114.0, 32223.0, 66600.0, 170290.0, 455965.0, 151236.0, 61709.0, 29706.0, 16088.0, 8888.0, 5333.0, 3182.0, 2037.0, 1307.0, 793.0, 557.0, 391.0, 241.0, 181.0, 109.0, 73.0, 57.0, 40.0, 25.0, 14.0, 14.0, 17.0, 6.0, 3.0, 6.0, 1.0, 2.0, 4.0, 0.0, 0.0, 2.0], "bins": [-0.0052490234375, -0.00508725643157959, -0.00492548942565918, -0.0047637224197387695, -0.004601955413818359, -0.004440188407897949, -0.004278421401977539, -0.004116654396057129, -0.003954887390136719, -0.0037931203842163086, -0.0036313533782958984, -0.0034695863723754883, -0.003307819366455078, -0.003146052360534668, -0.002984285354614258, -0.0028225183486938477, -0.0026607513427734375, -0.0024989843368530273, -0.002337217330932617, -0.002175450325012207, -0.002013683319091797, -0.0018519163131713867, -0.0016901493072509766, -0.0015283823013305664, -0.0013666152954101562, -0.001204848289489746, -0.001043081283569336, -0.0008813142776489258, -0.0007195472717285156, -0.0005577802658081055, -0.0003960132598876953, -0.00023424625396728516, -7.2479248046875e-05, 8.928775787353516e-05, 0.0002510547637939453, 0.00041282176971435547, 0.0005745887756347656, 0.0007363557815551758, 0.0008981227874755859, 0.001059889793395996, 0.0012216567993164062, 0.0013834238052368164, 0.0015451908111572266, 0.0017069578170776367, 0.0018687248229980469, 0.002030491828918457, 0.002192258834838867, 0.0023540258407592773, 0.0025157928466796875, 0.0026775598526000977, 0.002839326858520508, 0.003001093864440918, 0.003162860870361328, 0.0033246278762817383, 0.0034863948822021484, 0.0036481618881225586, 0.0038099288940429688, 0.003971695899963379, 0.004133462905883789, 0.004295229911804199, 0.004456996917724609, 0.0046187639236450195, 0.00478053092956543, 0.00494229793548584, 0.00510406494140625]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 7.0, 2.0, 3.0, 5.0, 1.0, 11.0, 6.0, 19.0, 15.0, 27.0, 25.0, 31.0, 32.0, 40.0, 45.0, 63.0, 65.0, 76.0, 75.0, 69.0, 56.0, 54.0, 36.0, 40.0, 39.0, 35.0, 34.0, 13.0, 24.0, 15.0, 13.0, 5.0, 4.0, 8.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], 
"bins": [-0.0036468505859375, -0.003534287214279175, -0.0034217238426208496, -0.0033091604709625244, -0.0031965970993041992, -0.003084033727645874, -0.002971470355987549, -0.0028589069843292236, -0.0027463436126708984, -0.0026337802410125732, -0.002521216869354248, -0.002408653497695923, -0.0022960901260375977, -0.0021835267543792725, -0.0020709633827209473, -0.001958400011062622, -0.0018458366394042969, -0.0017332732677459717, -0.0016207098960876465, -0.0015081465244293213, -0.001395583152770996, -0.001283019781112671, -0.0011704564094543457, -0.0010578930377960205, -0.0009453296661376953, -0.0008327662944793701, -0.0007202029228210449, -0.0006076395511627197, -0.0004950761795043945, -0.00038251280784606934, -0.00026994943618774414, -0.00015738606452941895, -4.482269287109375e-05, 6.774067878723145e-05, 0.00018030405044555664, 0.00029286742210388184, 0.00040543079376220703, 0.0005179941654205322, 0.0006305575370788574, 0.0007431209087371826, 0.0008556842803955078, 0.000968247652053833, 0.0010808110237121582, 0.0011933743953704834, 0.0013059377670288086, 0.0014185011386871338, 0.001531064510345459, 0.0016436278820037842, 0.0017561912536621094, 0.0018687546253204346, 0.0019813179969787598, 0.002093881368637085, 0.00220644474029541, 0.0023190081119537354, 0.0024315714836120605, 0.0025441348552703857, 0.002656698226928711, 0.002769261598587036, 0.0028818249702453613, 0.0029943883419036865, 0.0031069517135620117, 0.003219515085220337, 0.003332078456878662, 0.0034446418285369873, 0.0035572052001953125]}, "gradients/decoder.model.decoder.layers.6.self_attn.k_proj.weight": {"_type": "histogram", "values": [4.0, 0.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 4.0, 7.0, 8.0, 18.0, 11.0, 23.0, 26.0, 28.0, 51.0, 50.0, 64.0, 85.0, 125.0, 153.0, 229.0, 314.0, 453.0, 643.0, 1125.0, 2356.0, 6917.0, 45218.0, 964621.0, 17149.0, 4269.0, 1723.0, 919.0, 552.0, 370.0, 247.0, 205.0, 128.0, 121.0, 72.0, 54.0, 47.0, 40.0, 30.0, 29.0, 16.0, 14.0, 13.0, 8.0, 3.0, 3.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0], "bins": [-0.038116455078125, -0.03689861297607422, -0.03568077087402344, -0.034462928771972656, -0.033245086669921875, -0.032027244567871094, -0.030809402465820312, -0.02959156036376953, -0.02837371826171875, -0.02715587615966797, -0.025938034057617188, -0.024720191955566406, -0.023502349853515625, -0.022284507751464844, -0.021066665649414062, -0.01984882354736328, -0.0186309814453125, -0.01741313934326172, -0.016195297241210938, -0.014977455139160156, -0.013759613037109375, -0.012541770935058594, -0.011323928833007812, -0.010106086730957031, -0.00888824462890625, -0.007670402526855469, -0.0064525604248046875, -0.005234718322753906, -0.004016876220703125, -0.0027990341186523438, -0.0015811920166015625, -0.00036334991455078125, 0.0008544921875, 0.0020723342895507812, 0.0032901763916015625, 0.004508018493652344, 0.005725860595703125, 0.006943702697753906, 0.008161544799804688, 0.009379386901855469, 0.01059722900390625, 0.011815071105957031, 0.013032913208007812, 0.014250755310058594, 0.015468597412109375, 0.016686439514160156, 0.017904281616210938, 0.01912212371826172, 0.0203399658203125, 0.02155780792236328, 0.022775650024414062, 0.023993492126464844, 0.025211334228515625, 0.026429176330566406, 0.027647018432617188, 0.02886486053466797, 0.03008270263671875, 0.03130054473876953, 0.03251838684082031, 0.033736228942871094, 0.034954071044921875, 0.036171913146972656, 0.03738975524902344, 0.03860759735107422, 0.039825439453125]}, "gradients/decoder.model.decoder.layers.6.self_attn.k_proj.bias": {"_type": 
"histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 9.0, 4.0, 8.0, 16.0, 31.0, 81.0, 620.0, 149.0, 34.0, 15.0, 7.0, 5.0, 10.0, 4.0, 5.0, 1.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.233816146850586e-05, -1.1934898793697357e-05, -1.1531636118888855e-05, -1.1128373444080353e-05, -1.072511076927185e-05, -1.0321848094463348e-05, -9.918585419654846e-06, -9.515322744846344e-06, -9.112060070037842e-06, -8.70879739522934e-06, -8.305534720420837e-06, -7.902272045612335e-06, -7.499009370803833e-06, -7.095746695995331e-06, -6.692484021186829e-06, -6.289221346378326e-06, -5.885958671569824e-06, -5.482695996761322e-06, -5.07943332195282e-06, -4.676170647144318e-06, -4.2729079723358154e-06, -3.869645297527313e-06, -3.466382622718811e-06, -3.063119947910309e-06, -2.6598572731018066e-06, -2.2565945982933044e-06, -1.8533319234848022e-06, -1.4500692486763e-06, -1.0468065738677979e-06, -6.435438990592957e-07, -2.4028122425079346e-07, 1.6298145055770874e-07, 5.662441253662109e-07, 9.695068001747131e-07, 1.3727694749832153e-06, 1.7760321497917175e-06, 2.1792948246002197e-06, 2.582557499408722e-06, 2.985820174217224e-06, 3.3890828490257263e-06, 3.7923455238342285e-06, 4.195608198642731e-06, 4.598870873451233e-06, 5.002133548259735e-06, 5.405396223068237e-06, 5.8086588978767395e-06, 6.211921572685242e-06, 6.615184247493744e-06, 7.018446922302246e-06, 7.421709597110748e-06, 7.82497227191925e-06, 8.228234946727753e-06, 8.631497621536255e-06, 9.034760296344757e-06, 9.43802297115326e-06, 9.841285645961761e-06, 1.0244548320770264e-05, 1.0647810995578766e-05, 1.1051073670387268e-05, 1.145433634519577e-05, 1.1857599020004272e-05, 1.2260861694812775e-05, 1.2664124369621277e-05, 1.3067387044429779e-05, 1.3470649719238281e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 5.0, 5.0, 3.0, 6.0, 5.0, 10.0, 18.0, 26.0, 36.0, 35.0, 41.0, 58.0, 78.0, 114.0, 161.0, 173.0, 278.0, 379.0, 515.0, 726.0, 994.0, 1570.0, 2535.0, 4636.0, 11104.0, 45467.0, 910771.0, 45187.0, 11350.0, 4644.0, 2526.0, 1501.0, 971.0, 730.0, 461.0, 368.0, 265.0, 212.0, 125.0, 110.0, 90.0, 62.0, 56.0, 43.0, 22.0, 16.0, 21.0, 14.0, 16.0, 10.0, 6.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.009674072265625, -0.0093461275100708, -0.009018182754516602, -0.008690237998962402, -0.008362293243408203, -0.008034348487854004, -0.007706403732299805, -0.0073784589767456055, -0.007050514221191406, -0.006722569465637207, -0.006394624710083008, -0.006066679954528809, -0.005738735198974609, -0.00541079044342041, -0.005082845687866211, -0.004754900932312012, -0.0044269561767578125, -0.004099011421203613, -0.003771066665649414, -0.003443121910095215, -0.0031151771545410156, -0.0027872323989868164, -0.002459287643432617, -0.002131342887878418, -0.0018033981323242188, -0.0014754533767700195, -0.0011475086212158203, -0.0008195638656616211, -0.0004916191101074219, -0.00016367435455322266, 0.00016427040100097656, 0.0004922151565551758, 0.000820159912109375, 0.0011481046676635742, 0.0014760494232177734, 0.0018039941787719727, 0.002131938934326172, 0.002459883689880371, 0.0027878284454345703, 0.0031157732009887695, 0.0034437179565429688, 0.003771662712097168, 0.004099607467651367, 0.004427552223205566, 0.004755496978759766, 0.005083441734313965, 0.005411386489868164, 0.005739331245422363, 0.0060672760009765625, 
0.006395220756530762, 0.006723165512084961, 0.00705111026763916, 0.007379055023193359, 0.007706999778747559, 0.008034944534301758, 0.008362889289855957, 0.008690834045410156, 0.009018778800964355, 0.009346723556518555, 0.009674668312072754, 0.010002613067626953, 0.010330557823181152, 0.010658502578735352, 0.01098644733428955, 0.01131439208984375]}, "gradients/decoder.model.decoder.layers.6.self_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 5.0, 1.0, 4.0, 5.0, 4.0, 11.0, 8.0, 19.0, 25.0, 42.0, 253.0, 459.0, 68.0, 41.0, 23.0, 8.0, 9.0, 4.0, 1.0, 1.0, 5.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0060272216796875, -0.005813121795654297, -0.005599021911621094, -0.005384922027587891, -0.0051708221435546875, -0.004956722259521484, -0.004742622375488281, -0.004528522491455078, -0.004314422607421875, -0.004100322723388672, -0.0038862228393554688, -0.0036721229553222656, -0.0034580230712890625, -0.0032439231872558594, -0.0030298233032226562, -0.002815723419189453, -0.00260162353515625, -0.002387523651123047, -0.0021734237670898438, -0.0019593238830566406, -0.0017452239990234375, -0.0015311241149902344, -0.0013170242309570312, -0.0011029243469238281, -0.000888824462890625, -0.0006747245788574219, -0.00046062469482421875, -0.0002465248107910156, -3.24249267578125e-05, 0.00018167495727539062, 0.00039577484130859375, 0.0006098747253417969, 0.000823974609375, 0.0010380744934082031, 0.0012521743774414062, 0.0014662742614746094, 0.0016803741455078125, 0.0018944740295410156, 0.0021085739135742188, 0.002322673797607422, 0.002536773681640625, 0.002750873565673828, 0.0029649734497070312, 0.0031790733337402344, 0.0033931732177734375, 0.0036072731018066406, 0.0038213729858398438, 0.004035472869873047, 0.00424957275390625, 0.004463672637939453, 0.004677772521972656, 0.004891872406005859, 0.0051059722900390625, 0.005320072174072266, 0.005534172058105469, 0.005748271942138672, 0.005962371826171875, 0.006176471710205078, 0.006390571594238281, 0.006604671478271484, 0.0068187713623046875, 0.007032871246337891, 0.007246971130371094, 0.007461071014404297, 0.0076751708984375]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 0.0, 1.0, 9.0, 10.0, 6.0, 14.0, 29.0, 35.0, 40.0, 100.0, 178.0, 225.0, 155.0, 70.0, 49.0, 19.0, 16.0, 13.0, 9.0, 7.0, 7.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06038687378168106, -0.05856429785490036, -0.05674172192811966, -0.05491914600133896, -0.053096573799848557, -0.051273997873067856, -0.049451421946287155, -0.047628846019506454, -0.04580627381801605, -0.04398369789123535, -0.04216112196445465, -0.04033854603767395, -0.03851597383618355, -0.03669339790940285, -0.03487082198262215, -0.033048246055841446, -0.031225670129060745, -0.029403094202280045, -0.027580520138144493, -0.025757944211363792, -0.02393537014722824, -0.02211279422044754, -0.02029021829366684, -0.01846764236688614, -0.016645068302750587, -0.014822493307292461, -0.012999918311834335, -0.011177342385053635, -0.009354767389595509, -0.0075321923941373825, -0.005709616467356682, -0.0038870414718985558, -0.0020644664764404297, -0.00024189124815165997, 0.0015806839801371098, 0.003403259441256523, 
0.005225834436714649, 0.007048409432172775, 0.008870985358953476, 0.010693560354411602, 0.012516135349869728, 0.014338710345327854, 0.01616128534078598, 0.01798386126756668, 0.01980643719434738, 0.021629011258482933, 0.023451587185263634, 0.025274161249399185, 0.027096737176179886, 0.028919313102960587, 0.030741887167096138, 0.03256446123123169, 0.03438703715801239, 0.03620961308479309, 0.03803218901157379, 0.03985476493835449, 0.04167734086513519, 0.043499916791915894, 0.045322492718696594, 0.047145068645477295, 0.0489676408469677, 0.0507902167737484, 0.0526127927005291, 0.0544353686273098, 0.0562579408288002]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 4.0, 2.0, 3.0, 10.0, 2.0, 6.0, 7.0, 12.0, 11.0, 13.0, 15.0, 16.0, 21.0, 25.0, 34.0, 28.0, 38.0, 47.0, 42.0, 42.0, 45.0, 46.0, 48.0, 39.0, 42.0, 38.0, 43.0, 33.0, 34.0, 39.0, 23.0, 35.0, 26.0, 25.0, 16.0, 16.0, 20.0, 13.0, 10.0, 7.0, 8.0, 4.0, 5.0, 5.0, 5.0, 4.0, 6.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.007344689220190048, -0.007059444207698107, -0.006774199195206165, -0.0064889537170529366, -0.006203708704560995, -0.005918463692069054, -0.005633218213915825, -0.0053479732014238834, -0.005062728188931942, -0.0047774831764400005, -0.004492238163948059, -0.00420699268579483, -0.003921747673302889, -0.0036365026608109474, -0.0033512574154883623, -0.003066012170165777, -0.0027807671576738358, -0.0024955221451818943, -0.002210276899859309, -0.001925031770952046, -0.0016397866420447826, -0.0013545415131375194, -0.001069296384230256, -0.000784051138907671, -0.0004988061264157295, -0.00021356099750846624, 7.168413139879704e-05, 0.0003569292603060603, 0.0006421743892133236, 0.0009274195181205869, 0.0012126646470278502, 0.0014979098923504353, 0.0017831549048423767, 0.002068399917334318, 0.0023536451626569033, 0.0026388904079794884, 0.00292413542047143, 0.0032093804329633713, 0.0034946256782859564, 0.0037798709236085415, 0.004065115936100483, 0.004350360948592424, 0.004635605961084366, 0.004920851439237595, 0.005206096451729536, 0.0054913414642214775, 0.005776586942374706, 0.006061831954866648, 0.006347076967358589, 0.006632321979850531, 0.006917566992342472, 0.007202812470495701, 0.007488057482987642, 0.007773302495479584, 0.008058547973632812, 0.008343793451786041, 0.008629037998616695, 0.008914283476769924, 0.009199528023600578, 0.009484773501753807, 0.009770018979907036, 0.01005526352673769, 0.010340509004890919, 0.010625753551721573, 0.010910999029874802]}, "gradients/decoder.model.decoder.layers.5.fc2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 5.0, 10.0, 14.0, 15.0, 24.0, 36.0, 35.0, 45.0, 86.0, 124.0, 177.0, 266.0, 377.0, 627.0, 1017.0, 1817.0, 3684.0, 8444.0, 26860.0, 4086077.0, 42114.0, 11829.0, 4832.0, 2406.0, 1236.0, 740.0, 459.0, 281.0, 199.0, 146.0, 97.0, 59.0, 45.0, 29.0, 21.0, 12.0, 16.0, 5.0, 9.0, 6.0, 3.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0498046875, -0.0483245849609375, -0.046844482421875, -0.0453643798828125, -0.04388427734375, -0.0424041748046875, -0.040924072265625, -0.0394439697265625, -0.0379638671875, -0.0364837646484375, -0.035003662109375, -0.0335235595703125, -0.03204345703125, -0.0305633544921875, -0.029083251953125, -0.0276031494140625, -0.026123046875, -0.0246429443359375, -0.023162841796875, -0.0216827392578125, -0.02020263671875, -0.0187225341796875, -0.017242431640625, -0.0157623291015625, -0.0142822265625, 
-0.0128021240234375, -0.011322021484375, -0.0098419189453125, -0.00836181640625, -0.0068817138671875, -0.005401611328125, -0.0039215087890625, -0.00244140625, -0.0009613037109375, 0.000518798828125, 0.0019989013671875, 0.00347900390625, 0.0049591064453125, 0.006439208984375, 0.0079193115234375, 0.0093994140625, 0.0108795166015625, 0.012359619140625, 0.0138397216796875, 0.01531982421875, 0.0167999267578125, 0.018280029296875, 0.0197601318359375, 0.021240234375, 0.0227203369140625, 0.024200439453125, 0.0256805419921875, 0.02716064453125, 0.0286407470703125, 0.030120849609375, 0.0316009521484375, 0.0330810546875, 0.0345611572265625, 0.036041259765625, 0.0375213623046875, 0.03900146484375, 0.0404815673828125, 0.041961669921875, 0.0434417724609375, 0.044921875]}, "gradients/decoder.model.decoder.layers.5.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 7.0, 2.0, 9.0, 8.0, 7.0, 9.0, 14.0, 18.0, 23.0, 42.0, 62.0, 93.0, 122.0, 139.0, 117.0, 86.0, 57.0, 39.0, 36.0, 23.0, 21.0, 14.0, 10.0, 10.0, 4.0, 7.0, 4.0, 1.0, 4.0, 2.0, 5.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.002803802490234375, -0.002717941999435425, -0.0026320815086364746, -0.0025462210178375244, -0.0024603605270385742, -0.002374500036239624, -0.002288639545440674, -0.0022027790546417236, -0.0021169185638427734, -0.0020310580730438232, -0.001945197582244873, -0.0018593370914459229, -0.0017734766006469727, -0.0016876161098480225, -0.0016017556190490723, -0.001515895128250122, -0.0014300346374511719, -0.0013441741466522217, -0.0012583136558532715, -0.0011724531650543213, -0.001086592674255371, -0.001000732183456421, -0.0009148716926574707, -0.0008290112018585205, -0.0007431507110595703, -0.0006572902202606201, -0.0005714297294616699, -0.0004855692386627197, -0.00039970874786376953, -0.00031384825706481934, -0.00022798776626586914, -0.00014212727546691895, -5.626678466796875e-05, 2.9593706130981445e-05, 0.00011545419692993164, 0.00020131468772888184, 0.00028717517852783203, 0.0003730356693267822, 0.0004588961601257324, 0.0005447566509246826, 0.0006306171417236328, 0.000716477632522583, 0.0008023381233215332, 0.0008881986141204834, 0.0009740591049194336, 0.0010599195957183838, 0.001145780086517334, 0.0012316405773162842, 0.0013175010681152344, 0.0014033615589141846, 0.0014892220497131348, 0.001575082540512085, 0.0016609430313110352, 0.0017468035221099854, 0.0018326640129089355, 0.0019185245037078857, 0.002004384994506836, 0.002090245485305786, 0.0021761059761047363, 0.0022619664669036865, 0.0023478269577026367, 0.002433687448501587, 0.002519547939300537, 0.0026054084300994873, 0.0026912689208984375]}, "gradients/decoder.model.decoder.layers.5.fc1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 5.0, 4.0, 6.0, 7.0, 3.0, 22.0, 29.0, 48.0, 72.0, 123.0, 213.0, 324.0, 650.0, 1291.0, 2890.0, 8589.0, 39111.0, 4088905.0, 38056.0, 8338.0, 2893.0, 1227.0, 684.0, 329.0, 199.0, 101.0, 58.0, 45.0, 21.0, 11.0, 5.0, 13.0, 8.0, 3.0, 0.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0183868408203125, -0.01763606071472168, -0.01688528060913086, -0.01613450050354004, -0.015383720397949219, -0.014632940292358398, -0.013882160186767578, -0.013131380081176758, -0.012380599975585938, -0.011629819869995117, -0.010879039764404297, -0.010128259658813477, -0.009377479553222656, -0.008626699447631836, -0.007875919342041016, 
-0.007125139236450195, -0.006374359130859375, -0.005623579025268555, -0.004872798919677734, -0.004122018814086914, -0.0033712387084960938, -0.0026204586029052734, -0.0018696784973144531, -0.0011188983917236328, -0.0003681182861328125, 0.0003826618194580078, 0.0011334419250488281, 0.0018842220306396484, 0.0026350021362304688, 0.003385782241821289, 0.004136562347412109, 0.00488734245300293, 0.00563812255859375, 0.00638890266418457, 0.007139682769775391, 0.007890462875366211, 0.008641242980957031, 0.009392023086547852, 0.010142803192138672, 0.010893583297729492, 0.011644363403320312, 0.012395143508911133, 0.013145923614501953, 0.013896703720092773, 0.014647483825683594, 0.015398263931274414, 0.016149044036865234, 0.016899824142456055, 0.017650604248046875, 0.018401384353637695, 0.019152164459228516, 0.019902944564819336, 0.020653724670410156, 0.021404504776000977, 0.022155284881591797, 0.022906064987182617, 0.023656845092773438, 0.024407625198364258, 0.025158405303955078, 0.0259091854095459, 0.02665996551513672, 0.02741074562072754, 0.02816152572631836, 0.02891230583190918, 0.0296630859375]}, "gradients/decoder.model.decoder.layers.5.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 5.0, 4.0, 7.0, 9.0, 14.0, 20.0, 31.0, 105.0, 2110.0, 1611.0, 78.0, 23.0, 16.0, 10.0, 11.0, 3.0, 5.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00450897216796875, -0.004335522651672363, -0.0041620731353759766, -0.00398862361907959, -0.003815174102783203, -0.0036417245864868164, -0.0034682750701904297, -0.003294825553894043, -0.0031213760375976562, -0.0029479265213012695, -0.002774477005004883, -0.002601027488708496, -0.0024275779724121094, -0.0022541284561157227, -0.002080678939819336, -0.0019072294235229492, -0.0017337799072265625, -0.0015603303909301758, -0.001386880874633789, -0.0012134313583374023, -0.0010399818420410156, -0.0008665323257446289, -0.0006930828094482422, -0.0005196332931518555, -0.00034618377685546875, -0.00017273426055908203, 7.152557373046875e-07, 0.0001741647720336914, 0.0003476142883300781, 0.0005210638046264648, 0.0006945133209228516, 0.0008679628372192383, 0.001041412353515625, 0.0012148618698120117, 0.0013883113861083984, 0.0015617609024047852, 0.0017352104187011719, 0.0019086599349975586, 0.0020821094512939453, 0.002255558967590332, 0.0024290084838867188, 0.0026024580001831055, 0.002775907516479492, 0.002949357032775879, 0.0031228065490722656, 0.0032962560653686523, 0.003469705581665039, 0.0036431550979614258, 0.0038166046142578125, 0.003990054130554199, 0.004163503646850586, 0.004336953163146973, 0.004510402679443359, 0.004683852195739746, 0.004857301712036133, 0.0050307512283325195, 0.005204200744628906, 0.005377650260925293, 0.00555109977722168, 0.005724549293518066, 0.005897998809814453, 0.00607144832611084, 0.0062448978424072266, 0.006418347358703613, 0.006591796875]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 4.0, 5.0, 6.0, 19.0, 10.0, 27.0, 62.0, 124.0, 261.0, 254.0, 120.0, 54.0, 34.0, 11.0, 10.0, 7.0, 7.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.006153257098048925, -0.0055395811796188354, -0.004925905726850033, 
-0.004312229808419943, -0.003698553889989853, -0.003084877971559763, -0.0024712022859603167, -0.0018575266003608704, -0.0012438506819307804, -0.0006301748799160123, -1.6499077901244164e-05, 0.000597176724113524, 0.001210852526128292, 0.001824528444558382, 0.0024382041301578283, 0.0030518798157572746, 0.0036655557341873646, 0.0042792316526174545, 0.004892907105386257, 0.005506583023816347, 0.006120258942246437, 0.006733934860676527, 0.007347610779106617, 0.00796128623187542, 0.008574962615966797, 0.0091886380687356, 0.009802314452826977, 0.01041598990559578, 0.011029666289687157, 0.01164334174245596, 0.012257017195224762, 0.01287069357931614, 0.013484369963407516, 0.014098045416176319, 0.014711721800267696, 0.015325397253036499, 0.015939073637127876, 0.016552750021219254, 0.01716642454266548, 0.01778010092675686, 0.018393777310848236, 0.019007453694939613, 0.01962112821638584, 0.02023480460047722, 0.020848480984568596, 0.021462157368659973, 0.0220758318901062, 0.02268950827419758, 0.023303182795643806, 0.023916859179735184, 0.024530533701181412, 0.02514421008527279, 0.025757886469364166, 0.026371560990810394, 0.02698523737490177, 0.02759891375899315, 0.028212588280439377, 0.028826264664530754, 0.029439939185976982, 0.03005361557006836, 0.030667291954159737, 0.031280968338251114, 0.03189464285969734, 0.03250831738114357, 0.033121995627880096]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 4.0, 3.0, 6.0, 5.0, 8.0, 8.0, 8.0, 9.0, 4.0, 14.0, 10.0, 19.0, 21.0, 25.0, 33.0, 30.0, 35.0, 33.0, 36.0, 33.0, 45.0, 35.0, 44.0, 39.0, 46.0, 39.0, 37.0, 37.0, 33.0, 43.0, 27.0, 29.0, 28.0, 28.0, 13.0, 22.0, 16.0, 23.0, 14.0, 12.0, 10.0, 7.0, 12.0, 8.0, 6.0, 4.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0], "bins": [-0.00344871892593801, -0.0033506317995488644, -0.0032525446731597185, -0.0031544575467705727, -0.0030563706532120705, -0.002958283293992281, -0.0028601964004337788, -0.002762109274044633, -0.002664022147655487, -0.002565935021266341, -0.0024678478948771954, -0.0023697607684880495, -0.0022716736420989037, -0.0021735867485404015, -0.0020754996221512556, -0.0019774124957621098, -0.001879325369372964, -0.001781238242983818, -0.0016831511165946722, -0.0015850641066208482, -0.0014869769802317023, -0.0013888898538425565, -0.0012908028438687325, -0.0011927157174795866, -0.0010946285910904408, -0.000996541464701295, -0.00089845439651981, -0.000800367328338325, -0.0007022802019491792, -0.0006041930755600333, -0.0005061060073785484, -0.00040801893919706345, -0.00030993157997727394, -0.00021184448269195855, -0.00011375738540664315, -1.5670288121327758e-05, 8.241680916398764e-05, 0.0001805039355531335, 0.0002785910037346184, 0.00037667807191610336, 0.0004747651983052492, 0.0005728523246943951, 0.00067093939287588, 0.0007690264610573649, 0.0008671135874465108, 0.0009652007138356566, 0.0010632877238094807, 0.0011613748501986265, 0.0012594619765877724, 0.0013575491029769182, 0.001455636229366064, 0.001553723239339888, 0.001651810365729034, 0.0017498974921181798, 0.0018479845020920038, 0.0019460716284811497, 0.0020441587548702955, 0.0021422458812594414, 0.0022403330076485872, 0.002338420134037733, 0.0024365070275962353, 0.0025345943868160248, 0.002632681280374527, 0.002730768406763673, 0.0028288555331528187]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 
0.0, 3.0, 4.0, 6.0, 7.0, 19.0, 21.0, 29.0, 38.0, 73.0, 86.0, 166.0, 383.0, 857.0, 2229.0, 8163.0, 51563.0, 829153.0, 135188.0, 14964.0, 3328.0, 1216.0, 503.0, 216.0, 135.0, 72.0, 45.0, 40.0, 19.0, 9.0, 11.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0011129379272460938, -0.0010817497968673706, -0.0010505616664886475, -0.0010193735361099243, -0.0009881854057312012, -0.000956997275352478, -0.0009258091449737549, -0.0008946210145950317, -0.0008634328842163086, -0.0008322447538375854, -0.0008010566234588623, -0.0007698684930801392, -0.000738680362701416, -0.0007074922323226929, -0.0006763041019439697, -0.0006451159715652466, -0.0006139278411865234, -0.0005827397108078003, -0.0005515515804290771, -0.000520363450050354, -0.0004891753196716309, -0.0004579871892929077, -0.00042679905891418457, -0.0003956109285354614, -0.0003644227981567383, -0.00033323466777801514, -0.000302046537399292, -0.00027085840702056885, -0.0002396702766418457, -0.00020848214626312256, -0.00017729401588439941, -0.00014610588550567627, -0.00011491775512695312, -8.372962474822998e-05, -5.2541494369506836e-05, -2.135336399078369e-05, 9.834766387939453e-06, 4.10228967666626e-05, 7.221102714538574e-05, 0.00010339915752410889, 0.00013458728790283203, 0.00016577541828155518, 0.00019696354866027832, 0.00022815167903900146, 0.0002593398094177246, 0.00029052793979644775, 0.0003217160701751709, 0.00035290420055389404, 0.0003840923309326172, 0.00041528046131134033, 0.0004464685916900635, 0.0004776567220687866, 0.0005088448524475098, 0.0005400329828262329, 0.0005712211132049561, 0.0006024092435836792, 0.0006335973739624023, 0.0006647855043411255, 0.0006959736347198486, 0.0007271617650985718, 0.0007583498954772949, 0.0007895380258560181, 0.0008207261562347412, 0.0008519142866134644, 0.0008831024169921875]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 0.0, 1.0, 1.0, 4.0, 4.0, 2.0, 5.0, 11.0, 12.0, 20.0, 22.0, 21.0, 31.0, 45.0, 61.0, 82.0, 63.0, 79.0, 88.0, 70.0, 80.0, 61.0, 47.0, 31.0, 38.0, 32.0, 17.0, 18.0, 13.0, 9.0, 10.0, 12.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004360198974609375, -0.004212379455566406, -0.0040645599365234375, -0.003916740417480469, -0.0037689208984375, -0.0036211013793945312, -0.0034732818603515625, -0.0033254623413085938, -0.003177642822265625, -0.0030298233032226562, -0.0028820037841796875, -0.0027341842651367188, -0.00258636474609375, -0.0024385452270507812, -0.0022907257080078125, -0.0021429061889648438, -0.001995086669921875, -0.0018472671508789062, -0.0016994476318359375, -0.0015516281127929688, -0.00140380859375, -0.0012559890747070312, -0.0011081695556640625, -0.0009603500366210938, -0.000812530517578125, -0.0006647109985351562, -0.0005168914794921875, -0.00036907196044921875, -0.00022125244140625, -7.343292236328125e-05, 7.43865966796875e-05, 0.00022220611572265625, 0.000370025634765625, 0.0005178451538085938, 0.0006656646728515625, 0.0008134841918945312, 0.0009613037109375, 0.0011091232299804688, 0.0012569427490234375, 0.0014047622680664062, 0.001552581787109375, 0.0017004013061523438, 0.0018482208251953125, 0.0019960403442382812, 0.00214385986328125, 0.0022916793823242188, 0.0024394989013671875, 0.0025873184204101562, 0.002735137939453125, 0.0028829574584960938, 0.0030307769775390625, 0.0031785964965820312, 0.003326416015625, 0.0034742355346679688, 
0.0036220550537109375, 0.0037698745727539062, 0.003917694091796875, 0.004065513610839844, 0.0042133331298828125, 0.004361152648925781, 0.00450897216796875, 0.004656791687011719, 0.0048046112060546875, 0.004952430725097656, 0.005100250244140625]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 6.0, 7.0, 8.0, 15.0, 10.0, 14.0, 22.0, 29.0, 39.0, 45.0, 52.0, 65.0, 103.0, 105.0, 143.0, 265.0, 458.0, 1300.0, 4797.0, 25813.0, 503044.0, 479525.0, 25423.0, 4559.0, 1322.0, 495.0, 243.0, 142.0, 99.0, 87.0, 61.0, 46.0, 40.0, 34.0, 38.0, 20.0, 14.0, 17.0, 13.0, 10.0, 5.0, 9.0, 3.0, 1.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0002551078796386719, -0.00024737603962421417, -0.00023964419960975647, -0.00023191235959529877, -0.00022418051958084106, -0.00021644867956638336, -0.00020871683955192566, -0.00020098499953746796, -0.00019325315952301025, -0.00018552131950855255, -0.00017778947949409485, -0.00017005763947963715, -0.00016232579946517944, -0.00015459395945072174, -0.00014686211943626404, -0.00013913027942180634, -0.00013139843940734863, -0.00012366659939289093, -0.00011593475937843323, -0.00010820291936397552, -0.00010047107934951782, -9.273923933506012e-05, -8.500739932060242e-05, -7.727555930614471e-05, -6.954371929168701e-05, -6.181187927722931e-05, -5.4080039262771606e-05, -4.6348199248313904e-05, -3.86163592338562e-05, -3.08845192193985e-05, -2.3152679204940796e-05, -1.5420839190483093e-05, -7.68899917602539e-06, 4.284083843231201e-08, 7.774680852890015e-06, 1.5506520867347717e-05, 2.323836088180542e-05, 3.097020089626312e-05, 3.8702040910720825e-05, 4.643388092517853e-05, 5.416572093963623e-05, 6.189756095409393e-05, 6.962940096855164e-05, 7.736124098300934e-05, 8.509308099746704e-05, 9.282492101192474e-05, 0.00010055676102638245, 0.00010828860104084015, 0.00011602044105529785, 0.00012375228106975555, 0.00013148412108421326, 0.00013921596109867096, 0.00014694780111312866, 0.00015467964112758636, 0.00016241148114204407, 0.00017014332115650177, 0.00017787516117095947, 0.00018560700118541718, 0.00019333884119987488, 0.00020107068121433258, 0.00020880252122879028, 0.00021653436124324799, 0.0002242662012577057, 0.0002319980412721634, 0.0002397298812866211]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 0.0, 6.0, 3.0, 5.0, 10.0, 5.0, 11.0, 13.0, 20.0, 21.0, 23.0, 22.0, 19.0, 32.0, 27.0, 31.0, 50.0, 39.0, 43.0, 38.0, 35.0, 45.0, 43.0, 48.0, 43.0, 42.0, 29.0, 42.0, 40.0, 34.0, 26.0, 22.0, 23.0, 15.0, 16.0, 20.0, 11.0, 6.0, 6.0, 12.0, 4.0, 7.0, 8.0, 2.0, 1.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.006664276123046875, -0.006462395191192627, -0.006260514259338379, -0.006058633327484131, -0.005856752395629883, -0.005654871463775635, -0.005452990531921387, -0.005251109600067139, -0.005049228668212891, -0.004847347736358643, -0.0046454668045043945, -0.0044435858726501465, -0.0042417049407958984, -0.00403982400894165, -0.0038379430770874023, -0.0036360621452331543, -0.0034341812133789062, -0.003232300281524658, -0.00303041934967041, -0.002828538417816162, -0.002626657485961914, -0.002424776554107666, -0.002222895622253418, -0.00202101469039917, -0.0018191337585449219, -0.0016172528266906738, -0.0014153718948364258, -0.0012134909629821777, -0.0010116100311279297, -0.0008097290992736816, -0.0006078481674194336, -0.00040596723556518555, 
-0.0002040863037109375, -2.205371856689453e-06, 0.0001996755599975586, 0.00040155649185180664, 0.0006034374237060547, 0.0008053183555603027, 0.0010071992874145508, 0.0012090802192687988, 0.0014109611511230469, 0.001612842082977295, 0.001814723014831543, 0.002016603946685791, 0.002218484878540039, 0.002420365810394287, 0.002622246742248535, 0.002824127674102783, 0.0030260086059570312, 0.0032278895378112793, 0.0034297704696655273, 0.0036316514015197754, 0.0038335323333740234, 0.0040354132652282715, 0.0042372941970825195, 0.004439175128936768, 0.004641056060791016, 0.004842936992645264, 0.005044817924499512, 0.00524669885635376, 0.005448579788208008, 0.005650460720062256, 0.005852341651916504, 0.006054222583770752, 0.006256103515625]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 30.0, 0.0, 0.0, 0.0, 98.0, 0.0, 0.0, 388.0, 0.0, 0.0, 0.0, 1481.0, 0.0, 0.0, 0.0, 5910.0, 0.0, 0.0, 0.0, 33008.0, 0.0, 0.0, 967297.0, 0.0, 0.0, 0.0, 32936.0, 0.0, 0.0, 0.0, 5567.0, 0.0, 0.0, 0.0, 1370.0, 0.0, 0.0, 355.0, 0.0, 0.0, 0.0, 98.0, 0.0, 0.0, 0.0, 23.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 3.0], "bins": [-5.364418029785156e-07, -5.206093192100525e-07, -5.047768354415894e-07, -4.889443516731262e-07, -4.731118679046631e-07, -4.5727938413619995e-07, -4.414469003677368e-07, -4.256144165992737e-07, -4.0978193283081055e-07, -3.939494490623474e-07, -3.781169652938843e-07, -3.6228448152542114e-07, -3.46451997756958e-07, -3.3061951398849487e-07, -3.1478703022003174e-07, -2.989545464515686e-07, -2.8312206268310547e-07, -2.6728957891464233e-07, -2.514570951461792e-07, -2.3562461137771606e-07, -2.1979212760925293e-07, -2.039596438407898e-07, -1.8812716007232666e-07, -1.7229467630386353e-07, -1.564621925354004e-07, -1.4062970876693726e-07, -1.2479722499847412e-07, -1.0896474123001099e-07, -9.313225746154785e-08, -7.729977369308472e-08, -6.146728992462158e-08, -4.563480615615845e-08, -2.9802322387695312e-08, -1.3969838619232178e-08, 1.862645149230957e-09, 1.7695128917694092e-08, 3.3527612686157227e-08, 4.936009645462036e-08, 6.51925802230835e-08, 8.102506399154663e-08, 9.685754776000977e-08, 1.126900315284729e-07, 1.2852251529693604e-07, 1.4435499906539917e-07, 1.601874828338623e-07, 1.7601996660232544e-07, 1.9185245037078857e-07, 2.076849341392517e-07, 2.2351741790771484e-07, 2.39349901676178e-07, 2.551823854446411e-07, 2.7101486921310425e-07, 2.868473529815674e-07, 3.026798367500305e-07, 3.1851232051849365e-07, 3.343448042869568e-07, 3.501772880554199e-07, 3.6600977182388306e-07, 3.818422555923462e-07, 3.976747393608093e-07, 4.1350722312927246e-07, 4.293397068977356e-07, 4.4517219066619873e-07, 4.6100467443466187e-07, 4.76837158203125e-07]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 5.0, 0.0, 10.0, 0.0, 0.0, 19.0, 0.0, 0.0, 36.0, 0.0, 0.0, 61.0, 0.0, 0.0, 79.0, 0.0, 107.0, 0.0, 0.0, 104.0, 0.0, 0.0, 127.0, 0.0, 0.0, 134.0, 0.0, 114.0, 0.0, 0.0, 85.0, 0.0, 0.0, 53.0, 0.0, 0.0, 28.0, 0.0, 0.0, 19.0, 0.0, 17.0, 0.0, 0.0, 3.0, 0.0, 0.0, 6.0, 0.0, 0.0, 2.0, 0.0, 4.0], "bins": [-7.152557373046875e-07, -6.938353180885315e-07, -6.724148988723755e-07, -6.509944796562195e-07, -6.295740604400635e-07, -6.081536412239075e-07, -5.867332220077515e-07, -5.653128027915955e-07, -5.438923835754395e-07, -5.224719643592834e-07, -5.010515451431274e-07, -4.796311259269714e-07, -4.5821070671081543e-07, 
-4.367902874946594e-07, -4.153698682785034e-07, -3.939494490623474e-07, -3.725290298461914e-07, -3.511086106300354e-07, -3.296881914138794e-07, -3.082677721977234e-07, -2.868473529815674e-07, -2.654269337654114e-07, -2.4400651454925537e-07, -2.2258609533309937e-07, -2.0116567611694336e-07, -1.7974525690078735e-07, -1.5832483768463135e-07, -1.3690441846847534e-07, -1.1548399925231934e-07, -9.406358003616333e-08, -7.264316082000732e-08, -5.122274160385132e-08, -2.9802322387695312e-08, -8.381903171539307e-09, 1.30385160446167e-08, 3.4458935260772705e-08, 5.587935447692871e-08, 7.729977369308472e-08, 9.872019290924072e-08, 1.2014061212539673e-07, 1.4156103134155273e-07, 1.6298145055770874e-07, 1.8440186977386475e-07, 2.0582228899002075e-07, 2.2724270820617676e-07, 2.4866312742233276e-07, 2.7008354663848877e-07, 2.915039658546448e-07, 3.129243850708008e-07, 3.343448042869568e-07, 3.557652235031128e-07, 3.771856427192688e-07, 3.986060619354248e-07, 4.200264811515808e-07, 4.414469003677368e-07, 4.628673195838928e-07, 4.842877388000488e-07, 5.057081580162048e-07, 5.271285772323608e-07, 5.485489964485168e-07, 5.699694156646729e-07, 5.913898348808289e-07, 6.128102540969849e-07, 6.342306733131409e-07, 6.556510925292969e-07]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 25.0, 0.0, 40.0, 0.0, 122.0, 0.0, 0.0, 325.0, 0.0, 1238.0, 0.0, 0.0, 5998.0, 0.0, 32215.0, 0.0, 968513.0, 0.0, 0.0, 32142.0, 0.0, 6244.0, 0.0, 1201.0, 0.0, 0.0, 317.0, 0.0, 111.0, 0.0, 0.0, 37.0, 0.0, 22.0, 0.0, 5.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.748603820800781e-07, -7.497146725654602e-07, -7.245689630508423e-07, -6.994232535362244e-07, -6.742775440216064e-07, -6.491318345069885e-07, -6.239861249923706e-07, -5.988404154777527e-07, -5.736947059631348e-07, -5.485489964485168e-07, -5.234032869338989e-07, -4.98257577419281e-07, -4.731118679046631e-07, -4.4796615839004517e-07, -4.2282044887542725e-07, -3.976747393608093e-07, -3.725290298461914e-07, -3.473833203315735e-07, -3.2223761081695557e-07, -2.9709190130233765e-07, -2.7194619178771973e-07, -2.468004822731018e-07, -2.2165477275848389e-07, -1.9650906324386597e-07, -1.7136335372924805e-07, -1.4621764421463013e-07, -1.210719347000122e-07, -9.592622518539429e-08, -7.078051567077637e-08, -4.563480615615845e-08, -2.0489096641540527e-08, 4.6566128730773926e-09, 2.9802322387695312e-08, 5.494803190231323e-08, 8.009374141693115e-08, 1.0523945093154907e-07, 1.30385160446167e-07, 1.555308699607849e-07, 1.8067657947540283e-07, 2.0582228899002075e-07, 2.3096799850463867e-07, 2.561137080192566e-07, 2.812594175338745e-07, 3.0640512704849243e-07, 3.3155083656311035e-07, 3.5669654607772827e-07, 3.818422555923462e-07, 4.069879651069641e-07, 4.3213367462158203e-07, 4.5727938413619995e-07, 4.824250936508179e-07, 5.075708031654358e-07, 5.327165126800537e-07, 5.578622221946716e-07, 5.830079317092896e-07, 6.081536412239075e-07, 6.332993507385254e-07, 6.584450602531433e-07, 6.835907697677612e-07, 7.087364792823792e-07, 7.338821887969971e-07, 7.59027898311615e-07, 7.841736078262329e-07, 8.093193173408508e-07, 8.344650268554688e-07]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 68.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 878.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 62.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0], "bins": [-2.384185791015625e-07, -2.3283064365386963e-07, -2.2724270820617676e-07, -2.2165477275848389e-07, -2.1606683731079102e-07, -2.1047890186309814e-07, -2.0489096641540527e-07, -1.993030309677124e-07, -1.9371509552001953e-07, -1.8812716007232666e-07, -1.825392246246338e-07, -1.7695128917694092e-07, -1.7136335372924805e-07, -1.6577541828155518e-07, -1.601874828338623e-07, -1.5459954738616943e-07, -1.4901161193847656e-07, -1.434236764907837e-07, -1.3783574104309082e-07, -1.3224780559539795e-07, -1.2665987014770508e-07, -1.210719347000122e-07, -1.1548399925231934e-07, -1.0989606380462646e-07, -1.043081283569336e-07, -9.872019290924072e-08, -9.313225746154785e-08, -8.754432201385498e-08, -8.195638656616211e-08, -7.636845111846924e-08, -7.078051567077637e-08, -6.51925802230835e-08, -5.960464477539063e-08, -5.4016709327697754e-08, -4.842877388000488e-08, -4.284083843231201e-08, -3.725290298461914e-08, -3.166496753692627e-08, -2.60770320892334e-08, -2.0489096641540527e-08, -1.4901161193847656e-08, -9.313225746154785e-09, -3.725290298461914e-09, 1.862645149230957e-09, 7.450580596923828e-09, 1.30385160446167e-08, 1.862645149230957e-08, 2.421438694000244e-08, 2.9802322387695312e-08, 3.5390257835388184e-08, 4.0978193283081055e-08, 4.6566128730773926e-08, 5.21540641784668e-08, 5.774199962615967e-08, 6.332993507385254e-08, 6.891787052154541e-08, 7.450580596923828e-08, 8.009374141693115e-08, 8.568167686462402e-08, 9.12696123123169e-08, 9.685754776000977e-08, 1.0244548320770264e-07, 1.0803341865539551e-07, 1.1362135410308838e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 6.0, 10.0, 10.0, 17.0, 31.0, 82.0, 151.0, 319.0, 207.0, 94.0, 36.0, 26.0, 6.0, 8.0, 8.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.004593722987920046, -0.004201252944767475, -0.0038087833672761917, -0.003416313324123621, -0.003023843513801694, -0.002631373703479767, -0.002238903660327196, -0.001846433850005269, -0.001453964039683342, -0.001061494229361415, -0.000669024302624166, -0.0002765543758869171, 0.00011591543443500996, 0.000508385244756937, 0.0009008552879095078, 0.0012933250982314348, 0.001685794908553362, 0.002078264718875289, 0.002470734529197216, 0.0028632045723497868, 0.003255674382671714, 0.003648144192993641, 0.004040614236146212, 0.004433084279298782, 0.004825553856790066, 0.0052180238999426365, 0.00561049347743392, 0.006002963520586491, 0.006395433098077774, 0.006787903141230345, 0.0071803731843829155, 0.007572842761874199, 0.007965313270688057, 0.00835778284817934, 0.008750253356993198, 0.009142722934484482, 0.009535192511975765, 0.009927662089467049, 0.010320132598280907, 0.01071260217577219, 0.011105071753263474, 0.011497541330754757, 0.011890011839568615, 0.012282481417059898, 0.012674950994551182, 0.013067420572042465, 0.013459891080856323, 0.013852360658347607, 0.014244831167161465, 0.014637300744652748, 0.015029771253466606, 0.01542224083095789, 0.015814710408449173, 0.01620718091726303, 0.01659965142607689, 0.016992120072245598, 0.017384590581059456, 0.017777061089873314, 0.018169529736042023, 0.01856200024485588, 0.01895447075366974, 0.019346939399838448, 0.019739409908652306, 
0.020131880417466164, 0.020524349063634872]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 6.0, 8.0, 5.0, 10.0, 4.0, 6.0, 10.0, 13.0, 18.0, 22.0, 24.0, 32.0, 38.0, 43.0, 40.0, 36.0, 45.0, 53.0, 40.0, 52.0, 49.0, 52.0, 40.0, 32.0, 56.0, 38.0, 27.0, 38.0, 28.0, 18.0, 25.0, 17.0, 19.0, 8.0, 17.0, 11.0, 10.0, 5.0, 8.0, 2.0, 3.0, 3.0, 1.0], "bins": [-0.002756757428869605, -0.002689383225515485, -0.0026220090221613646, -0.0025546348188072443, -0.002487260615453124, -0.002419886412099004, -0.0023525122087448835, -0.0022851380053907633, -0.002217763802036643, -0.0021503895986825228, -0.0020830153953284025, -0.0020156411919742823, -0.001948266988620162, -0.0018808927852660418, -0.0018135185819119215, -0.0017461443785578012, -0.0016787700587883592, -0.001611395855434239, -0.0015440216520801187, -0.0014766474487259984, -0.0014092732453718781, -0.001341899042017758, -0.0012745247222483158, -0.0012071505188941956, -0.0011397763155400753, -0.001072402112185955, -0.0010050279088318348, -0.0009376537054777145, -0.0008702795021235943, -0.000802905298769474, -0.0007355310372076929, -0.0006681568338535726, -0.0006007828051224351, -0.0005334086017683148, -0.0004660343984141946, -0.00039866016595624387, -0.0003312859626021236, -0.00026391175924800336, -0.00019653752679005265, -0.0001291633234359324, -6.178912008181214e-05, 5.585090548265725e-06, 7.29593011783436e-05, 0.00014033351908437908, 0.00020770772243849933, 0.0002750819257926196, 0.0003424561582505703, 0.00040983036160469055, 0.0004772045649588108, 0.0005445787683129311, 0.0006119529716670513, 0.0006793271750211716, 0.0007467013783752918, 0.0008140755817294121, 0.0008814498432911932, 0.0009488240466453135, 0.0010161981917917728, 0.001083572395145893, 0.0011509465985000134, 0.0012183208018541336, 0.0012856950052082539, 0.0013530692085623741, 0.0014204434119164944, 0.0014878176152706146, 0.0015551919350400567]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 7.0, 2.0, 10.0, 18.0, 19.0, 36.0, 45.0, 70.0, 104.0, 199.0, 298.0, 537.0, 950.0, 1676.0, 3190.0, 6449.0, 14196.0, 36541.0, 125991.0, 601926.0, 177504.0, 45587.0, 17214.0, 7645.0, 3741.0, 1990.0, 1070.0, 579.0, 382.0, 220.0, 114.0, 98.0, 54.0, 33.0, 15.0, 15.0, 9.0, 7.0, 9.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0055084228515625, -0.005334138870239258, -0.005159854888916016, -0.0049855709075927734, -0.004811286926269531, -0.004637002944946289, -0.004462718963623047, -0.004288434982299805, -0.0041141510009765625, -0.00393986701965332, -0.003765583038330078, -0.003591299057006836, -0.0034170150756835938, -0.0032427310943603516, -0.0030684471130371094, -0.002894163131713867, -0.002719879150390625, -0.002545595169067383, -0.0023713111877441406, -0.0021970272064208984, -0.0020227432250976562, -0.001848459243774414, -0.0016741752624511719, -0.0014998912811279297, -0.0013256072998046875, -0.0011513233184814453, -0.0009770393371582031, -0.0008027553558349609, -0.0006284713745117188, -0.00045418739318847656, -0.0002799034118652344, -0.00010561943054199219, 6.866455078125e-05, 0.0002429485321044922, 0.0004172325134277344, 0.0005915164947509766, 0.0007658004760742188, 0.0009400844573974609, 0.0011143684387207031, 0.0012886524200439453, 0.0014629364013671875, 0.0016372203826904297, 
0.0018115043640136719, 0.001985788345336914, 0.0021600723266601562, 0.0023343563079833984, 0.0025086402893066406, 0.002682924270629883, 0.002857208251953125, 0.003031492233276367, 0.0032057762145996094, 0.0033800601959228516, 0.0035543441772460938, 0.003728628158569336, 0.003902912139892578, 0.00407719612121582, 0.0042514801025390625, 0.004425764083862305, 0.004600048065185547, 0.004774332046508789, 0.004948616027832031, 0.0051229000091552734, 0.005297183990478516, 0.005471467971801758, 0.005645751953125]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 7.0, 6.0, 9.0, 16.0, 20.0, 32.0, 38.0, 62.0, 64.0, 94.0, 101.0, 89.0, 103.0, 92.0, 64.0, 60.0, 48.0, 25.0, 22.0, 23.0, 13.0, 3.0, 6.0, 3.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0008788108825683594, -0.0008571147918701172, -0.000835418701171875, -0.0008137226104736328, -0.0007920265197753906, -0.0007703304290771484, -0.0007486343383789062, -0.0007269382476806641, -0.0007052421569824219, -0.0006835460662841797, -0.0006618499755859375, -0.0006401538848876953, -0.0006184577941894531, -0.0005967617034912109, -0.0005750656127929688, -0.0005533695220947266, -0.0005316734313964844, -0.0005099773406982422, -0.00048828125, -0.0004665851593017578, -0.0004448890686035156, -0.00042319297790527344, -0.00040149688720703125, -0.00037980079650878906, -0.0003581047058105469, -0.0003364086151123047, -0.0003147125244140625, -0.0002930164337158203, -0.0002713203430175781, -0.00024962425231933594, -0.00022792816162109375, -0.00020623207092285156, -0.00018453598022460938, -0.0001628398895263672, -0.000141143798828125, -0.00011944770812988281, -9.775161743164062e-05, -7.605552673339844e-05, -5.435943603515625e-05, -3.266334533691406e-05, -1.0967254638671875e-05, 1.0728836059570312e-05, 3.24249267578125e-05, 5.412101745605469e-05, 7.581710815429688e-05, 9.751319885253906e-05, 0.00011920928955078125, 0.00014090538024902344, 0.00016260147094726562, 0.0001842975616455078, 0.00020599365234375, 0.0002276897430419922, 0.0002493858337402344, 0.00027108192443847656, 0.00029277801513671875, 0.00031447410583496094, 0.0003361701965332031, 0.0003578662872314453, 0.0003795623779296875, 0.0004012584686279297, 0.0004229545593261719, 0.00044465065002441406, 0.00046634674072265625, 0.00048804283142089844, 0.0005097389221191406]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 7.0, 2.0, 3.0, 6.0, 7.0, 13.0, 19.0, 32.0, 30.0, 48.0, 83.0, 115.0, 166.0, 218.0, 305.0, 434.0, 629.0, 900.0, 1449.0, 2202.0, 3646.0, 5753.0, 9809.0, 17367.0, 33135.0, 69133.0, 176710.0, 418208.0, 167304.0, 66444.0, 32396.0, 16877.0, 9585.0, 5751.0, 3415.0, 2042.0, 1371.0, 947.0, 616.0, 402.0, 305.0, 212.0, 141.0, 99.0, 66.0, 57.0, 30.0, 28.0, 15.0, 15.0, 11.0, 3.0, 5.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00115203857421875, -0.0011129528284072876, -0.0010738670825958252, -0.0010347813367843628, -0.0009956955909729004, -0.000956609845161438, -0.0009175240993499756, -0.0008784383535385132, -0.0008393526077270508, -0.0008002668619155884, -0.000761181116104126, -0.0007220953702926636, -0.0006830096244812012, -0.0006439238786697388, -0.0006048381328582764, -0.000565752387046814, -0.0005266666412353516, -0.00048758089542388916, -0.00044849514961242676, -0.00040940940380096436, -0.00037032365798950195, 
-0.00033123791217803955, -0.00029215216636657715, -0.00025306642055511475, -0.00021398067474365234, -0.00017489492893218994, -0.00013580918312072754, -9.672343730926514e-05, -5.7637691497802734e-05, -1.8551945686340332e-05, 2.053380012512207e-05, 5.961954593658447e-05, 9.870529174804688e-05, 0.00013779103755950928, 0.00017687678337097168, 0.00021596252918243408, 0.0002550482749938965, 0.0002941340208053589, 0.0003332197666168213, 0.0003723055124282837, 0.0004113912582397461, 0.0004504770040512085, 0.0004895627498626709, 0.0005286484956741333, 0.0005677342414855957, 0.0006068199872970581, 0.0006459057331085205, 0.0006849914789199829, 0.0007240772247314453, 0.0007631629705429077, 0.0008022487163543701, 0.0008413344621658325, 0.0008804202079772949, 0.0009195059537887573, 0.0009585916996002197, 0.0009976774454116821, 0.0010367631912231445, 0.001075848937034607, 0.0011149346828460693, 0.0011540204286575317, 0.0011931061744689941, 0.0012321919202804565, 0.001271277666091919, 0.0013103634119033813, 0.0013494491577148438]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 5.0, 12.0, 16.0, 10.0, 23.0, 13.0, 34.0, 35.0, 41.0, 57.0, 49.0, 58.0, 65.0, 79.0, 76.0, 71.0, 60.0, 51.0, 47.0, 26.0, 32.0, 26.0, 18.0, 22.0, 19.0, 15.0, 8.0, 3.0, 5.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.000827789306640625, -0.0007990747690200806, -0.0007703602313995361, -0.0007416456937789917, -0.0007129311561584473, -0.0006842166185379028, -0.0006555020809173584, -0.000626787543296814, -0.0005980730056762695, -0.0005693584680557251, -0.0005406439304351807, -0.0005119293928146362, -0.0004832148551940918, -0.00045450031757354736, -0.00042578577995300293, -0.0003970712423324585, -0.00036835670471191406, -0.00033964216709136963, -0.0003109276294708252, -0.00028221309185028076, -0.00025349855422973633, -0.0002247840166091919, -0.00019606947898864746, -0.00016735494136810303, -0.0001386404037475586, -0.00010992586612701416, -8.121132850646973e-05, -5.249679088592529e-05, -2.378225326538086e-05, 4.932284355163574e-06, 3.364682197570801e-05, 6.236135959625244e-05, 9.107589721679688e-05, 0.00011979043483734131, 0.00014850497245788574, 0.00017721951007843018, 0.0002059340476989746, 0.00023464858531951904, 0.0002633631229400635, 0.0002920776605606079, 0.00032079219818115234, 0.0003495067358016968, 0.0003782212734222412, 0.00040693581104278564, 0.0004356503486633301, 0.0004643648862838745, 0.0004930794239044189, 0.0005217939615249634, 0.0005505084991455078, 0.0005792230367660522, 0.0006079375743865967, 0.0006366521120071411, 0.0006653666496276855, 0.00069408118724823, 0.0007227957248687744, 0.0007515102624893188, 0.0007802248001098633, 0.0008089393377304077, 0.0008376538753509521, 0.0008663684129714966, 0.000895082950592041, 0.0009237974882125854, 0.0009525120258331299, 0.0009812265634536743, 0.0010099411010742188]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 8.0, 16.0, 13.0, 7.0, 10.0, 19.0, 25.0, 28.0, 43.0, 58.0, 62.0, 83.0, 129.0, 149.0, 258.0, 406.0, 792.0, 1736.0, 5341.0, 173753.0, 855587.0, 5980.0, 1854.0, 812.0, 414.0, 270.0, 192.0, 129.0, 105.0, 61.0, 51.0, 27.0, 24.0, 32.0, 25.0, 17.0, 8.0, 6.0, 10.0, 7.0, 3.0, 1.0, 1.0, 2.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.004180908203125, 
-0.004046201705932617, -0.003911495208740234, -0.0037767887115478516, -0.0036420822143554688, -0.003507375717163086, -0.003372669219970703, -0.0032379627227783203, -0.0031032562255859375, -0.0029685497283935547, -0.002833843231201172, -0.002699136734008789, -0.0025644302368164062, -0.0024297237396240234, -0.0022950172424316406, -0.002160310745239258, -0.002025604248046875, -0.0018908977508544922, -0.0017561912536621094, -0.0016214847564697266, -0.0014867782592773438, -0.001352071762084961, -0.0012173652648925781, -0.0010826587677001953, -0.0009479522705078125, -0.0008132457733154297, -0.0006785392761230469, -0.0005438327789306641, -0.00040912628173828125, -0.00027441978454589844, -0.00013971328735351562, -5.0067901611328125e-06, 0.00012969970703125, 0.0002644062042236328, 0.0003991127014160156, 0.0005338191986083984, 0.0006685256958007812, 0.0008032321929931641, 0.0009379386901855469, 0.0010726451873779297, 0.0012073516845703125, 0.0013420581817626953, 0.0014767646789550781, 0.001611471176147461, 0.0017461776733398438, 0.0018808841705322266, 0.0020155906677246094, 0.002150297164916992, 0.002285003662109375, 0.002419710159301758, 0.0025544166564941406, 0.0026891231536865234, 0.0028238296508789062, 0.002958536148071289, 0.003093242645263672, 0.0032279491424560547, 0.0033626556396484375, 0.0034973621368408203, 0.003632068634033203, 0.003766775131225586, 0.0039014816284179688, 0.0040361881256103516, 0.004170894622802734, 0.004305601119995117, 0.0044403076171875]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 7.0, 13.0, 0.0, 26.0, 0.0, 93.0, 232.0, 0.0, 288.0, 0.0, 207.0, 0.0, 96.0, 31.0, 0.0, 8.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5367431640625e-07, -9.192153811454773e-07, -8.847564458847046e-07, -8.502975106239319e-07, -8.158385753631592e-07, -7.813796401023865e-07, -7.469207048416138e-07, -7.124617695808411e-07, -6.780028343200684e-07, -6.435438990592957e-07, -6.09084963798523e-07, -5.746260285377502e-07, -5.401670932769775e-07, -5.057081580162048e-07, -4.7124922275543213e-07, -4.367902874946594e-07, -4.023313522338867e-07, -3.67872416973114e-07, -3.334134817123413e-07, -2.989545464515686e-07, -2.644956111907959e-07, -2.300366759300232e-07, -1.955777406692505e-07, -1.6111880540847778e-07, -1.2665987014770508e-07, -9.220093488693237e-08, -5.774199962615967e-08, -2.3283064365386963e-08, 1.1175870895385742e-08, 4.563480615615845e-08, 8.009374141693115e-08, 1.1455267667770386e-07, 1.4901161193847656e-07, 1.8347054719924927e-07, 2.1792948246002197e-07, 2.523884177207947e-07, 2.868473529815674e-07, 3.213062882423401e-07, 3.557652235031128e-07, 3.902241587638855e-07, 4.246830940246582e-07, 4.591420292854309e-07, 4.936009645462036e-07, 5.280598998069763e-07, 5.62518835067749e-07, 5.969777703285217e-07, 6.314367055892944e-07, 6.658956408500671e-07, 7.003545761108398e-07, 7.348135113716125e-07, 7.692724466323853e-07, 8.03731381893158e-07, 8.381903171539307e-07, 8.726492524147034e-07, 9.071081876754761e-07, 9.415671229362488e-07, 9.760260581970215e-07, 1.0104849934577942e-06, 1.044943928718567e-06, 1.0794028639793396e-06, 1.1138617992401123e-06, 1.148320734500885e-06, 1.1827796697616577e-06, 1.2172386050224304e-06, 1.2516975402832031e-06]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.weight": {"_type": "histogram", 
"values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 7.0, 8.0, 8.0, 24.0, 18.0, 32.0, 38.0, 45.0, 70.0, 96.0, 117.0, 137.0, 198.0, 275.0, 326.0, 433.0, 549.0, 810.0, 1140.0, 1558.0, 2366.0, 4357.0, 13735.0, 790192.0, 209676.0, 10618.0, 3803.0, 2279.0, 1540.0, 1086.0, 775.0, 542.0, 419.0, 310.0, 229.0, 177.0, 136.0, 115.0, 82.0, 56.0, 51.0, 33.0, 35.0, 14.0, 17.0, 8.0, 8.0, 6.0, 4.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.001140594482421875, -0.0011059492826461792, -0.0010713040828704834, -0.0010366588830947876, -0.0010020136833190918, -0.000967368483543396, -0.0009327232837677002, -0.0008980780839920044, -0.0008634328842163086, -0.0008287876844406128, -0.000794142484664917, -0.0007594972848892212, -0.0007248520851135254, -0.0006902068853378296, -0.0006555616855621338, -0.000620916485786438, -0.0005862712860107422, -0.0005516260862350464, -0.0005169808864593506, -0.0004823356866836548, -0.000447690486907959, -0.0004130452871322632, -0.0003784000873565674, -0.0003437548875808716, -0.0003091096878051758, -0.00027446448802948, -0.00023981928825378418, -0.00020517408847808838, -0.00017052888870239258, -0.00013588368892669678, -0.00010123848915100098, -6.659328937530518e-05, -3.1948089599609375e-05, 2.6971101760864258e-06, 3.7342309951782227e-05, 7.198750972747803e-05, 0.00010663270950317383, 0.00014127790927886963, 0.00017592310905456543, 0.00021056830883026123, 0.00024521350860595703, 0.00027985870838165283, 0.00031450390815734863, 0.00034914910793304443, 0.00038379430770874023, 0.00041843950748443604, 0.00045308470726013184, 0.00048772990703582764, 0.0005223751068115234, 0.0005570203065872192, 0.000591665506362915, 0.0006263107061386108, 0.0006609559059143066, 0.0006956011056900024, 0.0007302463054656982, 0.000764891505241394, 0.0007995367050170898, 0.0008341819047927856, 0.0008688271045684814, 0.0009034723043441772, 0.000938117504119873, 0.0009727627038955688, 0.0010074079036712646, 0.0010420531034469604, 0.0010766983032226562]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 1.0, 3.0, 0.0, 5.0, 15.0, 16.0, 80.0, 416.0, 343.0, 69.0, 26.0, 5.0, 5.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0007772445678710938, -0.0007536560297012329, -0.0007300674915313721, -0.0007064789533615112, -0.0006828904151916504, -0.0006593018770217896, -0.0006357133388519287, -0.0006121248006820679, -0.000588536262512207, -0.0005649477243423462, -0.0005413591861724854, -0.0005177706480026245, -0.0004941821098327637, -0.00047059357166290283, -0.000447005033493042, -0.00042341649532318115, -0.0003998279571533203, -0.00037623941898345947, -0.00035265088081359863, -0.0003290623426437378, -0.00030547380447387695, -0.0002818852663040161, -0.0002582967281341553, -0.00023470818996429443, -0.0002111196517944336, -0.00018753111362457275, -0.00016394257545471191, -0.00014035403728485107, -0.00011676549911499023, -9.31769609451294e-05, -6.958842277526855e-05, -4.5999884605407715e-05, -2.2411346435546875e-05, 1.1771917343139648e-06, 2.4765729904174805e-05, 4.8354268074035645e-05, 7.194280624389648e-05, 9.553134441375732e-05, 0.00011911988258361816, 0.000142708420753479, 0.00016629695892333984, 0.00018988549709320068, 0.00021347403526306152, 0.00023706257343292236, 0.0002606511116027832, 0.00028423964977264404, 0.0003078281879425049, 
0.0003314167261123657, 0.00035500526428222656, 0.0003785938024520874, 0.00040218234062194824, 0.0004257708787918091, 0.0004493594169616699, 0.00047294795513153076, 0.0004965364933013916, 0.0005201250314712524, 0.0005437135696411133, 0.0005673021078109741, 0.000590890645980835, 0.0006144791841506958, 0.0006380677223205566, 0.0006616562604904175, 0.0006852447986602783, 0.0007088333368301392, 0.000732421875]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 7.0, 6.0, 7.0, 18.0, 30.0, 35.0, 55.0, 113.0, 150.0, 202.0, 130.0, 90.0, 56.0, 22.0, 27.0, 14.0, 6.0, 9.0, 5.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0040842583402991295, -0.0039343987591564655, -0.003784538945183158, -0.0036346791312098503, -0.0034848195500671864, -0.0033349597360938787, -0.003185099922120571, -0.003035240340977907, -0.0028853805270045996, -0.002735520713031292, -0.002585661131888628, -0.0024358013179153204, -0.002285941503942013, -0.002136081922799349, -0.0019862221088260412, -0.0018363624112680554, -0.0016865027137100697, -0.0015366430161520839, -0.001386783318594098, -0.0012369235046207905, -0.0010870638070628047, -0.0009372041095048189, -0.0007873443537391722, -0.0006374845979735255, -0.00048762490041553974, -0.0003377651737537235, -0.00018790544709190726, -3.8045720430091023e-05, 0.00011181400623172522, 0.000261673703789711, 0.0004115334595553577, 0.0005613932153210044, 0.0007112524472177029, 0.0008611121447756886, 0.0010109718423336744, 0.001160831656306982, 0.0013106913538649678, 0.0014605510514229536, 0.0016104108653962612, 0.001760270562954247, 0.0019101302605122328, 0.0020599900744855404, 0.0022098496556282043, 0.002359709469601512, 0.0025095692835748196, 0.0026594288647174835, 0.002809288678690791, 0.0029591484926640987, 0.0031090080738067627, 0.0032588678877800703, 0.0034087274689227343, 0.003558587282896042, 0.003708446864038706, 0.0038583066780120134, 0.004008166491985321, 0.004158026073127985, 0.004307885654270649, 0.004457745235413313, 0.004607605282217264, 0.004757464863359928, 0.004907324444502592, 0.005057184025645256, 0.005207044072449207, 0.005356903653591871, 0.0055067637003958225]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [6.0, 1.0, 1.0, 1.0, 3.0, 5.0, 9.0, 9.0, 4.0, 3.0, 16.0, 16.0, 15.0, 13.0, 17.0, 21.0, 28.0, 33.0, 30.0, 29.0, 22.0, 30.0, 46.0, 50.0, 36.0, 30.0, 40.0, 43.0, 28.0, 32.0, 41.0, 26.0, 39.0, 44.0, 30.0, 25.0, 26.0, 22.0, 24.0, 23.0, 18.0, 10.0, 12.0, 6.0, 12.0, 14.0, 5.0, 7.0, 6.0, 4.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0009674841421656311, -0.0009320353274233639, -0.0008965865708887577, -0.0008611377561464906, -0.0008256889996118844, -0.0007902401848696172, -0.000754791428335011, -0.0007193426135927439, -0.0006838937988504767, -0.0006484449841082096, -0.0006129962275736034, -0.0005775474128313363, -0.00054209865629673, -0.0005066498415544629, -0.00047120105591602623, -0.00043575227027758956, -0.00040030351374298334, -0.00036485472810454667, -0.00032940594246611, -0.00029395712772384286, -0.00025850837118923664, -0.00022305957099888474, -0.00018761077080853283, -0.00015216198517009616, -0.00011671319953165948, -8.126441389322281e-05, -4.581562097882852e-05, -1.036682806443423e-05, 2.5081957574002445e-05, 6.053074321243912e-05, 
9.597954340279102e-05, 0.0001314283290412277, 0.0001668771728873253, 0.00020232595852576196, 0.00023777474416419864, 0.00027322355890646577, 0.000308672315441072, 0.0003441211301833391, 0.0003795699158217758, 0.00041501870146021247, 0.00045046748709864914, 0.0004859162727370858, 0.000521365087479353, 0.0005568138440139592, 0.0005922626587562263, 0.0006277114152908325, 0.0006631602300330997, 0.0006986090447753668, 0.000734057801309973, 0.0007695066160522401, 0.0008049553725868464, 0.0008404041873291135, 0.0008758529438637197, 0.0009113017586059868, 0.000946750515140593, 0.0009821993298828602, 0.0010176481446251273, 0.0010530969593673944, 0.0010885457741096616, 0.0011239944724366069, 0.001159443287178874, 0.0011948921019211411, 0.0012303409166634083, 0.0012657896149903536, 0.0013012384297326207]}, "gradients/decoder.model.decoder.layers.4.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 5.0, 4.0, 5.0, 4.0, 5.0, 12.0, 12.0, 27.0, 41.0, 60.0, 59.0, 101.0, 183.0, 263.0, 357.0, 564.0, 893.0, 1593.0, 2703.0, 5385.0, 12706.0, 62439.0, 4053838.0, 32640.0, 9820.0, 4447.0, 2280.0, 1313.0, 861.0, 465.0, 375.0, 245.0, 175.0, 116.0, 76.0, 61.0, 41.0, 27.0, 18.0, 21.0, 18.0, 13.0, 5.0, 3.0, 5.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.003772735595703125, -0.0036443471908569336, -0.003515958786010742, -0.0033875703811645508, -0.0032591819763183594, -0.003130793571472168, -0.0030024051666259766, -0.002874016761779785, -0.0027456283569335938, -0.0026172399520874023, -0.002488851547241211, -0.0023604631423950195, -0.002232074737548828, -0.0021036863327026367, -0.0019752979278564453, -0.001846909523010254, -0.0017185211181640625, -0.001590132713317871, -0.0014617443084716797, -0.0013333559036254883, -0.0012049674987792969, -0.0010765790939331055, -0.0009481906890869141, -0.0008198022842407227, -0.0006914138793945312, -0.0005630254745483398, -0.00043463706970214844, -0.00030624866485595703, -0.00017786026000976562, -4.947185516357422e-05, 7.891654968261719e-05, 0.0002073049545288086, 0.000335693359375, 0.0004640817642211914, 0.0005924701690673828, 0.0007208585739135742, 0.0008492469787597656, 0.000977635383605957, 0.0011060237884521484, 0.0012344121932983398, 0.0013628005981445312, 0.0014911890029907227, 0.001619577407836914, 0.0017479658126831055, 0.0018763542175292969, 0.0020047426223754883, 0.0021331310272216797, 0.002261519432067871, 0.0023899078369140625, 0.002518296241760254, 0.0026466846466064453, 0.0027750730514526367, 0.002903461456298828, 0.0030318498611450195, 0.003160238265991211, 0.0032886266708374023, 0.0034170150756835938, 0.003545403480529785, 0.0036737918853759766, 0.003802180290222168, 0.003930568695068359, 0.004058957099914551, 0.004187345504760742, 0.004315733909606934, 0.004444122314453125]}, "gradients/decoder.model.decoder.layers.4.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 4.0, 6.0, 5.0, 19.0, 14.0, 9.0, 22.0, 29.0, 41.0, 48.0, 54.0, 65.0, 57.0, 62.0, 75.0, 72.0, 65.0, 57.0, 64.0, 49.0, 43.0, 35.0, 32.0, 16.0, 18.0, 10.0, 8.0, 7.0, 2.0, 2.0, 5.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0003910064697265625, -0.0003774799406528473, -0.0003639534115791321, -0.00035042688250541687, -0.00033690035343170166, -0.00032337382435798645, -0.00030984729528427124, -0.00029632076621055603, -0.0002827942371368408, -0.0002692677080631256, -0.0002557411789894104, -0.0002422146499156952, 
-0.00022868812084197998, -0.00021516159176826477, -0.00020163506269454956, -0.00018810853362083435, -0.00017458200454711914, -0.00016105547547340393, -0.00014752894639968872, -0.0001340024173259735, -0.0001204758882522583, -0.00010694935917854309, -9.342283010482788e-05, -7.989630103111267e-05, -6.636977195739746e-05, -5.284324288368225e-05, -3.931671380996704e-05, -2.579018473625183e-05, -1.2263655662536621e-05, 1.2628734111785889e-06, 1.4789402484893799e-05, 2.831593155860901e-05, 4.184246063232422e-05, 5.536898970603943e-05, 6.889551877975464e-05, 8.242204785346985e-05, 9.594857692718506e-05, 0.00010947510600090027, 0.00012300163507461548, 0.0001365281641483307, 0.0001500546932220459, 0.0001635812222957611, 0.00017710775136947632, 0.00019063428044319153, 0.00020416080951690674, 0.00021768733859062195, 0.00023121386766433716, 0.00024474039673805237, 0.0002582669258117676, 0.0002717934548854828, 0.000285319983959198, 0.0002988465130329132, 0.0003123730421066284, 0.00032589957118034363, 0.00033942610025405884, 0.00035295262932777405, 0.00036647915840148926, 0.00038000568747520447, 0.0003935322165489197, 0.0004070587456226349, 0.0004205852746963501, 0.0004341118037700653, 0.0004476383328437805, 0.00046116486191749573, 0.00047469139099121094]}, "gradients/decoder.model.decoder.layers.4.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.0, 4.0, 2.0, 2.0, 6.0, 9.0, 13.0, 16.0, 24.0, 45.0, 81.0, 215.0, 945.0, 8844.0, 4116371.0, 64292.0, 2674.0, 413.0, 131.0, 71.0, 35.0, 28.0, 21.0, 10.0, 4.0, 2.0, 8.0, 2.0, 5.0, 1.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00595855712890625, -0.0057648420333862305, -0.005571126937866211, -0.005377411842346191, -0.005183696746826172, -0.004989981651306152, -0.004796266555786133, -0.004602551460266113, -0.004408836364746094, -0.004215121269226074, -0.004021406173706055, -0.003827691078186035, -0.0036339759826660156, -0.003440260887145996, -0.0032465457916259766, -0.003052830696105957, -0.0028591156005859375, -0.002665400505065918, -0.0024716854095458984, -0.002277970314025879, -0.0020842552185058594, -0.0018905401229858398, -0.0016968250274658203, -0.0015031099319458008, -0.0013093948364257812, -0.0011156797409057617, -0.0009219646453857422, -0.0007282495498657227, -0.0005345344543457031, -0.0003408193588256836, -0.00014710426330566406, 4.661083221435547e-05, 0.000240325927734375, 0.00043404102325439453, 0.0006277561187744141, 0.0008214712142944336, 0.0010151863098144531, 0.0012089014053344727, 0.0014026165008544922, 0.0015963315963745117, 0.0017900466918945312, 0.0019837617874145508, 0.0021774768829345703, 0.00237119197845459, 0.0025649070739746094, 0.002758622169494629, 0.0029523372650146484, 0.003146052360534668, 0.0033397674560546875, 0.003533482551574707, 0.0037271976470947266, 0.003920912742614746, 0.004114627838134766, 0.004308342933654785, 0.004502058029174805, 0.004695773124694824, 0.004889488220214844, 0.005083203315734863, 0.005276918411254883, 0.005470633506774902, 0.005664348602294922, 0.005858063697814941, 0.006051778793334961, 0.0062454938888549805, 0.006439208984375]}, "gradients/decoder.model.decoder.layers.4.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 1.0, 3.0, 4.0, 6.0, 8.0, 18.0, 21.0, 40.0, 94.0, 479.0, 2738.0, 461.0, 92.0, 44.0, 20.0, 22.0, 
15.0, 6.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010738372802734375, -0.0010491609573364258, -0.001024484634399414, -0.0009998083114624023, -0.0009751319885253906, -0.0009504556655883789, -0.0009257793426513672, -0.0009011030197143555, -0.0008764266967773438, -0.000851750373840332, -0.0008270740509033203, -0.0008023977279663086, -0.0007777214050292969, -0.0007530450820922852, -0.0007283687591552734, -0.0007036924362182617, -0.00067901611328125, -0.0006543397903442383, -0.0006296634674072266, -0.0006049871444702148, -0.0005803108215332031, -0.0005556344985961914, -0.0005309581756591797, -0.000506281852722168, -0.00048160552978515625, -0.00045692920684814453, -0.0004322528839111328, -0.0004075765609741211, -0.0003829002380371094, -0.00035822391510009766, -0.00033354759216308594, -0.0003088712692260742, -0.0002841949462890625, -0.0002595186233520508, -0.00023484230041503906, -0.00021016597747802734, -0.00018548965454101562, -0.0001608133316040039, -0.0001361370086669922, -0.00011146068572998047, -8.678436279296875e-05, -6.210803985595703e-05, -3.743171691894531e-05, -1.2755393981933594e-05, 1.1920928955078125e-05, 3.6597251892089844e-05, 6.127357482910156e-05, 8.594989776611328e-05, 0.000110626220703125, 0.00013530254364013672, 0.00015997886657714844, 0.00018465518951416016, 0.00020933151245117188, 0.0002340078353881836, 0.0002586841583251953, 0.00028336048126220703, 0.00030803680419921875, 0.00033271312713623047, 0.0003573894500732422, 0.0003820657730102539, 0.0004067420959472656, 0.00043141841888427734, 0.00045609474182128906, 0.0004807710647583008, 0.0005054473876953125]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 3.0, 3.0, 4.0, 3.0, 4.0, 3.0, 9.0, 12.0, 14.0, 15.0, 20.0, 37.0, 48.0, 76.0, 97.0, 101.0, 148.0, 108.0, 92.0, 56.0, 52.0, 22.0, 18.0, 10.0, 11.0, 6.0, 4.0, 6.0, 3.0, 4.0, 8.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0009207038674503565, -0.00088558963034302, -0.0008504753932356834, -0.0008153612143360078, -0.0007802469772286713, -0.0007451327401213348, -0.0007100185612216592, -0.0006749043241143227, -0.0006397900870069861, -0.0006046758498996496, -0.0005695616127923131, -0.0005344474338926375, -0.000499333196785301, -0.00046421895967796445, -0.0004291047516744584, -0.0003939905436709523, -0.0003588763065636158, -0.0003237620694562793, -0.0002886478614527732, -0.00025353365344926715, -0.00021841941634193063, -0.00018330519378650934, -0.00014819097123108804, -0.00011307676322758198, -7.796252612024546e-05, -4.2848303564824164e-05, -7.734081009402871e-06, 2.738014154601842e-05, 6.249436410143971e-05, 9.760858665686101e-05, 0.0001327228092122823, 0.00016783701721578836, 0.00020295125432312489, 0.00023806547687854618, 0.00027317969943396747, 0.00030829390743747354, 0.00034340814454481006, 0.0003785223816521466, 0.00041363658965565264, 0.0004487507976591587, 0.00048386503476649523, 0.0005189792718738317, 0.0005540935089811683, 0.0005892076878808439, 0.0006243219249881804, 0.0006594361620955169, 0.0006945503409951925, 0.000729664578102529, 0.0007647788152098656, 0.0007998930523172021, 0.0008350072894245386, 0.0008701214683242142, 0.0009052357054315507, 0.0009403499425388873, 0.0009754641214385629, 0.0010105783585458994, 0.001045692595653236, 0.0010808068327605724, 0.001115921069867909, 0.0011510353069752455, 0.0011861494276672602, 
0.0012212636647745967, 0.0012563779018819332, 0.0012914921389892697, 0.0013266063760966063]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 5.0, 3.0, 17.0, 10.0, 14.0, 17.0, 17.0, 31.0, 35.0, 33.0, 23.0, 30.0, 52.0, 44.0, 38.0, 50.0, 57.0, 50.0, 48.0, 52.0, 46.0, 40.0, 41.0, 32.0, 45.0, 34.0, 22.0, 23.0, 17.0, 18.0, 16.0, 12.0, 7.0, 9.0, 7.0, 4.0, 0.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00041233206866309047, -0.00039586066850461066, -0.00037938926834613085, -0.0003629178972914815, -0.0003464464971330017, -0.0003299750969745219, -0.0003135037259198725, -0.0002970323257613927, -0.0002805609256029129, -0.0002640895254444331, -0.0002476181252859533, -0.00023114675423130393, -0.00021467535407282412, -0.0001982039539143443, -0.00018173256830777973, -0.00016526118270121515, -0.00014878978254273534, -0.00013231838238425553, -0.00011584699677769095, -9.937560389516875e-05, -8.290421101264656e-05, -6.643281813012436e-05, -4.9961425247602165e-05, -3.349003236507997e-05, -1.7018639482557774e-05, -5.47246600035578e-07, 1.5924146282486618e-05, 3.239553916500881e-05, 4.886693204753101e-05, 6.53383249300532e-05, 8.18097178125754e-05, 9.82811106950976e-05, 0.00011475244536995888, 0.0001312238455284387, 0.00014769523113500327, 0.00016416661674156785, 0.00018063801690004766, 0.00019710941705852747, 0.00021358080266509205, 0.00023005218827165663, 0.00024652358843013644, 0.00026299498858861625, 0.00027946638874709606, 0.0002959377598017454, 0.0003124091599602252, 0.00032888056011870503, 0.0003453519311733544, 0.0003618233313318342, 0.000378294731490314, 0.0003947661316487938, 0.0004112375318072736, 0.000427708902861923, 0.0004441803030204028, 0.0004606517031788826, 0.00047712307423353195, 0.0004935945034958422, 0.0005100658745504916, 0.0005265372456051409, 0.0005430086748674512, 0.0005594800459221005, 0.0005759514169767499, 0.0005924228462390602, 0.0006088942172937095, 0.0006253656465560198, 0.0006418370176106691]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 1.0, 1.0, 3.0, 3.0, 9.0, 5.0, 10.0, 18.0, 19.0, 16.0, 33.0, 38.0, 49.0, 60.0, 89.0, 106.0, 148.0, 228.0, 363.0, 601.0, 1197.0, 3117.0, 11394.0, 69564.0, 809643.0, 128176.0, 16116.0, 4006.0, 1529.0, 739.0, 395.0, 245.0, 163.0, 122.0, 87.0, 67.0, 51.0, 39.0, 22.0, 21.0, 22.0, 13.0, 9.0, 8.0, 2.0, 5.0, 5.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.006261825561523e-05, -8.66791233420372e-05, -8.329562842845917e-05, -7.991213351488113e-05, -7.65286386013031e-05, -7.314514368772507e-05, -6.976164877414703e-05, -6.6378153860569e-05, -6.299465894699097e-05, -5.961116403341293e-05, -5.62276691198349e-05, -5.2844174206256866e-05, -4.946067929267883e-05, -4.60771843791008e-05, -4.2693689465522766e-05, -3.931019455194473e-05, -3.59266996383667e-05, -3.2543204724788666e-05, -2.9159709811210632e-05, -2.57762148976326e-05, -2.2392719984054565e-05, -1.9009225070476532e-05, -1.56257301568985e-05, -1.2242235243320465e-05, -8.858740329742432e-06, -5.475245416164398e-06, -2.0917505025863647e-06, 1.2917444109916687e-06, 4.675239324569702e-06, 8.058734238147736e-06, 1.1442229151725769e-05, 1.4825724065303802e-05, 1.8209218978881836e-05, 2.159271389245987e-05, 2.4976208806037903e-05, 2.8359703719615936e-05, 3.174319863319397e-05, 3.5126693546772e-05, 3.851018846035004e-05, 
4.189368337392807e-05, 4.5277178287506104e-05, 4.866067320108414e-05, 5.204416811466217e-05, 5.5427663028240204e-05, 5.881115794181824e-05, 6.219465285539627e-05, 6.55781477689743e-05, 6.896164268255234e-05, 7.234513759613037e-05, 7.57286325097084e-05, 7.911212742328644e-05, 8.249562233686447e-05, 8.58791172504425e-05, 8.926261216402054e-05, 9.264610707759857e-05, 9.60296019911766e-05, 9.941309690475464e-05, 0.00010279659181833267, 0.0001061800867319107, 0.00010956358164548874, 0.00011294707655906677, 0.0001163305714726448, 0.00011971406638622284, 0.00012309756129980087, 0.0001264810562133789]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 5.0, 4.0, 6.0, 4.0, 6.0, 6.0, 11.0, 16.0, 12.0, 20.0, 19.0, 29.0, 37.0, 32.0, 39.0, 36.0, 53.0, 38.0, 64.0, 55.0, 53.0, 56.0, 47.0, 41.0, 45.0, 48.0, 47.0, 41.0, 33.0, 24.0, 12.0, 12.0, 11.0, 11.0, 14.0, 5.0, 5.0, 3.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003762245178222656, -0.0003623291850090027, -0.00034843385219573975, -0.0003345385193824768, -0.00032064318656921387, -0.00030674785375595093, -0.000292852520942688, -0.00027895718812942505, -0.0002650618553161621, -0.00025116652250289917, -0.00023727118968963623, -0.0002233758568763733, -0.00020948052406311035, -0.0001955851912498474, -0.00018168985843658447, -0.00016779452562332153, -0.0001538991928100586, -0.00014000385999679565, -0.00012610852718353271, -0.00011221319437026978, -9.831786155700684e-05, -8.44225287437439e-05, -7.052719593048096e-05, -5.663186311721802e-05, -4.273653030395508e-05, -2.884119749069214e-05, -1.49458646774292e-05, -1.0505318641662598e-06, 1.284480094909668e-05, 2.674013376235962e-05, 4.063546657562256e-05, 5.45307993888855e-05, 6.842613220214844e-05, 8.232146501541138e-05, 9.621679782867432e-05, 0.00011011213064193726, 0.0001240074634552002, 0.00013790279626846313, 0.00015179812908172607, 0.00016569346189498901, 0.00017958879470825195, 0.0001934841275215149, 0.00020737946033477783, 0.00022127479314804077, 0.0002351701259613037, 0.00024906545877456665, 0.0002629607915878296, 0.00027685612440109253, 0.00029075145721435547, 0.0003046467900276184, 0.00031854212284088135, 0.0003324374556541443, 0.0003463327884674072, 0.00036022812128067017, 0.0003741234540939331, 0.00038801878690719604, 0.000401914119720459, 0.0004158094525337219, 0.00042970478534698486, 0.0004436001181602478, 0.00045749545097351074, 0.0004713907837867737, 0.0004852861166000366, 0.0004991814494132996, 0.0005130767822265625]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 7.0, 5.0, 16.0, 21.0, 30.0, 38.0, 73.0, 97.0, 104.0, 198.0, 320.0, 1002.0, 6254.0, 101999.0, 911023.0, 23183.0, 2649.0, 709.0, 268.0, 177.0, 120.0, 61.0, 71.0, 42.0, 23.0, 16.0, 18.0, 12.0, 6.0, 6.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6776065826416016e-05, -3.526173532009125e-05, -3.374740481376648e-05, -3.223307430744171e-05, -3.071874380111694e-05, -2.9204413294792175e-05, -2.7690082788467407e-05, -2.617575228214264e-05, -2.466142177581787e-05, -2.3147091269493103e-05, -2.1632760763168335e-05, -2.0118430256843567e-05, -1.86040997505188e-05, -1.708976924419403e-05, -1.5575438737869263e-05, -1.4061108231544495e-05, -1.2546777725219727e-05, -1.1032447218894958e-05, 
-9.51811671257019e-06, -8.003786206245422e-06, -6.489455699920654e-06, -4.975125193595886e-06, -3.460794687271118e-06, -1.94646418094635e-06, -4.3213367462158203e-07, 1.082196831703186e-06, 2.596527338027954e-06, 4.110857844352722e-06, 5.62518835067749e-06, 7.139518857002258e-06, 8.653849363327026e-06, 1.0168179869651794e-05, 1.1682510375976562e-05, 1.319684088230133e-05, 1.4711171388626099e-05, 1.6225501894950867e-05, 1.7739832401275635e-05, 1.9254162907600403e-05, 2.076849341392517e-05, 2.228282392024994e-05, 2.3797154426574707e-05, 2.5311484932899475e-05, 2.6825815439224243e-05, 2.834014594554901e-05, 2.985447645187378e-05, 3.136880695819855e-05, 3.2883137464523315e-05, 3.4397467970848083e-05, 3.591179847717285e-05, 3.742612898349762e-05, 3.894045948982239e-05, 4.0454789996147156e-05, 4.1969120502471924e-05, 4.348345100879669e-05, 4.499778151512146e-05, 4.651211202144623e-05, 4.8026442527770996e-05, 4.9540773034095764e-05, 5.105510354042053e-05, 5.25694340467453e-05, 5.408376455307007e-05, 5.5598095059394836e-05, 5.7112425565719604e-05, 5.862675607204437e-05, 6.014108657836914e-05]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 7.0, 5.0, 13.0, 18.0, 17.0, 20.0, 36.0, 31.0, 31.0, 44.0, 64.0, 72.0, 56.0, 54.0, 75.0, 65.0, 54.0, 75.0, 52.0, 49.0, 36.0, 26.0, 29.0, 27.0, 11.0, 11.0, 12.0, 6.0, 3.0, 5.0, 2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0009632110595703125, -0.0009235888719558716, -0.0008839666843414307, -0.0008443444967269897, -0.0008047223091125488, -0.0007651001214981079, -0.000725477933883667, -0.0006858557462692261, -0.0006462335586547852, -0.0006066113710403442, -0.0005669891834259033, -0.0005273669958114624, -0.0004877448081970215, -0.00044812262058258057, -0.00040850043296813965, -0.00036887824535369873, -0.0003292560577392578, -0.0002896338701248169, -0.000250011682510376, -0.00021038949489593506, -0.00017076730728149414, -0.00013114511966705322, -9.15229320526123e-05, -5.190074443817139e-05, -1.2278556823730469e-05, 2.734363079071045e-05, 6.696581840515137e-05, 0.00010658800601959229, 0.0001462101936340332, 0.00018583238124847412, 0.00022545456886291504, 0.00026507675647735596, 0.0003046989440917969, 0.0003443211317062378, 0.0003839433193206787, 0.00042356550693511963, 0.00046318769454956055, 0.0005028098821640015, 0.0005424320697784424, 0.0005820542573928833, 0.0006216764450073242, 0.0006612986326217651, 0.0007009208202362061, 0.000740543007850647, 0.0007801651954650879, 0.0008197873830795288, 0.0008594095706939697, 0.0008990317583084106, 0.0009386539459228516, 0.0009782761335372925, 0.0010178983211517334, 0.0010575205087661743, 0.0010971426963806152, 0.0011367648839950562, 0.001176387071609497, 0.001216009259223938, 0.001255631446838379, 0.0012952536344528198, 0.0013348758220672607, 0.0013744980096817017, 0.0014141201972961426, 0.0014537423849105835, 0.0014933645725250244, 0.0015329867601394653, 0.0015726089477539062]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1948.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1044605.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2005.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0], "bins": 
[-1.1920928955078125e-07, -1.1548399925231934e-07, -1.1175870895385742e-07, -1.0803341865539551e-07, -1.043081283569336e-07, -1.0058283805847168e-07, -9.685754776000977e-08, -9.313225746154785e-08, -8.940696716308594e-08, -8.568167686462402e-08, -8.195638656616211e-08, -7.82310962677002e-08, -7.450580596923828e-08, -7.078051567077637e-08, -6.705522537231445e-08, -6.332993507385254e-08, -5.960464477539063e-08, -5.587935447692871e-08, -5.21540641784668e-08, -4.842877388000488e-08, -4.470348358154297e-08, -4.0978193283081055e-08, -3.725290298461914e-08, -3.3527612686157227e-08, -2.9802322387695312e-08, -2.60770320892334e-08, -2.2351741790771484e-08, -1.862645149230957e-08, -1.4901161193847656e-08, -1.1175870895385742e-08, -7.450580596923828e-09, -3.725290298461914e-09, 0.0, 3.725290298461914e-09, 7.450580596923828e-09, 1.1175870895385742e-08, 1.4901161193847656e-08, 1.862645149230957e-08, 2.2351741790771484e-08, 2.60770320892334e-08, 2.9802322387695312e-08, 3.3527612686157227e-08, 3.725290298461914e-08, 4.0978193283081055e-08, 4.470348358154297e-08, 4.842877388000488e-08, 5.21540641784668e-08, 5.587935447692871e-08, 5.960464477539063e-08, 6.332993507385254e-08, 6.705522537231445e-08, 7.078051567077637e-08, 7.450580596923828e-08, 7.82310962677002e-08, 8.195638656616211e-08, 8.568167686462402e-08, 8.940696716308594e-08, 9.313225746154785e-08, 9.685754776000977e-08, 1.0058283805847168e-07, 1.043081283569336e-07, 1.0803341865539551e-07, 1.1175870895385742e-07, 1.1548399925231934e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 17.0, 0.0, 20.0, 0.0, 0.0, 34.0, 0.0, 48.0, 0.0, 0.0, 90.0, 0.0, 111.0, 0.0, 0.0, 128.0, 0.0, 135.0, 0.0, 117.0, 0.0, 0.0, 101.0, 0.0, 67.0, 0.0, 0.0, 71.0, 0.0, 36.0, 0.0, 0.0, 23.0, 0.0, 8.0, 0.0, 0.0, 4.0, 0.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.344650268554688e-07, -8.102506399154663e-07, -7.860362529754639e-07, -7.618218660354614e-07, -7.37607479095459e-07, -7.133930921554565e-07, -6.891787052154541e-07, -6.649643182754517e-07, -6.407499313354492e-07, -6.165355443954468e-07, -5.923211574554443e-07, -5.681067705154419e-07, -5.438923835754395e-07, -5.19677996635437e-07, -4.954636096954346e-07, -4.7124922275543213e-07, -4.470348358154297e-07, -4.2282044887542725e-07, -3.986060619354248e-07, -3.7439167499542236e-07, -3.501772880554199e-07, -3.259629011154175e-07, -3.0174851417541504e-07, -2.775341272354126e-07, -2.5331974029541016e-07, -2.2910535335540771e-07, -2.0489096641540527e-07, -1.8067657947540283e-07, -1.564621925354004e-07, -1.3224780559539795e-07, -1.0803341865539551e-07, -8.381903171539307e-08, -5.960464477539063e-08, -3.5390257835388184e-08, -1.1175870895385742e-08, 1.30385160446167e-08, 3.725290298461914e-08, 6.146728992462158e-08, 8.568167686462402e-08, 1.0989606380462646e-07, 1.341104507446289e-07, 1.5832483768463135e-07, 1.825392246246338e-07, 2.0675361156463623e-07, 2.3096799850463867e-07, 2.551823854446411e-07, 2.7939677238464355e-07, 3.03611159324646e-07, 3.2782554626464844e-07, 3.520399332046509e-07, 3.762543201446533e-07, 4.0046870708465576e-07, 4.246830940246582e-07, 4.4889748096466064e-07, 4.731118679046631e-07, 4.973262548446655e-07, 5.21540641784668e-07, 5.457550287246704e-07, 5.699694156646729e-07, 5.941838026046753e-07, 6.183981895446777e-07, 6.426125764846802e-07, 6.668269634246826e-07, 6.910413503646851e-07, 7.152557373046875e-07]}, 
"gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 2.0, 7.0, 7.0, 5.0, 19.0, 15.0, 26.0, 43.0, 59.0, 84.0, 124.0, 159.0, 151.0, 92.0, 81.0, 41.0, 22.0, 20.0, 10.0, 5.0, 7.0, 7.0, 6.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0008186378399841487, -0.0007920170901343226, -0.0007653962820768356, -0.0007387755322270095, -0.0007121547823771834, -0.0006855339743196964, -0.0006589132244698703, -0.0006322924746200442, -0.0006056717247702181, -0.000579050974920392, -0.000552430166862905, -0.0005258094170130789, -0.0004991886671632528, -0.0004725678882095963, -0.0004459471092559397, -0.0004193263594061136, -0.00039270558045245707, -0.0003660848014988005, -0.0003394640516489744, -0.00031284327269531786, -0.00028622252284549177, -0.0002596017438918352, -0.0002329809794900939, -0.00020636021508835256, -0.00017973945068661124, -0.0001531186862848699, -0.00012649792188312858, -9.987715020542964e-05, -7.325638580368832e-05, -4.663561412598938e-05, -2.001484972424805e-05, 6.605914677493274e-06, 3.32266790792346e-05, 5.9847443480975926e-05, 8.646820788271725e-05, 0.00011308897956041619, 0.0001397097366861999, 0.00016633051563985646, 0.00019295128004159778, 0.0002195720444433391, 0.0002461927942931652, 0.00027281357324682176, 0.00029943432309664786, 0.0003260551020503044, 0.0003526758519001305, 0.00037929663085378706, 0.0004059174098074436, 0.0004325381596572697, 0.00045915893861092627, 0.0004857797175645828, 0.0005124004674144089, 0.000539021217264235, 0.000565642025321722, 0.0005922627751715481, 0.0006188835250213742, 0.0006455043330788612, 0.0006721250829286873, 0.0006987458327785134, 0.0007253666408360004, 0.0007519873906858265, 0.0007786081405356526, 0.0008052289485931396, 0.0008318496984429657, 0.0008584704482927918, 0.0008850911981426179]}, 
"gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 6.0, 13.0, 10.0, 17.0, 20.0, 24.0, 42.0, 41.0, 21.0, 36.0, 59.0, 46.0, 52.0, 57.0, 69.0, 54.0, 62.0, 50.0, 53.0, 44.0, 40.0, 38.0, 26.0, 30.0, 21.0, 13.0, 17.0, 13.0, 13.0, 5.0, 2.0, 5.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003279922530055046, -0.00031572103034704924, -0.00030344980768859386, -0.0002911785850301385, -0.0002789073623716831, -0.00026663613971322775, -0.0002543648879509419, -0.00024209366529248655, -0.00022982244263403118, -0.0002175512199755758, -0.00020527999731712043, -0.00019300876010674983, -0.00018073753744829446, -0.0001684663147898391, -0.0001561950775794685, -0.00014392385492101312, -0.00013165263226255774, -0.00011938140960410237, -0.00010711017966968939, -9.48389497352764e-05, -8.256772707682103e-05, -7.029650441836566e-05, -5.802527448395267e-05, -4.5754044549539685e-05, -3.3482821891084313e-05, -2.1211595594650134e-05, -8.940369298215955e-06, 3.3308569982182235e-06, 1.5602083294652402e-05, 2.7873305953107774e-05, 4.014453588752076e-05, 5.2415765821933746e-05, 6.468695937655866e-05, 7.695818203501403e-05, 8.922941196942702e-05, 0.00010150064190384, 0.00011377186456229538, 0.00012604308722075075, 0.00013831432443112135, 0.00015058554708957672, 0.0001628567697480321, 0.00017512799240648746, 0.00018739921506494284, 0.00019967045227531344, 0.0002119416749337688, 0.00022421289759222418, 0.00023648413480259478, 0.00024875535746105015, 0.0002610265801195055, 0.0002732978027779609, 0.00028556902543641627, 0.00029784024809487164, 0.00031011149985715747, 0.00032238272251561284, 0.0003346539451740682, 0.0003469251678325236, 0.00035919639049097896, 0.00037146761314943433, 0.0003837388358078897, 0.00039601005846634507, 0.00040828128112480044, 0.0004205525037832558, 0.00043282375554554164, 0.000445094978203997, 0.0004573662008624524]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 0.0, 4.0, 7.0, 7.0, 7.0, 15.0, 21.0, 25.0, 41.0, 67.0, 101.0, 137.0, 229.0, 304.0, 514.0, 841.0, 1260.0, 2211.0, 3789.0, 7109.0, 13653.0, 31421.0, 88447.0, 405079.0, 354373.0, 80651.0, 29308.0, 12964.0, 6740.0, 3668.0, 2081.0, 1265.0, 760.0, 533.0, 315.0, 190.0, 139.0, 91.0, 49.0, 48.0, 28.0, 24.0, 17.0, 10.0, 7.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00074005126953125, -0.0007188841700553894, -0.0006977170705795288, -0.0006765499711036682, -0.0006553828716278076, -0.000634215772151947, -0.0006130486726760864, -0.0005918815732002258, -0.0005707144737243652, -0.0005495473742485046, -0.000528380274772644, -0.0005072131752967834, -0.00048604607582092285, -0.00046487897634506226, -0.00044371187686920166, -0.00042254477739334106, -0.00040137767791748047, -0.0003802105784416199, -0.0003590434789657593, -0.0003378763794898987, -0.0003167092800140381, -0.0002955421805381775, -0.0002743750810623169, -0.0002532079815864563, -0.0002320408821105957, -0.0002108737826347351, -0.0001897066831588745, -0.00016853958368301392, -0.00014737248420715332, -0.00012620538473129272, -0.00010503828525543213, -8.387118577957153e-05, -6.270408630371094e-05, -4.153698682785034e-05, -2.0369887351989746e-05, 7.972121238708496e-07, 2.1964311599731445e-05, 4.313141107559204e-05, 6.429851055145264e-05, 8.546561002731323e-05, 0.00010663270950317383, 
0.00012779980897903442, 0.00014896690845489502, 0.00017013400793075562, 0.0001913011074066162, 0.0002124682068824768, 0.0002336353063583374, 0.000254802405834198, 0.0002759695053100586, 0.0002971366047859192, 0.0003183037042617798, 0.0003394708037376404, 0.000360637903213501, 0.00038180500268936157, 0.00040297210216522217, 0.00042413920164108276, 0.00044530630111694336, 0.00046647340059280396, 0.00048764050006866455, 0.0005088075995445251, 0.0005299746990203857, 0.0005511417984962463, 0.0005723088979721069, 0.0005934759974479675, 0.0006146430969238281]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 3.0, 4.0, 2.0, 3.0, 7.0, 8.0, 10.0, 12.0, 20.0, 21.0, 35.0, 27.0, 38.0, 34.0, 48.0, 57.0, 58.0, 70.0, 77.0, 63.0, 72.0, 57.0, 56.0, 45.0, 38.0, 25.0, 28.0, 23.0, 22.0, 6.0, 14.0, 4.0, 5.0, 10.0, 1.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0001373291015625, -0.0001328159123659134, -0.00012830272316932678, -0.00012378953397274017, -0.00011927634477615356, -0.00011476315557956696, -0.00011024996638298035, -0.00010573677718639374, -0.00010122358798980713, -9.671039879322052e-05, -9.219720959663391e-05, -8.76840204000473e-05, -8.31708312034607e-05, -7.865764200687408e-05, -7.414445281028748e-05, -6.963126361370087e-05, -6.511807441711426e-05, -6.060488522052765e-05, -5.609169602394104e-05, -5.157850682735443e-05, -4.706531763076782e-05, -4.255212843418121e-05, -3.8038939237594604e-05, -3.3525750041007996e-05, -2.9012560844421387e-05, -2.4499371647834778e-05, -1.998618245124817e-05, -1.547299325466156e-05, -1.0959804058074951e-05, -6.446614861488342e-06, -1.9334256649017334e-06, 2.5797635316848755e-06, 7.092952728271484e-06, 1.1606141924858093e-05, 1.6119331121444702e-05, 2.063252031803131e-05, 2.514570951461792e-05, 2.965889871120453e-05, 3.417208790779114e-05, 3.8685277104377747e-05, 4.3198466300964355e-05, 4.7711655497550964e-05, 5.222484469413757e-05, 5.673803389072418e-05, 6.125122308731079e-05, 6.57644122838974e-05, 7.027760148048401e-05, 7.479079067707062e-05, 7.930397987365723e-05, 8.381716907024384e-05, 8.833035826683044e-05, 9.284354746341705e-05, 9.735673666000366e-05, 0.00010186992585659027, 0.00010638311505317688, 0.00011089630424976349, 0.0001154094934463501, 0.0001199226826429367, 0.00012443587183952332, 0.00012894906103610992, 0.00013346225023269653, 0.00013797543942928314, 0.00014248862862586975, 0.00014700181782245636, 0.00015151500701904297]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 8.0, 6.0, 10.0, 14.0, 15.0, 14.0, 24.0, 39.0, 56.0, 77.0, 127.0, 252.0, 519.0, 1182.0, 2942.0, 8024.0, 25004.0, 93618.0, 450946.0, 356662.0, 76362.0, 21275.0, 6819.0, 2515.0, 1043.0, 462.0, 224.0, 115.0, 73.0, 39.0, 35.0, 19.0, 12.0, 6.0, 6.0, 9.0, 6.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0006322860717773438, -0.0006156153976917267, -0.0005989447236061096, -0.0005822740495204926, -0.0005656033754348755, -0.0005489327013492584, -0.0005322620272636414, -0.0005155913531780243, -0.0004989206790924072, -0.00048225000500679016, -0.0004655793309211731, -0.00044890865683555603, -0.00043223798274993896, -0.0004155673086643219, -0.00039889663457870483, -0.00038222596049308777, -0.0003655552864074707, -0.00034888461232185364, -0.00033221393823623657, 
-0.0003155432641506195, -0.00029887259006500244, -0.0002822019159793854, -0.0002655312418937683, -0.00024886056780815125, -0.00023218989372253418, -0.00021551921963691711, -0.00019884854555130005, -0.00018217787146568298, -0.00016550719738006592, -0.00014883652329444885, -0.0001321658492088318, -0.00011549517512321472, -9.882450103759766e-05, -8.215382695198059e-05, -6.548315286636353e-05, -4.881247878074646e-05, -3.2141804695129395e-05, -1.547113060951233e-05, 1.1995434761047363e-06, 1.7870217561721802e-05, 3.454089164733887e-05, 5.121156573295593e-05, 6.7882239818573e-05, 8.455291390419006e-05, 0.00010122358798980713, 0.0001178942620754242, 0.00013456493616104126, 0.00015123561024665833, 0.0001679062843322754, 0.00018457695841789246, 0.00020124763250350952, 0.0002179183065891266, 0.00023458898067474365, 0.0002512596547603607, 0.0002679303288459778, 0.00028460100293159485, 0.0003012716770172119, 0.000317942351102829, 0.00033461302518844604, 0.0003512836992740631, 0.0003679543733596802, 0.00038462504744529724, 0.0004012957215309143, 0.00041796639561653137, 0.00043463706970214844]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 1.0, 3.0, 1.0, 3.0, 4.0, 12.0, 3.0, 11.0, 14.0, 23.0, 34.0, 32.0, 37.0, 45.0, 36.0, 38.0, 44.0, 55.0, 66.0, 57.0, 70.0, 61.0, 49.0, 46.0, 40.0, 43.0, 41.0, 15.0, 27.0, 26.0, 13.0, 13.0, 15.0, 6.0, 5.0, 8.0, 4.0, 1.0, 2.0, 7.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017499923706054688, -0.00016838312149047852, -0.00016176700592041016, -0.0001551508903503418, -0.00014853477478027344, -0.00014191865921020508, -0.00013530254364013672, -0.00012868642807006836, -0.0001220703125, -0.00011545419692993164, -0.00010883808135986328, -0.00010222196578979492, -9.560585021972656e-05, -8.89897346496582e-05, -8.237361907958984e-05, -7.575750350952148e-05, -6.914138793945312e-05, -6.252527236938477e-05, -5.5909156799316406e-05, -4.929304122924805e-05, -4.267692565917969e-05, -3.606081008911133e-05, -2.944469451904297e-05, -2.282857894897461e-05, -1.621246337890625e-05, -9.59634780883789e-06, -2.9802322387695312e-06, 3.635883331298828e-06, 1.0251998901367188e-05, 1.6868114471435547e-05, 2.3484230041503906e-05, 3.0100345611572266e-05, 3.6716461181640625e-05, 4.3332576751708984e-05, 4.9948692321777344e-05, 5.65648078918457e-05, 6.318092346191406e-05, 6.979703903198242e-05, 7.641315460205078e-05, 8.302927017211914e-05, 8.96453857421875e-05, 9.626150131225586e-05, 0.00010287761688232422, 0.00010949373245239258, 0.00011610984802246094, 0.0001227259635925293, 0.00012934207916259766, 0.00013595819473266602, 0.00014257431030273438, 0.00014919042587280273, 0.0001558065414428711, 0.00016242265701293945, 0.0001690387725830078, 0.00017565488815307617, 0.00018227100372314453, 0.0001888871192932129, 0.00019550323486328125, 0.0002021193504333496, 0.00020873546600341797, 0.00021535158157348633, 0.0002219676971435547, 0.00022858381271362305, 0.0002351999282836914, 0.00024181604385375977, 0.0002484321594238281]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 6.0, 4.0, 11.0, 7.0, 12.0, 19.0, 24.0, 48.0, 52.0, 71.0, 124.0, 220.0, 442.0, 987.0, 2689.0, 15194.0, 1003931.0, 19468.0, 3052.0, 1088.0, 472.0, 227.0, 150.0, 68.0, 64.0, 38.0, 25.0, 19.0, 13.0, 11.0, 7.0, 6.0, 7.0, 0.0, 3.0, 5.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000244140625, -0.00023587793111801147, -0.00022761523723602295, -0.00021935254335403442, -0.0002110898494720459, -0.00020282715559005737, -0.00019456446170806885, -0.00018630176782608032, -0.0001780390739440918, -0.00016977638006210327, -0.00016151368618011475, -0.00015325099229812622, -0.0001449882984161377, -0.00013672560453414917, -0.00012846291065216064, -0.00012020021677017212, -0.0001119375228881836, -0.00010367482900619507, -9.541213512420654e-05, -8.714944124221802e-05, -7.888674736022949e-05, -7.062405347824097e-05, -6.236135959625244e-05, -5.4098665714263916e-05, -4.583597183227539e-05, -3.7573277950286865e-05, -2.931058406829834e-05, -2.1047890186309814e-05, -1.2785196304321289e-05, -4.522502422332764e-06, 3.7401914596557617e-06, 1.2002885341644287e-05, 2.0265579223632812e-05, 2.8528273105621338e-05, 3.679096698760986e-05, 4.505366086959839e-05, 5.3316354751586914e-05, 6.157904863357544e-05, 6.984174251556396e-05, 7.810443639755249e-05, 8.636713027954102e-05, 9.462982416152954e-05, 0.00010289251804351807, 0.00011115521192550659, 0.00011941790580749512, 0.00012768059968948364, 0.00013594329357147217, 0.0001442059874534607, 0.00015246868133544922, 0.00016073137521743774, 0.00016899406909942627, 0.0001772567629814148, 0.00018551945686340332, 0.00019378215074539185, 0.00020204484462738037, 0.0002103075385093689, 0.00021857023239135742, 0.00022683292627334595, 0.00023509562015533447, 0.000243358314037323, 0.0002516210079193115, 0.00025988370180130005, 0.0002681463956832886, 0.0002764090895652771, 0.0002846717834472656]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 27.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 207.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 539.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 206.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7881393432617188e-07, -1.7229467630386353e-07, -1.6577541828155518e-07, -1.5925616025924683e-07, -1.5273690223693848e-07, -1.4621764421463013e-07, -1.3969838619232178e-07, -1.3317912817001343e-07, -1.2665987014770508e-07, -1.2014061212539673e-07, -1.1362135410308838e-07, -1.0710209608078003e-07, -1.0058283805847168e-07, -9.406358003616333e-08, -8.754432201385498e-08, -8.102506399154663e-08, -7.450580596923828e-08, -6.798654794692993e-08, -6.146728992462158e-08, -5.494803190231323e-08, -4.842877388000488e-08, -4.190951585769653e-08, -3.5390257835388184e-08, -2.8870999813079834e-08, -2.2351741790771484e-08, -1.5832483768463135e-08, -9.313225746154785e-09, -2.7939677238464355e-09, 3.725290298461914e-09, 1.0244548320770264e-08, 1.6763806343078613e-08, 2.3283064365386963e-08, 2.9802322387695312e-08, 3.632158041000366e-08, 4.284083843231201e-08, 4.936009645462036e-08, 5.587935447692871e-08, 6.239861249923706e-08, 6.891787052154541e-08, 7.543712854385376e-08, 8.195638656616211e-08, 8.847564458847046e-08, 9.499490261077881e-08, 1.0151416063308716e-07, 1.0803341865539551e-07, 1.1455267667770386e-07, 1.210719347000122e-07, 1.2759119272232056e-07, 1.341104507446289e-07, 1.4062970876693726e-07, 1.471489667892456e-07, 1.5366822481155396e-07, 1.601874828338623e-07, 1.6670674085617065e-07, 1.73225998878479e-07, 1.7974525690078735e-07, 1.862645149230957e-07, 1.9278377294540405e-07, 1.993030309677124e-07, 2.0582228899002075e-07, 2.123415470123291e-07, 2.1886080503463745e-07, 2.253800630569458e-07, 
2.3189932107925415e-07, 2.384185791015625e-07]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 3.0, 1.0, 1.0, 2.0, 7.0, 6.0, 11.0, 13.0, 13.0, 34.0, 47.0, 83.0, 143.0, 282.0, 519.0, 1072.0, 2353.0, 5692.0, 19962.0, 961802.0, 42699.0, 7883.0, 3142.0, 1420.0, 626.0, 330.0, 169.0, 89.0, 57.0, 45.0, 20.0, 5.0, 7.0, 6.0, 8.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.739398956298828e-05, -9.441189467906952e-05, -9.142979979515076e-05, -8.8447704911232e-05, -8.546561002731323e-05, -8.248351514339447e-05, -7.950142025947571e-05, -7.651932537555695e-05, -7.353723049163818e-05, -7.055513560771942e-05, -6.757304072380066e-05, -6.45909458398819e-05, -6.160885095596313e-05, -5.862675607204437e-05, -5.564466118812561e-05, -5.266256630420685e-05, -4.9680471420288086e-05, -4.6698376536369324e-05, -4.371628165245056e-05, -4.07341867685318e-05, -3.775209188461304e-05, -3.4769997000694275e-05, -3.178790211677551e-05, -2.880580723285675e-05, -2.5823712348937988e-05, -2.2841617465019226e-05, -1.9859522581100464e-05, -1.68774276971817e-05, -1.389533281326294e-05, -1.0913237929344177e-05, -7.931143045425415e-06, -4.949048161506653e-06, -1.9669532775878906e-06, 1.0151416063308716e-06, 3.997236490249634e-06, 6.979331374168396e-06, 9.961426258087158e-06, 1.294352114200592e-05, 1.5925616025924683e-05, 1.8907710909843445e-05, 2.1889805793762207e-05, 2.487190067768097e-05, 2.785399556159973e-05, 3.0836090445518494e-05, 3.3818185329437256e-05, 3.680028021335602e-05, 3.978237509727478e-05, 4.276446998119354e-05, 4.5746564865112305e-05, 4.872865974903107e-05, 5.171075463294983e-05, 5.469284951686859e-05, 5.7674944400787354e-05, 6.0657039284706116e-05, 6.363913416862488e-05, 6.662122905254364e-05, 6.96033239364624e-05, 7.258541882038116e-05, 7.556751370429993e-05, 7.854960858821869e-05, 8.153170347213745e-05, 8.451379835605621e-05, 8.749589323997498e-05, 9.047798812389374e-05, 9.34600830078125e-05]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 4.0, 6.0, 2.0, 3.0, 1.0, 3.0, 10.0, 11.0, 22.0, 28.0, 144.0, 368.0, 260.0, 71.0, 23.0, 8.0, 5.0, 7.0, 3.0, 2.0, 2.0, 2.0, 4.0, 4.0, 3.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4749507904052734e-05, -3.365706652402878e-05, -3.256462514400482e-05, -3.1472183763980865e-05, -3.037974238395691e-05, -2.9287301003932953e-05, -2.8194859623908997e-05, -2.710241824388504e-05, -2.6009976863861084e-05, -2.4917535483837128e-05, -2.382509410381317e-05, -2.2732652723789215e-05, -2.164021134376526e-05, -2.0547769963741302e-05, -1.9455328583717346e-05, -1.836288720369339e-05, -1.7270445823669434e-05, -1.6178004443645477e-05, -1.5085563063621521e-05, -1.3993121683597565e-05, -1.2900680303573608e-05, -1.1808238923549652e-05, -1.0715797543525696e-05, -9.62335616350174e-06, -8.530914783477783e-06, -7.438473403453827e-06, -6.346032023429871e-06, -5.253590643405914e-06, -4.161149263381958e-06, -3.0687078833580017e-06, -1.9762665033340454e-06, -8.838251233100891e-07, 2.086162567138672e-07, 1.3010576367378235e-06, 2.3934990167617798e-06, 3.485940396785736e-06, 4.578381776809692e-06, 5.670823156833649e-06, 6.763264536857605e-06, 7.855705916881561e-06, 8.948147296905518e-06, 1.0040588676929474e-05, 
1.113303005695343e-05, 1.2225471436977386e-05, 1.3317912817001343e-05, 1.4410354197025299e-05, 1.5502795577049255e-05, 1.659523695707321e-05, 1.7687678337097168e-05, 1.8780119717121124e-05, 1.987256109714508e-05, 2.0965002477169037e-05, 2.2057443857192993e-05, 2.314988523721695e-05, 2.4242326617240906e-05, 2.5334767997264862e-05, 2.642720937728882e-05, 2.7519650757312775e-05, 2.861209213733673e-05, 2.9704533517360687e-05, 3.0796974897384644e-05, 3.18894162774086e-05, 3.2981857657432556e-05, 3.407429903745651e-05, 3.516674041748047e-05]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 3.0, 5.0, 8.0, 16.0, 21.0, 30.0, 34.0, 54.0, 77.0, 108.0, 127.0, 146.0, 110.0, 81.0, 51.0, 33.0, 25.0, 22.0, 13.0, 16.0, 6.0, 2.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000589753151871264, -0.000569464173167944, -0.0005491751944646239, -0.0005288862157613039, -0.0005085972370579839, -0.0004883082583546638, -0.0004680193087551743, -0.00044773033005185425, -0.0004274413513485342, -0.0004071523726452142, -0.0003868633939418942, -0.00036657441523857415, -0.0003462854656390846, -0.00032599648693576455, -0.0003057075082324445, -0.0002854185295291245, -0.00026512955082580447, -0.00024484057212248445, -0.00022455159341916442, -0.00020426262926775962, -0.0001839736505644396, -0.00016368467186111957, -0.00014339570770971477, -0.00012310672900639474, -0.00010281775030307472, -8.252877159975469e-05, -6.223980017239228e-05, -4.195082510705106e-05, -2.166185004170984e-05, -1.372871338389814e-06, 1.89161000889726e-05, 3.920507151633501e-05, 5.949410842731595e-05, 7.978308713063598e-05, 0.00010007205855799839, 0.0001203610299853608, 0.00014065000868868083, 0.00016093898739200085, 0.00018122795154340565, 0.00020151693024672568, 0.0002218059089500457, 0.00024209488765336573, 0.00026238386635668576, 0.0002826728450600058, 0.00030296179465949535, 0.0003232507733628154, 0.0003435397520661354, 0.00036382873076945543, 0.00038411770947277546, 0.0004044066881760955, 0.0004246956668794155, 0.00044498464558273554, 0.00046527362428605556, 0.0004855626029893756, 0.0005058515816926956, 0.0005261405603960156, 0.0005464295390993357, 0.0005667185178026557, 0.0005870074965059757, 0.0006072964752092957, 0.0006275854539126158, 0.0006478744326159358, 0.0006681634113192558, 0.0006884523900225759, 0.000708741310518235]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 5.0, 7.0, 7.0, 16.0, 11.0, 5.0, 18.0, 17.0, 16.0, 18.0, 19.0, 35.0, 35.0, 37.0, 38.0, 47.0, 59.0, 43.0, 46.0, 54.0, 58.0, 53.0, 49.0, 30.0, 47.0, 47.0, 38.0, 25.0, 18.0, 19.0, 14.0, 19.0, 11.0, 18.0, 9.0, 11.0, 7.0, 4.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0004166070430073887, -0.0004056436591781676, -0.000394680246245116, -0.00038371686241589487, -0.00037275347858667374, -0.00036179006565362215, -0.000350826681824401, -0.0003398632979951799, -0.0003288998850621283, -0.0003179365012329072, -0.0003069730882998556, -0.00029600970447063446, -0.00028504632064141333, -0.0002740829368121922, -0.0002631195238791406, -0.0002521561400499195, -0.00024119275622069836, -0.000230229357839562, -0.00021926597401034087, -0.0002083025756292045, -0.00019733919179998338, -0.00018637579341884702, 
-0.00017541239503771067, -0.00016444901120848954, -0.00015348561282735318, -0.00014252221444621682, -0.0001315588306169957, -0.00012059543223585933, -0.00010963204113068059, -9.866865002550185e-05, -8.770525164436549e-05, -7.674186053918675e-05, -6.577849853783846e-05, -5.4815107432659715e-05, -4.3851712689502165e-05, -3.2888317946344614e-05, -2.192492684116587e-05, -1.0961535735987127e-05, 1.862645149230957e-09, 1.0965253750327975e-05, 2.1928644855506718e-05, 3.289203596068546e-05, 4.385543070384301e-05, 5.481882544700056e-05, 6.57822165521793e-05, 7.674560765735805e-05, 8.770900603849441e-05, 9.867239714367315e-05, 0.0001096357882488519, 0.00012059917935403064, 0.00013156257045920938, 0.00014252596884034574, 0.00015348935266956687, 0.00016445275105070323, 0.00017541614943183959, 0.00018637953326106071, 0.00019734293164219707, 0.00020830633002333343, 0.00021926971385255456, 0.00023023311223369092, 0.00024119651061482728, 0.0002521598944440484, 0.0002631233073771, 0.0002740866912063211, 0.00028505007503554225]}, "gradients/decoder.model.decoder.layers.3.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 5.0, 3.0, 1.0, 5.0, 6.0, 10.0, 9.0, 17.0, 12.0, 37.0, 48.0, 57.0, 81.0, 187.0, 319.0, 520.0, 858.0, 1654.0, 3421.0, 17996.0, 4112754.0, 47884.0, 4047.0, 1896.0, 1017.0, 666.0, 326.0, 185.0, 98.0, 53.0, 42.0, 26.0, 19.0, 9.0, 8.0, 6.0, 6.0, 1.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00093841552734375, -0.0009066909551620483, -0.0008749663829803467, -0.000843241810798645, -0.0008115172386169434, -0.0007797926664352417, -0.00074806809425354, -0.0007163435220718384, -0.0006846189498901367, -0.0006528943777084351, -0.0006211698055267334, -0.0005894452333450317, -0.0005577206611633301, -0.0005259960889816284, -0.0004942715167999268, -0.0004625469446182251, -0.00043082237243652344, -0.0003990978002548218, -0.0003673732280731201, -0.00033564865589141846, -0.0003039240837097168, -0.00027219951152801514, -0.00024047493934631348, -0.00020875036716461182, -0.00017702579498291016, -0.0001453012228012085, -0.00011357665061950684, -8.185207843780518e-05, -5.0127506256103516e-05, -1.8402934074401855e-05, 1.3321638107299805e-05, 4.5046210289001465e-05, 7.677078247070312e-05, 0.00010849535465240479, 0.00014021992683410645, 0.0001719444990158081, 0.00020366907119750977, 0.00023539364337921143, 0.0002671182155609131, 0.00029884278774261475, 0.0003305673599243164, 0.00036229193210601807, 0.0003940165042877197, 0.0004257410764694214, 0.00045746564865112305, 0.0004891902208328247, 0.0005209147930145264, 0.000552639365196228, 0.0005843639373779297, 0.0006160885095596313, 0.000647813081741333, 0.0006795376539230347, 0.0007112622261047363, 0.000742986798286438, 0.0007747113704681396, 0.0008064359426498413, 0.000838160514831543, 0.0008698850870132446, 0.0009016096591949463, 0.000933334231376648, 0.0009650588035583496, 0.0009967833757400513, 0.001028507947921753, 0.0010602325201034546, 0.0010919570922851562]}, "gradients/decoder.model.decoder.layers.3.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 4.0, 6.0, 6.0, 10.0, 8.0, 7.0, 15.0, 12.0, 9.0, 21.0, 25.0, 27.0, 34.0, 34.0, 31.0, 55.0, 45.0, 43.0, 46.0, 65.0, 51.0, 44.0, 42.0, 41.0, 39.0, 46.0, 32.0, 40.0, 24.0, 22.0, 21.0, 14.0, 17.0, 20.0, 11.0, 11.0, 4.0, 10.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00021791458129882812, -0.00021186843514442444, 
-0.00020582228899002075, -0.00019977614283561707, -0.00019372999668121338, -0.0001876838505268097, -0.000181637704372406, -0.00017559155821800232, -0.00016954541206359863, -0.00016349926590919495, -0.00015745311975479126, -0.00015140697360038757, -0.0001453608274459839, -0.0001393146812915802, -0.00013326853513717651, -0.00012722238898277283, -0.00012117624282836914, -0.00011513009667396545, -0.00010908395051956177, -0.00010303780436515808, -9.69916582107544e-05, -9.094551205635071e-05, -8.489936590194702e-05, -7.885321974754333e-05, -7.280707359313965e-05, -6.676092743873596e-05, -6.0714781284332275e-05, -5.466863512992859e-05, -4.86224889755249e-05, -4.2576342821121216e-05, -3.653019666671753e-05, -3.0484050512313843e-05, -2.4437904357910156e-05, -1.839175820350647e-05, -1.2345612049102783e-05, -6.299465894699097e-06, -2.5331974029541016e-07, 5.792826414108276e-06, 1.1838972568511963e-05, 1.788511872291565e-05, 2.3931264877319336e-05, 2.9977411031723022e-05, 3.602355718612671e-05, 4.2069703340530396e-05, 4.811584949493408e-05, 5.416199564933777e-05, 6.0208141803741455e-05, 6.625428795814514e-05, 7.230043411254883e-05, 7.834658026695251e-05, 8.43927264213562e-05, 9.043887257575989e-05, 9.648501873016357e-05, 0.00010253116488456726, 0.00010857731103897095, 0.00011462345719337463, 0.00012066960334777832, 0.000126715749502182, 0.0001327618956565857, 0.00013880804181098938, 0.00014485418796539307, 0.00015090033411979675, 0.00015694648027420044, 0.00016299262642860413, 0.0001690387725830078]}, "gradients/decoder.model.decoder.layers.3.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 7.0, 8.0, 12.0, 19.0, 28.0, 51.0, 79.0, 159.0, 467.0, 1555.0, 5845.0, 87038.0, 4080443.0, 14304.0, 2711.0, 907.0, 327.0, 135.0, 68.0, 50.0, 23.0, 16.0, 11.0, 4.0, 3.0, 3.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010080337524414062, -0.0009785965085029602, -0.0009491592645645142, -0.0009197220206260681, -0.0008902847766876221, -0.000860847532749176, -0.00083141028881073, -0.0008019730448722839, -0.0007725358009338379, -0.0007430985569953918, -0.0007136613130569458, -0.0006842240691184998, -0.0006547868251800537, -0.0006253495812416077, -0.0005959123373031616, -0.0005664750933647156, -0.0005370378494262695, -0.0005076006054878235, -0.00047816336154937744, -0.0004487261176109314, -0.00041928887367248535, -0.0003898516297340393, -0.00036041438579559326, -0.0003309771418571472, -0.00030153989791870117, -0.0002721026539802551, -0.00024266541004180908, -0.00021322816610336304, -0.000183790922164917, -0.00015435367822647095, -0.0001249164342880249, -9.547919034957886e-05, -6.604194641113281e-05, -3.660470247268677e-05, -7.167458534240723e-06, 2.2269785404205322e-05, 5.170702934265137e-05, 8.114427328109741e-05, 0.00011058151721954346, 0.0001400187611579895, 0.00016945600509643555, 0.0001988932490348816, 0.00022833049297332764, 0.0002577677369117737, 0.0002872049808502197, 0.00031664222478866577, 0.0003460794687271118, 0.00037551671266555786, 0.0004049539566040039, 0.00043439120054244995, 0.000463828444480896, 0.000493265688419342, 0.0005227029323577881, 0.0005521401762962341, 0.0005815774202346802, 0.0006110146641731262, 0.0006404519081115723, 0.0006698891520500183, 0.0006993263959884644, 0.0007287636399269104, 0.0007582008838653564, 0.0007876381278038025, 0.0008170753717422485, 0.0008465126156806946, 0.0008759498596191406]}, 
"gradients/decoder.model.decoder.layers.3.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 7.0, 4.0, 7.0, 10.0, 9.0, 22.0, 30.0, 118.0, 451.0, 2244.0, 795.0, 219.0, 61.0, 39.0, 21.0, 8.0, 6.0, 8.0, 3.0, 1.0, 3.0, 0.0, 2.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0001957416534423828, -0.0001910543069243431, -0.0001863669604063034, -0.0001816796138882637, -0.000176992267370224, -0.0001723049208521843, -0.0001676175743341446, -0.0001629302278161049, -0.00015824288129806519, -0.00015355553478002548, -0.00014886818826198578, -0.00014418084174394608, -0.00013949349522590637, -0.00013480614870786667, -0.00013011880218982697, -0.00012543145567178726, -0.00012074410915374756, -0.00011605676263570786, -0.00011136941611766815, -0.00010668206959962845, -0.00010199472308158875, -9.730737656354904e-05, -9.262003004550934e-05, -8.793268352746964e-05, -8.324533700942993e-05, -7.855799049139023e-05, -7.387064397335052e-05, -6.918329745531082e-05, -6.449595093727112e-05, -5.9808604419231415e-05, -5.512125790119171e-05, -5.043391138315201e-05, -4.5746564865112305e-05, -4.10592183470726e-05, -3.63718718290329e-05, -3.1684525310993195e-05, -2.699717879295349e-05, -2.2309832274913788e-05, -1.7622485756874084e-05, -1.2935139238834381e-05, -8.247792720794678e-06, -3.5604462027549744e-06, 1.126900315284729e-06, 5.814246833324432e-06, 1.0501593351364136e-05, 1.5188939869403839e-05, 1.9876286387443542e-05, 2.4563632905483246e-05, 2.925097942352295e-05, 3.393832594156265e-05, 3.8625672459602356e-05, 4.331301897764206e-05, 4.800036549568176e-05, 5.2687712013721466e-05, 5.737505853176117e-05, 6.206240504980087e-05, 6.674975156784058e-05, 7.143709808588028e-05, 7.612444460391998e-05, 8.081179112195969e-05, 8.549913763999939e-05, 9.018648415803909e-05, 9.48738306760788e-05, 9.95611771941185e-05, 0.0001042485237121582]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 4.0, 2.0, 11.0, 3.0, 15.0, 17.0, 21.0, 25.0, 45.0, 78.0, 96.0, 144.0, 153.0, 127.0, 76.0, 54.0, 39.0, 32.0, 7.0, 19.0, 9.0, 13.0, 9.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003225227410439402, -0.0003102122573181987, -0.0002979017735924572, -0.0002855913189705461, -0.0002732808352448046, -0.0002609703515190631, -0.0002486598677933216, -0.00023634939861949533, -0.00022403892944566905, -0.00021172844571992755, -0.00019941797654610127, -0.00018710749282035977, -0.0001747970236465335, -0.00016248653992079198, -0.00015017605619505048, -0.0001378655870212242, -0.0001255551032954827, -0.0001132446268456988, -0.00010093415039591491, -8.86236666701734e-05, -7.631319749634713e-05, -6.400271377060562e-05, -5.169223732082173e-05, -3.938176087103784e-05, -2.707128442125395e-05, -1.4760807061975356e-05, -2.450329702696763e-06, 9.860148566076532e-06, 2.2170625015860423e-05, 3.448110510362312e-05, 4.6791581553407013e-05, 5.9102058003190905e-05, 7.14125344529748e-05, 8.372301090275869e-05, 9.603348735254258e-05, 0.00010834397107828408, 0.00012065444025211036, 0.00013296492397785187, 0.00014527540770359337, 0.00015758587687741965, 0.00016989634605124593, 0.00018220682977698743, 0.0001945172989508137, 0.00020682778267655522, 0.0002191382518503815, 0.000231448735576123, 
0.0002437592193018645, 0.000256069703027606, 0.00026838015764951706, 0.00028069064137525856, 0.00029300112510100007, 0.0003053115797229111, 0.0003176220634486526, 0.00032993254717439413, 0.00034224303090013564, 0.00035455351462587714, 0.00036686399835161865, 0.00037917448207736015, 0.00039148496580310166, 0.0004037954204250127, 0.0004161059041507542, 0.0004284163878764957, 0.0004407268716022372, 0.0004530373262241483, 0.0004653478099498898]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 9.0, 11.0, 18.0, 11.0, 11.0, 22.0, 24.0, 33.0, 28.0, 27.0, 43.0, 45.0, 50.0, 53.0, 44.0, 41.0, 46.0, 49.0, 48.0, 49.0, 39.0, 56.0, 35.0, 35.0, 42.0, 17.0, 21.0, 13.0, 11.0, 13.0, 13.0, 6.0, 12.0, 5.0, 9.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00022118148626759648, -0.00021492889209184796, -0.00020867629791609943, -0.0002024237037403509, -0.00019617110956460238, -0.00018991851538885385, -0.00018366592121310532, -0.0001774133270373568, -0.00017116073286160827, -0.00016490813868585974, -0.0001586555445101112, -0.00015240295033436269, -0.00014615035615861416, -0.00013989776198286563, -0.0001336451678071171, -0.00012739257363136858, -0.00012113998673157766, -0.00011488739255582914, -0.00010863479838008061, -0.00010238220420433208, -9.612961002858356e-05, -8.987702312879264e-05, -8.362442895304412e-05, -7.737183477729559e-05, -7.111924060154706e-05, -6.486664642579854e-05, -5.861405225005001e-05, -5.236145807430148e-05, -4.6108863898552954e-05, -3.9856273360783234e-05, -3.360367918503471e-05, -2.735108500928618e-05, -2.1098487195558846e-05, -1.4845893019810319e-05, -8.593299753556494e-06, -2.3407064873026684e-06, 3.911887688445859e-06, 1.0164480045204982e-05, 1.641707422095351e-05, 2.2669668396702036e-05, 2.8922262572450563e-05, 3.517485674819909e-05, 4.142745092394762e-05, 4.768004146171734e-05, 5.3932635637465864e-05, 6.018522981321439e-05, 6.643782398896292e-05, 7.269041816471145e-05, 7.894301234045997e-05, 8.51956065162085e-05, 9.144820069195703e-05, 9.770079486770555e-05, 0.00010395338904345408, 0.000110205975943245, 0.00011645857011899352, 0.00012271116429474205, 0.00012896375847049057, 0.0001352163526462391, 0.00014146894682198763, 0.00014772154099773616, 0.00015397413517348468, 0.0001602267293492332, 0.00016647932352498174, 0.00017273191770073026, 0.0001789845118764788]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 4.0, 9.0, 10.0, 12.0, 13.0, 11.0, 35.0, 57.0, 84.0, 107.0, 165.0, 300.0, 474.0, 920.0, 1768.0, 3546.0, 9036.0, 28090.0, 308158.0, 636326.0, 40474.0, 10737.0, 3941.0, 1898.0, 945.0, 594.0, 282.0, 184.0, 136.0, 69.0, 59.0, 34.0, 18.0, 21.0, 12.0, 8.0, 10.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.445148468017578e-05, -3.334321081638336e-05, -3.223493695259094e-05, -3.112666308879852e-05, -3.0018389225006104e-05, -2.8910115361213684e-05, -2.7801841497421265e-05, -2.6693567633628845e-05, -2.5585293769836426e-05, -2.4477019906044006e-05, -2.3368746042251587e-05, -2.2260472178459167e-05, -2.1152198314666748e-05, -2.004392445087433e-05, -1.893565058708191e-05, -1.782737672328949e-05, -1.671910285949707e-05, -1.561082899570465e-05, -1.4502555131912231e-05, -1.3394281268119812e-05, -1.2286007404327393e-05, -1.1177733540534973e-05, -1.0069459676742554e-05, 
-8.961185812950134e-06, -7.852911949157715e-06, -6.744638085365295e-06, -5.636364221572876e-06, -4.5280903577804565e-06, -3.419816493988037e-06, -2.3115426301956177e-06, -1.2032687664031982e-06, -9.499490261077881e-08, 1.0132789611816406e-06, 2.12155282497406e-06, 3.2298266887664795e-06, 4.338100552558899e-06, 5.446374416351318e-06, 6.554648280143738e-06, 7.662922143936157e-06, 8.771196007728577e-06, 9.879469871520996e-06, 1.0987743735313416e-05, 1.2096017599105835e-05, 1.3204291462898254e-05, 1.4312565326690674e-05, 1.5420839190483093e-05, 1.6529113054275513e-05, 1.7637386918067932e-05, 1.874566078186035e-05, 1.985393464565277e-05, 2.096220850944519e-05, 2.207048237323761e-05, 2.317875623703003e-05, 2.428703010082245e-05, 2.5395303964614868e-05, 2.6503577828407288e-05, 2.7611851692199707e-05, 2.8720125555992126e-05, 2.9828399419784546e-05, 3.0936673283576965e-05, 3.2044947147369385e-05, 3.3153221011161804e-05, 3.4261494874954224e-05, 3.536976873874664e-05, 3.647804260253906e-05]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 3.0, 1.0, 1.0, 3.0, 4.0, 5.0, 7.0, 6.0, 18.0, 12.0, 24.0, 17.0, 36.0, 29.0, 46.0, 41.0, 59.0, 65.0, 58.0, 54.0, 53.0, 55.0, 47.0, 53.0, 52.0, 47.0, 41.0, 34.0, 26.0, 20.0, 17.0, 16.0, 8.0, 17.0, 5.0, 8.0, 5.0, 2.0, 5.0, 6.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0001697540283203125, -0.00016495771706104279, -0.00016016140580177307, -0.00015536509454250336, -0.00015056878328323364, -0.00014577247202396393, -0.00014097616076469421, -0.0001361798495054245, -0.00013138353824615479, -0.00012658722698688507, -0.00012179091572761536, -0.00011699460446834564, -0.00011219829320907593, -0.00010740198194980621, -0.0001026056706905365, -9.780935943126678e-05, -9.301304817199707e-05, -8.821673691272736e-05, -8.342042565345764e-05, -7.862411439418793e-05, -7.382780313491821e-05, -6.90314918756485e-05, -6.423518061637878e-05, -5.943886935710907e-05, -5.4642558097839355e-05, -4.984624683856964e-05, -4.504993557929993e-05, -4.025362432003021e-05, -3.54573130607605e-05, -3.0661001801490784e-05, -2.586469054222107e-05, -2.1068379282951355e-05, -1.627206802368164e-05, -1.1475756764411926e-05, -6.679445505142212e-06, -1.8831342458724976e-06, 2.913177013397217e-06, 7.709488272666931e-06, 1.2505799531936646e-05, 1.730211079120636e-05, 2.2098422050476074e-05, 2.689473330974579e-05, 3.16910445690155e-05, 3.648735582828522e-05, 4.128366708755493e-05, 4.6079978346824646e-05, 5.087628960609436e-05, 5.5672600865364075e-05, 6.046891212463379e-05, 6.52652233839035e-05, 7.006153464317322e-05, 7.485784590244293e-05, 7.965415716171265e-05, 8.445046842098236e-05, 8.924677968025208e-05, 9.404309093952179e-05, 9.88394021987915e-05, 0.00010363571345806122, 0.00010843202471733093, 0.00011322833597660065, 0.00011802464723587036, 0.00012282095849514008, 0.0001276172697544098, 0.0001324135810136795, 0.00013720989227294922]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0, 3.0, 3.0, 5.0, 4.0, 6.0, 9.0, 10.0, 18.0, 24.0, 26.0, 26.0, 43.0, 84.0, 62.0, 107.0, 150.0, 187.0, 373.0, 900.0, 3321.0, 18953.0, 290935.0, 697182.0, 28967.0, 4692.0, 1148.0, 415.0, 226.0, 153.0, 115.0, 91.0, 67.0, 62.0, 60.0, 37.0, 22.0, 19.0, 12.0, 9.0, 15.0, 6.0, 3.0, 6.0, 5.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-9.357929229736328e-06, 
-9.057112038135529e-06, -8.756294846534729e-06, -8.45547765493393e-06, -8.15466046333313e-06, -7.85384327173233e-06, -7.553026080131531e-06, -7.252208888530731e-06, -6.951391696929932e-06, -6.650574505329132e-06, -6.3497573137283325e-06, -6.048940122127533e-06, -5.748122930526733e-06, -5.447305738925934e-06, -5.146488547325134e-06, -4.845671355724335e-06, -4.544854164123535e-06, -4.244036972522736e-06, -3.943219780921936e-06, -3.6424025893211365e-06, -3.341585397720337e-06, -3.0407682061195374e-06, -2.739951014518738e-06, -2.4391338229179382e-06, -2.1383166313171387e-06, -1.8374994397163391e-06, -1.5366822481155396e-06, -1.23586505651474e-06, -9.350478649139404e-07, -6.342306733131409e-07, -3.334134817123413e-07, -3.259629011154175e-08, 2.682209014892578e-07, 5.690380930900574e-07, 8.698552846908569e-07, 1.1706724762916565e-06, 1.471489667892456e-06, 1.7723068594932556e-06, 2.073124051094055e-06, 2.3739412426948547e-06, 2.6747584342956543e-06, 2.975575625896454e-06, 3.2763928174972534e-06, 3.577210009098053e-06, 3.8780272006988525e-06, 4.178844392299652e-06, 4.479661583900452e-06, 4.780478775501251e-06, 5.081295967102051e-06, 5.38211315870285e-06, 5.68293035030365e-06, 5.9837475419044495e-06, 6.284564733505249e-06, 6.585381925106049e-06, 6.886199116706848e-06, 7.187016308307648e-06, 7.487833499908447e-06, 7.788650691509247e-06, 8.089467883110046e-06, 8.390285074710846e-06, 8.691102266311646e-06, 8.991919457912445e-06, 9.292736649513245e-06, 9.593553841114044e-06, 9.894371032714844e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 5.0, 4.0, 13.0, 15.0, 12.0, 13.0, 26.0, 33.0, 22.0, 39.0, 37.0, 42.0, 47.0, 30.0, 42.0, 35.0, 43.0, 57.0, 50.0, 56.0, 41.0, 31.0, 27.0, 33.0, 41.0, 31.0, 25.0, 25.0, 21.0, 20.0, 18.0, 11.0, 12.0, 11.0, 9.0, 6.0, 1.0, 5.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0002460479736328125, -0.00023815780878067017, -0.00023026764392852783, -0.0002223774790763855, -0.00021448731422424316, -0.00020659714937210083, -0.0001987069845199585, -0.00019081681966781616, -0.00018292665481567383, -0.0001750364899635315, -0.00016714632511138916, -0.00015925616025924683, -0.0001513659954071045, -0.00014347583055496216, -0.00013558566570281982, -0.0001276955008506775, -0.00011980533599853516, -0.00011191517114639282, -0.00010402500629425049, -9.613484144210815e-05, -8.824467658996582e-05, -8.035451173782349e-05, -7.246434688568115e-05, -6.457418203353882e-05, -5.6684017181396484e-05, -4.879385232925415e-05, -4.0903687477111816e-05, -3.301352262496948e-05, -2.512335777282715e-05, -1.7233192920684814e-05, -9.34302806854248e-06, -1.4528632164001465e-06, 6.4373016357421875e-06, 1.4327466487884521e-05, 2.2217631340026855e-05, 3.010779619216919e-05, 3.7997961044311523e-05, 4.588812589645386e-05, 5.377829074859619e-05, 6.166845560073853e-05, 6.955862045288086e-05, 7.74487853050232e-05, 8.533895015716553e-05, 9.322911500930786e-05, 0.0001011192798614502, 0.00010900944471359253, 0.00011689960956573486, 0.0001247897744178772, 0.00013267993927001953, 0.00014057010412216187, 0.0001484602689743042, 0.00015635043382644653, 0.00016424059867858887, 0.0001721307635307312, 0.00018002092838287354, 0.00018791109323501587, 0.0001958012580871582, 0.00020369142293930054, 0.00021158158779144287, 0.0002194717526435852, 0.00022736191749572754, 0.00023525208234786987, 0.0002431422472000122, 0.00025103241205215454, 0.0002589225769042969]}, 
"gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [12.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048551.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, -4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 8.0, 8.0, 0.0, 11.0, 0.0, 20.0, 33.0, 0.0, 43.0, 77.0, 0.0, 101.0, 0.0, 136.0, 127.0, 0.0, 127.0, 109.0, 0.0, 82.0, 0.0, 59.0, 32.0, 0.0, 13.0, 6.0, 0.0, 8.0, 0.0, 5.0, 9.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2516975402832031e-06, -1.214444637298584e-06, -1.1771917343139648e-06, -1.1399388313293457e-06, -1.1026859283447266e-06, -1.0654330253601074e-06, -1.0281801223754883e-06, -9.909272193908691e-07, -9.5367431640625e-07, -9.164214134216309e-07, -8.791685104370117e-07, -8.419156074523926e-07, -8.046627044677734e-07, -7.674098014831543e-07, -7.301568984985352e-07, -6.92903995513916e-07, -6.556510925292969e-07, -6.183981895446777e-07, -5.811452865600586e-07, -5.438923835754395e-07, -5.066394805908203e-07, -4.6938657760620117e-07, -4.3213367462158203e-07, -3.948807716369629e-07, -3.5762786865234375e-07, -3.203749656677246e-07, -2.8312206268310547e-07, -2.4586915969848633e-07, -2.086162567138672e-07, -1.7136335372924805e-07, -1.341104507446289e-07, -9.685754776000977e-08, -5.960464477539063e-08, -2.2351741790771484e-08, 1.4901161193847656e-08, 5.21540641784668e-08, 8.940696716308594e-08, 1.2665987014770508e-07, 1.6391277313232422e-07, 2.0116567611694336e-07, 2.384185791015625e-07, 2.7567148208618164e-07, 3.129243850708008e-07, 3.501772880554199e-07, 3.8743019104003906e-07, 4.246830940246582e-07, 4.6193599700927734e-07, 
4.991888999938965e-07, 5.364418029785156e-07, 5.736947059631348e-07, 6.109476089477539e-07, 6.48200511932373e-07, 6.854534149169922e-07, 7.227063179016113e-07, 7.599592208862305e-07, 7.972121238708496e-07, 8.344650268554688e-07, 8.717179298400879e-07, 9.08970832824707e-07, 9.462237358093262e-07, 9.834766387939453e-07, 1.0207295417785645e-06, 1.0579824447631836e-06, 1.0952353477478027e-06, 1.1324882507324219e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 5.0, 2.0, 4.0, 5.0, 6.0, 14.0, 18.0, 15.0, 35.0, 54.0, 77.0, 98.0, 146.0, 168.0, 121.0, 87.0, 41.0, 41.0, 13.0, 16.0, 14.0, 13.0, 5.0, 7.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00022119141067378223, -0.00021303765242919326, -0.0002048838941846043, -0.00019673013594001532, -0.00018857637769542634, -0.00018042261945083737, -0.0001722688612062484, -0.00016411510296165943, -0.00015596134471707046, -0.0001478075864724815, -0.00013965382822789252, -0.00013150006998330355, -0.00012334631173871458, -0.0001151925534941256, -0.00010703879524953663, -9.888503700494766e-05, -9.073127876035869e-05, -8.257752051576972e-05, -7.442376227118075e-05, -6.627000402659178e-05, -5.811624578200281e-05, -4.9962487537413836e-05, -4.1808729292824864e-05, -3.365497104823589e-05, -2.5501212803646922e-05, -1.734745455905795e-05, -9.19369631446898e-06, -1.0399380698800087e-06, 7.1138201747089624e-06, 1.5267578419297934e-05, 2.3421336663886905e-05, 3.1575094908475876e-05, 3.972885315306485e-05, 4.788261139765382e-05, 5.603636964224279e-05, 6.419012788683176e-05, 7.234388613142073e-05, 8.04976443760097e-05, 8.865140262059867e-05, 9.680516086518764e-05, 0.00010495891910977662, 0.00011311267735436559, 0.00012126643559895456, 0.00012942019384354353, 0.0001375739520881325, 0.00014572771033272147, 0.00015388146857731044, 
0.00016203522682189941, 0.00017018898506648839, 0.00017834274331107736, 0.00018649650155566633, 0.0001946502598002553, 0.00020280401804484427, 0.00021095777628943324, 0.0002191115345340222, 0.00022726529277861118, 0.00023541905102320015, 0.00024357280926778913, 0.0002517265675123781, 0.00025988032575696707, 0.00026803408400155604, 0.000276187842246145, 0.000284341600490734, 0.00029249535873532295, 0.0003006491169799119]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 4.0, 5.0, 9.0, 13.0, 13.0, 18.0, 26.0, 17.0, 35.0, 30.0, 34.0, 40.0, 46.0, 63.0, 51.0, 52.0, 41.0, 58.0, 49.0, 51.0, 56.0, 52.0, 39.0, 42.0, 29.0, 25.0, 19.0, 10.0, 18.0, 13.0, 15.0, 4.0, 7.0, 7.0, 3.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00014838608331046999, -0.00014413964527193457, -0.00013989320723339915, -0.0001356467546429485, -0.0001314003166044131, -0.00012715387856587768, -0.00012290742597542703, -0.00011866098793689162, -0.0001144145498983562, -0.00011016811185982078, -0.00010592166654532775, -0.00010167522123083472, -9.74287831922993e-05, -9.318234515376389e-05, -8.893589983927086e-05, -8.468945452477783e-05, -8.044301648624241e-05, -7.6196578447707e-05, -7.195013313321397e-05, -6.770368781872094e-05, -6.345724978018552e-05, -5.92108081036713e-05, -5.4964366427157074e-05, -5.071792475064285e-05, -4.647148307412863e-05, -4.2225041397614405e-05, -3.797859972110018e-05, -3.373215804458596e-05, -2.9485716368071735e-05, -2.523927469155751e-05, -2.099283301504329e-05, -1.6746391338529065e-05, -1.249996421393007e-05, -8.253522537415847e-06, -4.007080860901624e-06, 2.3936081561259925e-07, 4.4858024921268225e-06, 8.732244168641046e-06, 1.2978685845155269e-05, 1.7225127521669492e-05, 2.1471569198183715e-05, 2.571801087469794e-05, 2.9964452551212162e-05, 3.4210894227726385e-05, 3.845733590424061e-05, 4.270377758075483e-05, 4.6950219257269055e-05, 5.119666093378328e-05, 5.54431026102975e-05, 5.9689544286811724e-05, 6.393598596332595e-05, 6.818243127781898e-05, 7.24288693163544e-05, 7.667530735488981e-05, 8.092175266938284e-05, 8.516819798387587e-05, 8.941463602241129e-05, 9.36610740609467e-05, 9.790751937543973e-05, 0.00010215396468993276, 0.00010640040272846818, 0.0001106468407670036, 0.00011489328608149663, 0.00011913973139598966, 0.00012338616943452507]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 6.0, 3.0, 7.0, 3.0, 7.0, 7.0, 18.0, 15.0, 21.0, 39.0, 48.0, 63.0, 120.0, 145.0, 251.0, 394.0, 677.0, 1043.0, 1854.0, 3591.0, 6947.0, 14277.0, 33694.0, 95400.0, 382711.0, 357859.0, 89232.0, 31757.0, 13535.0, 6644.0, 3481.0, 1911.0, 1088.0, 646.0, 366.0, 238.0, 157.0, 116.0, 67.0, 48.0, 26.0, 25.0, 7.0, 6.0, 4.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00018835067749023438, -0.00018245913088321686, -0.00017656758427619934, -0.00017067603766918182, -0.0001647844910621643, -0.0001588929444551468, -0.00015300139784812927, -0.00014710985124111176, -0.00014121830463409424, -0.00013532675802707672, -0.0001294352114200592, -0.0001235436648130417, -0.00011765211820602417, -0.00011176057159900665, -0.00010586902499198914, -9.997747838497162e-05, -9.40859317779541e-05, -8.819438517093658e-05, -8.230283856391907e-05, -7.641129195690155e-05, -7.051974534988403e-05, -6.462819874286652e-05, -5.8736652135849e-05, -5.284510552883148e-05, 
-4.6953558921813965e-05, -4.106201231479645e-05, -3.517046570777893e-05, -2.9278919100761414e-05, -2.3387372493743896e-05, -1.749582588672638e-05, -1.1604279279708862e-05, -5.712732672691345e-06, 1.7881393432617188e-07, 6.070360541343689e-06, 1.1961907148361206e-05, 1.7853453755378723e-05, 2.374500036239624e-05, 2.9636546969413757e-05, 3.5528093576431274e-05, 4.141964018344879e-05, 4.731118679046631e-05, 5.3202733397483826e-05, 5.909428000450134e-05, 6.498582661151886e-05, 7.087737321853638e-05, 7.67689198255539e-05, 8.266046643257141e-05, 8.855201303958893e-05, 9.444355964660645e-05, 0.00010033510625362396, 0.00010622665286064148, 0.000112118199467659, 0.00011800974607467651, 0.00012390129268169403, 0.00012979283928871155, 0.00013568438589572906, 0.00014157593250274658, 0.0001474674791097641, 0.00015335902571678162, 0.00015925057232379913, 0.00016514211893081665, 0.00017103366553783417, 0.00017692521214485168, 0.0001828167587518692, 0.00018870830535888672]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 6.0, 5.0, 9.0, 10.0, 12.0, 21.0, 26.0, 38.0, 36.0, 45.0, 41.0, 45.0, 56.0, 61.0, 50.0, 56.0, 63.0, 58.0, 56.0, 65.0, 43.0, 36.0, 45.0, 18.0, 22.0, 19.0, 16.0, 5.0, 9.0, 10.0, 8.0, 5.0, 3.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000385284423828125, -0.0003749430179595947, -0.00036460161209106445, -0.0003542602062225342, -0.0003439188003540039, -0.00033357739448547363, -0.00032323598861694336, -0.0003128945827484131, -0.0003025531768798828, -0.00029221177101135254, -0.00028187036514282227, -0.000271528959274292, -0.0002611875534057617, -0.00025084614753723145, -0.00024050474166870117, -0.0002301633358001709, -0.00021982192993164062, -0.00020948052406311035, -0.00019913911819458008, -0.0001887977123260498, -0.00017845630645751953, -0.00016811490058898926, -0.00015777349472045898, -0.0001474320888519287, -0.00013709068298339844, -0.00012674927711486816, -0.00011640787124633789, -0.00010606646537780762, -9.572505950927734e-05, -8.538365364074707e-05, -7.50422477722168e-05, -6.470084190368652e-05, -5.435943603515625e-05, -4.4018030166625977e-05, -3.36766242980957e-05, -2.333521842956543e-05, -1.2993812561035156e-05, -2.652406692504883e-06, 7.68899917602539e-06, 1.8030405044555664e-05, 2.8371810913085938e-05, 3.871321678161621e-05, 4.9054622650146484e-05, 5.939602851867676e-05, 6.973743438720703e-05, 8.00788402557373e-05, 9.042024612426758e-05, 0.00010076165199279785, 0.00011110305786132812, 0.0001214444637298584, 0.00013178586959838867, 0.00014212727546691895, 0.00015246868133544922, 0.0001628100872039795, 0.00017315149307250977, 0.00018349289894104004, 0.0001938343048095703, 0.00020417571067810059, 0.00021451711654663086, 0.00022485852241516113, 0.0002351999282836914, 0.0002455413341522217, 0.00025588274002075195, 0.0002662241458892822, 0.0002765655517578125]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 6.0, 10.0, 6.0, 13.0, 11.0, 8.0, 18.0, 27.0, 41.0, 44.0, 80.0, 110.0, 158.0, 314.0, 640.0, 5020.0, 843671.0, 194685.0, 2437.0, 482.0, 264.0, 135.0, 100.0, 68.0, 56.0, 37.0, 34.0, 20.0, 13.0, 7.0, 11.0, 7.0, 5.0, 3.0, 3.0, 5.0, 3.0, 3.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.0007524490356445312, -0.0007330924272537231, -0.000713735818862915, 
-0.0006943792104721069, -0.0006750226020812988, -0.0006556659936904907, -0.0006363093852996826, -0.0006169527769088745, -0.0005975961685180664, -0.0005782395601272583, -0.0005588829517364502, -0.0005395263433456421, -0.000520169734954834, -0.0005008131265640259, -0.0004814565181732178, -0.00046209990978240967, -0.00044274330139160156, -0.00042338669300079346, -0.00040403008460998535, -0.00038467347621917725, -0.00036531686782836914, -0.00034596025943756104, -0.00032660365104675293, -0.0003072470426559448, -0.0002878904342651367, -0.0002685338258743286, -0.0002491772174835205, -0.0002298206090927124, -0.0002104640007019043, -0.0001911073923110962, -0.00017175078392028809, -0.00015239417552947998, -0.00013303756713867188, -0.00011368095874786377, -9.432435035705566e-05, -7.496774196624756e-05, -5.561113357543945e-05, -3.625452518463135e-05, -1.6897916793823242e-05, 2.4586915969848633e-06, 2.181529998779297e-05, 4.1171908378601074e-05, 6.052851676940918e-05, 7.988512516021729e-05, 9.924173355102539e-05, 0.0001185983419418335, 0.0001379549503326416, 0.0001573115587234497, 0.0001766681671142578, 0.00019602477550506592, 0.00021538138389587402, 0.00023473799228668213, 0.00025409460067749023, 0.00027345120906829834, 0.00029280781745910645, 0.00031216442584991455, 0.00033152103424072266, 0.00035087764263153076, 0.00037023425102233887, 0.00038959085941314697, 0.0004089474678039551, 0.0004283040761947632, 0.0004476606845855713, 0.0004670172929763794, 0.0004863739013671875]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 6.0, 3.0, 7.0, 7.0, 10.0, 11.0, 15.0, 15.0, 18.0, 20.0, 22.0, 28.0, 46.0, 46.0, 40.0, 74.0, 57.0, 68.0, 70.0, 51.0, 47.0, 47.0, 53.0, 38.0, 47.0, 28.0, 24.0, 19.0, 12.0, 14.0, 14.0, 9.0, 11.0, 6.0, 6.0, 2.0, 3.0, 5.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0005097389221191406, -0.0004955828189849854, -0.0004814267158508301, -0.0004672706127166748, -0.00045311450958251953, -0.00043895840644836426, -0.000424802303314209, -0.0004106462001800537, -0.00039649009704589844, -0.00038233399391174316, -0.0003681778907775879, -0.0003540217876434326, -0.00033986568450927734, -0.00032570958137512207, -0.0003115534782409668, -0.0002973973751068115, -0.00028324127197265625, -0.000269085168838501, -0.0002549290657043457, -0.00024077296257019043, -0.00022661685943603516, -0.00021246075630187988, -0.0001983046531677246, -0.00018414855003356934, -0.00016999244689941406, -0.0001558363437652588, -0.00014168024063110352, -0.00012752413749694824, -0.00011336803436279297, -9.92119312286377e-05, -8.505582809448242e-05, -7.089972496032715e-05, -5.6743621826171875e-05, -4.25875186920166e-05, -2.8431415557861328e-05, -1.4275312423706055e-05, -1.1920928955078125e-07, 1.4036893844604492e-05, 2.8192996978759766e-05, 4.234910011291504e-05, 5.650520324707031e-05, 7.066130638122559e-05, 8.481740951538086e-05, 9.897351264953613e-05, 0.0001131296157836914, 0.00012728571891784668, 0.00014144182205200195, 0.00015559792518615723, 0.0001697540283203125, 0.00018391013145446777, 0.00019806623458862305, 0.00021222233772277832, 0.0002263784408569336, 0.00024053454399108887, 0.00025469064712524414, 0.0002688467502593994, 0.0002830028533935547, 0.00029715895652770996, 0.00031131505966186523, 0.0003254711627960205, 0.0003396272659301758, 0.00035378336906433105, 0.00036793947219848633, 0.0003820955753326416, 0.0003962516784667969]}, 
"gradients/decoder.model.decoder.layers.3.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 7.0, 1.0, 3.0, 3.0, 6.0, 4.0, 10.0, 10.0, 16.0, 11.0, 21.0, 47.0, 40.0, 87.0, 162.0, 298.0, 532.0, 1044.0, 2314.0, 5849.0, 55991.0, 968278.0, 6570.0, 3718.0, 1701.0, 848.0, 404.0, 228.0, 138.0, 74.0, 45.0, 42.0, 20.0, 10.0, 10.0, 0.0, 6.0, 4.0, 2.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.702278137207031e-06, -8.468516170978546e-06, -8.234754204750061e-06, -8.000992238521576e-06, -7.76723027229309e-06, -7.533468306064606e-06, -7.299706339836121e-06, -7.0659443736076355e-06, -6.83218240737915e-06, -6.598420441150665e-06, -6.36465847492218e-06, -6.130896508693695e-06, -5.89713454246521e-06, -5.663372576236725e-06, -5.42961061000824e-06, -5.195848643779755e-06, -4.9620866775512695e-06, -4.7283247113227844e-06, -4.494562745094299e-06, -4.260800778865814e-06, -4.027038812637329e-06, -3.793276846408844e-06, -3.559514880180359e-06, -3.3257529139518738e-06, -3.0919909477233887e-06, -2.8582289814949036e-06, -2.6244670152664185e-06, -2.3907050490379333e-06, -2.1569430828094482e-06, -1.923181116580963e-06, -1.689419150352478e-06, -1.455657184123993e-06, -1.2218952178955078e-06, -9.881332516670227e-07, -7.543712854385376e-07, -5.206093192100525e-07, -2.868473529815674e-07, -5.3085386753082275e-08, 1.8067657947540283e-07, 4.1443854570388794e-07, 6.48200511932373e-07, 8.819624781608582e-07, 1.1157244443893433e-06, 1.3494864106178284e-06, 1.5832483768463135e-06, 1.8170103430747986e-06, 2.0507723093032837e-06, 2.284534275531769e-06, 2.518296241760254e-06, 2.752058207988739e-06, 2.985820174217224e-06, 3.2195821404457092e-06, 3.4533441066741943e-06, 3.6871060729026794e-06, 3.9208680391311646e-06, 4.15463000535965e-06, 4.388391971588135e-06, 4.62215393781662e-06, 4.855915904045105e-06, 5.08967787027359e-06, 5.323439836502075e-06, 5.55720180273056e-06, 5.790963768959045e-06, 6.0247257351875305e-06, 6.258487701416016e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.k_proj.bias": {"_type": "histogram", "values": [5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 81.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 853.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 77.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0], "bins": [-1.1920928955078125e-07, -1.1548399925231934e-07, -1.1175870895385742e-07, -1.0803341865539551e-07, -1.043081283569336e-07, -1.0058283805847168e-07, -9.685754776000977e-08, -9.313225746154785e-08, -8.940696716308594e-08, -8.568167686462402e-08, -8.195638656616211e-08, -7.82310962677002e-08, -7.450580596923828e-08, -7.078051567077637e-08, -6.705522537231445e-08, -6.332993507385254e-08, -5.960464477539063e-08, -5.587935447692871e-08, -5.21540641784668e-08, -4.842877388000488e-08, -4.470348358154297e-08, -4.0978193283081055e-08, -3.725290298461914e-08, -3.3527612686157227e-08, -2.9802322387695312e-08, -2.60770320892334e-08, -2.2351741790771484e-08, -1.862645149230957e-08, -1.4901161193847656e-08, -1.1175870895385742e-08, -7.450580596923828e-09, -3.725290298461914e-09, 0.0, 3.725290298461914e-09, 7.450580596923828e-09, 1.1175870895385742e-08, 1.4901161193847656e-08, 1.862645149230957e-08, 2.2351741790771484e-08, 2.60770320892334e-08, 2.9802322387695312e-08, 3.3527612686157227e-08, 3.725290298461914e-08, 4.0978193283081055e-08, 4.470348358154297e-08, 4.842877388000488e-08, 
5.21540641784668e-08, 5.587935447692871e-08, 5.960464477539063e-08, 6.332993507385254e-08, 6.705522537231445e-08, 7.078051567077637e-08, 7.450580596923828e-08, 7.82310962677002e-08, 8.195638656616211e-08, 8.568167686462402e-08, 8.940696716308594e-08, 9.313225746154785e-08, 9.685754776000977e-08, 1.0058283805847168e-07, 1.043081283569336e-07, 1.0803341865539551e-07, 1.1175870895385742e-07, 1.1548399925231934e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 8.0, 7.0, 9.0, 6.0, 17.0, 19.0, 38.0, 63.0, 93.0, 214.0, 287.0, 710.0, 1635.0, 4364.0, 14347.0, 995813.0, 21906.0, 5356.0, 2033.0, 826.0, 346.0, 166.0, 117.0, 72.0, 39.0, 21.0, 11.0, 8.0, 15.0, 5.0, 5.0, 4.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.059906005859375e-06, -8.773058652877808e-06, -8.48621129989624e-06, -8.199363946914673e-06, -7.912516593933105e-06, -7.625669240951538e-06, -7.338821887969971e-06, -7.051974534988403e-06, -6.765127182006836e-06, -6.4782798290252686e-06, -6.191432476043701e-06, -5.904585123062134e-06, -5.617737770080566e-06, -5.330890417098999e-06, -5.044043064117432e-06, -4.757195711135864e-06, -4.470348358154297e-06, -4.1835010051727295e-06, -3.896653652191162e-06, -3.6098062992095947e-06, -3.3229589462280273e-06, -3.03611159324646e-06, -2.7492642402648926e-06, -2.462416887283325e-06, -2.175569534301758e-06, -1.8887221813201904e-06, -1.601874828338623e-06, -1.3150274753570557e-06, -1.0281801223754883e-06, -7.413327693939209e-07, -4.544854164123535e-07, -1.6763806343078613e-07, 1.1920928955078125e-07, 4.0605664253234863e-07, 6.92903995513916e-07, 9.797513484954834e-07, 1.2665987014770508e-06, 1.5534460544586182e-06, 1.8402934074401855e-06, 2.127140760421753e-06, 2.4139881134033203e-06, 2.7008354663848877e-06, 2.987682819366455e-06, 3.2745301723480225e-06, 3.56137752532959e-06, 3.848224878311157e-06, 4.135072231292725e-06, 4.421919584274292e-06, 4.708766937255859e-06, 4.995614290237427e-06, 5.282461643218994e-06, 5.5693089962005615e-06, 5.856156349182129e-06, 6.143003702163696e-06, 6.429851055145264e-06, 6.716698408126831e-06, 7.0035457611083984e-06, 7.290393114089966e-06, 7.577240467071533e-06, 7.8640878200531e-06, 8.150935173034668e-06, 8.437782526016235e-06, 8.724629878997803e-06, 9.01147723197937e-06, 9.298324584960938e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 6.0, 8.0, 5.0, 7.0, 11.0, 34.0, 117.0, 430.0, 247.0, 57.0, 22.0, 9.0, 7.0, 6.0, 9.0, 4.0, 4.0, 5.0, 3.0, 3.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.351139068603516e-06, -4.231929779052734e-06, -4.112720489501953e-06, -3.993511199951172e-06, -3.874301910400391e-06, -3.7550926208496094e-06, -3.635883331298828e-06, -3.516674041748047e-06, -3.3974647521972656e-06, -3.2782554626464844e-06, -3.159046173095703e-06, -3.039836883544922e-06, -2.9206275939941406e-06, -2.8014183044433594e-06, -2.682209014892578e-06, -2.562999725341797e-06, -2.4437904357910156e-06, -2.3245811462402344e-06, -2.205371856689453e-06, -2.086162567138672e-06, -1.9669532775878906e-06, -1.8477439880371094e-06, -1.7285346984863281e-06, -1.6093254089355469e-06, -1.4901161193847656e-06, -1.3709068298339844e-06, 
-1.2516975402832031e-06, -1.1324882507324219e-06, -1.0132789611816406e-06, -8.940696716308594e-07, -7.748603820800781e-07, -6.556510925292969e-07, -5.364418029785156e-07, -4.172325134277344e-07, -2.980232238769531e-07, -1.7881393432617188e-07, -5.960464477539063e-08, 5.960464477539063e-08, 1.7881393432617188e-07, 2.980232238769531e-07, 4.172325134277344e-07, 5.364418029785156e-07, 6.556510925292969e-07, 7.748603820800781e-07, 8.940696716308594e-07, 1.0132789611816406e-06, 1.1324882507324219e-06, 1.2516975402832031e-06, 1.3709068298339844e-06, 1.4901161193847656e-06, 1.6093254089355469e-06, 1.7285346984863281e-06, 1.8477439880371094e-06, 1.9669532775878906e-06, 2.086162567138672e-06, 2.205371856689453e-06, 2.3245811462402344e-06, 2.4437904357910156e-06, 2.562999725341797e-06, 2.682209014892578e-06, 2.8014183044433594e-06, 2.9206275939941406e-06, 3.039836883544922e-06, 3.159046173095703e-06, 3.2782554626464844e-06]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 97.0, 886.0, 33.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00042072980431839824, -0.0003545028157532215, -0.0002882758271880448, -0.00022204883862286806, -0.00015582185005769134, -8.959486149251461e-05, -2.3367872927337885e-05, 4.285911563783884e-05, 0.00010908610420301557, 0.0001753130927681923, 0.00024154008133336902, 0.00030776706989854574, 0.00037399405846372247, 0.0004402210470288992, 0.0005064480355940759, 0.0005726750241592526, 0.0006389020127244294, 0.0007051290012896061, 0.0007713559898547828, 0.0008375829784199595, 0.0009038099669851363, 0.000970036955550313, 0.0010362640023231506, 0.0011024909326806664, 0.0011687178630381823, 0.00123494490981102, 0.0013011718401685357, 0.0013673987705260515, 0.0014336258172988892, 0.0014998528640717268, 0.0015660797944292426, 0.0016323067247867584, 0.0016985340043902397, 0.0017647610511630774, 0.0018309879815205932, 0.001897214911878109, 0.0019634419586509466, 0.0020296690054237843, 0.0020958958193659782, 0.002162122866138816, 0.0022283499129116535, 0.002294576959684491, 0.002360804006457329, 0.0024270308203995228, 0.0024932578671723604, 0.002559484913945198, 0.002625711727887392, 0.0026919387746602297, 0.0027581658214330673, 0.002824392868205905, 0.0028906199149787426, 0.0029568467289209366, 0.0030230737756937742, 0.003089300822466612, 0.003155527636408806, 0.0032217546831816435, 0.003287981729954481, 0.0033542087767273188, 0.0034204358235001564, 0.0034866626374423504, 0.003552889684215188, 0.0036191167309880257, 0.0036853435449302197, 0.0037515705917030573, 0.003817797638475895]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 4.0, 9.0, 7.0, 6.0, 5.0, 12.0, 10.0, 15.0, 23.0, 14.0, 22.0, 27.0, 23.0, 26.0, 29.0, 40.0, 36.0, 30.0, 40.0, 38.0, 40.0, 37.0, 34.0, 35.0, 40.0, 44.0, 38.0, 30.0, 38.0, 18.0, 27.0, 37.0, 28.0, 26.0, 15.0, 16.0, 11.0, 23.0, 12.0, 11.0, 5.0, 1.0, 10.0, 4.0, 2.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0006319772801361978, -0.000612594245467335, -0.0005932112107984722, -0.0005738281179219484, -0.0005544450832530856, -0.0005350620485842228, -0.00051567901391536, -0.0004962959792464972, -0.0004769129154738039, 
[W&B run-summary excerpt: per-parameter gradient histograms ("values"/"bins") logged for decoder.model.decoder — layers.2: fc2, fc1, encoder_attn_layer_norm, encoder_attn.{out,v,k,q}_proj, self_attn_layer_norm, self_attn.{out,v,k,q}_proj; layers.1: final_layer_norm, fc2, fc1, encoder_attn_layer_norm, encoder_attn.{out,v,k,q}_proj — weights and biases.]
"gradients/decoder.model.decoder.layers.1.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 15.0, 29.0, 46.0, 121.0, 313.0, 289.0, 109.0, 45.0, 26.0, 5.0, 3.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4162338629830629e-05, -1.3277227481012233e-05, -1.2392116332193837e-05, -1.150700518337544e-05, -1.0621894034557045e-05, -9.736782885738648e-06, -8.851671736920252e-06, -7.966560588101856e-06, -7.0814489845361095e-06, -6.1963378357177135e-06, -5.3112266868993174e-06, -4.4261150833335705e-06, -3.54100416188885e-06, -2.6558927856967784e-06, -1.7707816368783824e-06, -8.856704880599864e-07, -5.593392415903509e-10, 8.845518664202245e-07, 1.7696630720820394e-06, 2.654774334587273e-06, 3.5398854834056692e-06, 4.424996859597741e-06, 5.310108008416137e-06, 6.195219157234533e-06, 7.080330306052929e-06, 7.965441909618676e-06, 8.850553058437072e-06, 9.735664207255468e-06, 1.0620775356073864e-05, 1.150588650489226e-05, 1.2390997653710656e-05, 1.3276108802529052e-05, 1.4161219951347448e-05, 1.5046331100165844e-05, 1.593144224898424e-05, 1.6816553397802636e-05, 1.7701664546621032e-05, 1.8586775695439428e-05, 1.9471886844257824e-05, 2.035699799307622e-05, 2.1242109141894616e-05, 2.2127220290713012e-05, 2.3012331439531408e-05, 2.3897442588349804e-05, 2.47825537371682e-05, 2.5667664885986596e-05, 2.6552776034804992e-05, 2.743788718362339e-05, 2.8323000151431188e-05, 2.9208111300249584e-05, 3.009322244906798e-05, 3.097833541687578e-05, 3.186344474670477e-05, 3.274855771451257e-05, 3.3633667044341564e-05, 3.4518780012149364e-05, 3.5403889341978356e-05, 3.6289002309786156e-05, 3.717411163961515e-05, 3.805922460742295e-05, 3.894433393725194e-05, 3.982944690505974e-05, 4.071455623488873e-05, 4.159966920269653e-05, 4.2484778532525524e-05]}, "gradients/decoder.model.decoder.layers.1.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 2.0, 4.0, 5.0, 2.0, 11.0, 13.0, 16.0, 13.0, 16.0, 14.0, 21.0, 21.0, 26.0, 34.0, 36.0, 33.0, 30.0, 38.0, 43.0, 39.0, 57.0, 42.0, 37.0, 49.0, 46.0, 34.0, 29.0, 40.0, 24.0, 38.0, 38.0, 29.0, 20.0, 22.0, 8.0, 9.0, 15.0, 13.0, 11.0, 8.0, 5.0, 4.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.8184119805227965e-06, -4.643654847313883e-06, -4.468897259357618e-06, -4.2941401261487044e-06, -4.119382992939791e-06, -3.944625859730877e-06, -3.7698682717746124e-06, -3.5951111385656986e-06, -3.420354005356785e-06, -3.2455966447741957e-06, -3.070839511565282e-06, -2.896082150982693e-06, -2.721325017773779e-06, -2.54656765719119e-06, -2.3718102966086008e-06, -2.197053163399687e-06, -2.022295802817098e-06, -1.8475385559213464e-06, -1.672781309025595e-06, -1.4980239484430058e-06, -1.323266815234092e-06, -1.1485094546515029e-06, -9.737522077557514e-07, -7.9899496086e-07, -6.242377139642485e-07, -4.494804670684971e-07, -2.747231917510362e-07, -9.996591643357533e-08, 7.479133046217612e-08, 2.4954857735792757e-07, 4.243058810970979e-07, 5.990631279928493e-07, 7.738203748886008e-07, 9.485776217843522e-07, 1.1233348686801037e-06, 1.2980922292626929e-06, 1.4728493624716066e-06, 1.6476067230541958e-06, 1.8223639699499472e-06, 1.9971212168456987e-06, 2.1718783500546124e-06, 2.3466357106372016e-06, 2.5213928438461153e-06, 2.6961502044287045e-06, 2.870907337637618e-06, 
3.0456646982202074e-06, 3.2204220588027965e-06, 3.3951791920117103e-06, 3.5699365525942994e-06, 3.7446939131768886e-06, 3.919451046385802e-06, 4.094208179594716e-06, 4.268965767550981e-06, 4.443722900759894e-06, 4.618480033968808e-06, 4.793237167177722e-06, 4.9679947551339865e-06, 5.1427518883429e-06, 5.317509476299165e-06, 5.4922666095080785e-06, 5.667023742716992e-06, 5.841780875925906e-06, 6.016538463882171e-06, 6.191295597091084e-06, 6.366052730299998e-06]}, "gradients/decoder.model.decoder.layers.1.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 0.0, 3.0, 2.0, 2.0, 6.0, 12.0, 14.0, 21.0, 37.0, 40.0, 51.0, 121.0, 111.0, 202.0, 298.0, 456.0, 790.0, 1113.0, 2204.0, 3131.0, 5623.0, 12484.0, 20219.0, 52972.0, 120656.0, 490605.0, 222076.0, 55726.0, 29892.0, 11980.0, 8076.0, 3671.0, 2102.0, 1539.0, 797.0, 551.0, 310.0, 227.0, 159.0, 80.0, 65.0, 40.0, 18.0, 27.0, 22.0, 10.0, 7.0, 5.0, 3.0, 2.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.0192394256591797e-05, -9.870156645774841e-06, -9.547919034957886e-06, -9.22568142414093e-06, -8.903443813323975e-06, -8.581206202507019e-06, -8.258968591690063e-06, -7.936730980873108e-06, -7.614493370056152e-06, -7.292255759239197e-06, -6.970018148422241e-06, -6.647780537605286e-06, -6.32554292678833e-06, -6.0033053159713745e-06, -5.681067705154419e-06, -5.358830094337463e-06, -5.036592483520508e-06, -4.714354872703552e-06, -4.392117261886597e-06, -4.069879651069641e-06, -3.7476420402526855e-06, -3.42540442943573e-06, -3.1031668186187744e-06, -2.780929207801819e-06, -2.4586915969848633e-06, -2.1364539861679077e-06, -1.8142163753509521e-06, -1.4919787645339966e-06, -1.169741153717041e-06, -8.475035429000854e-07, -5.252659320831299e-07, -2.0302832126617432e-07, 1.1920928955078125e-07, 4.414469003677368e-07, 7.636845111846924e-07, 1.085922122001648e-06, 1.4081597328186035e-06, 1.730397343635559e-06, 2.0526349544525146e-06, 2.3748725652694702e-06, 2.6971101760864258e-06, 3.0193477869033813e-06, 3.341585397720337e-06, 3.6638230085372925e-06, 3.986060619354248e-06, 4.308298230171204e-06, 4.630535840988159e-06, 4.952773451805115e-06, 5.27501106262207e-06, 5.597248673439026e-06, 5.9194862842559814e-06, 6.241723895072937e-06, 6.563961505889893e-06, 6.886199116706848e-06, 7.208436727523804e-06, 7.530674338340759e-06, 7.852911949157715e-06, 8.17514955997467e-06, 8.497387170791626e-06, 8.819624781608582e-06, 9.141862392425537e-06, 9.464100003242493e-06, 9.786337614059448e-06, 1.0108575224876404e-05, 1.043081283569336e-05]}, "gradients/decoder.model.decoder.layers.1.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 7.0, 6.0, 6.0, 7.0, 10.0, 13.0, 18.0, 24.0, 27.0, 29.0, 36.0, 35.0, 46.0, 47.0, 49.0, 55.0, 56.0, 57.0, 60.0, 54.0, 52.0, 41.0, 37.0, 35.0, 28.0, 34.0, 30.0, 24.0, 16.0, 12.0, 11.0, 10.0, 8.0, 4.0, 5.0, 1.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.4616718292236328e-05, -2.3937784135341644e-05, -2.325884997844696e-05, -2.2579915821552277e-05, -2.1900981664657593e-05, -2.122204750776291e-05, -2.0543113350868225e-05, -1.986417919397354e-05, -1.9185245037078857e-05, -1.8506310880184174e-05, -1.782737672328949e-05, -1.7148442566394806e-05, -1.6469508409500122e-05, -1.5790574252605438e-05, -1.5111640095710754e-05, -1.443270593881607e-05, -1.3753771781921387e-05, -1.3074837625026703e-05, -1.2395903468132019e-05, -1.1716969311237335e-05, -1.1038035154342651e-05, -1.0359100997447968e-05, 
-9.680166840553284e-06, -9.0012326836586e-06, -8.322298526763916e-06, -7.643364369869232e-06, -6.964430212974548e-06, -6.2854960560798645e-06, -5.606561899185181e-06, -4.927627742290497e-06, -4.248693585395813e-06, -3.569759428501129e-06, -2.8908252716064453e-06, -2.2118911147117615e-06, -1.5329569578170776e-06, -8.540228009223938e-07, -1.7508864402770996e-07, 5.038455128669739e-07, 1.1827796697616577e-06, 1.8617138266563416e-06, 2.5406479835510254e-06, 3.2195821404457092e-06, 3.898516297340393e-06, 4.577450454235077e-06, 5.256384611129761e-06, 5.935318768024445e-06, 6.614252924919128e-06, 7.293187081813812e-06, 7.972121238708496e-06, 8.65105539560318e-06, 9.329989552497864e-06, 1.0008923709392548e-05, 1.0687857866287231e-05, 1.1366792023181915e-05, 1.2045726180076599e-05, 1.2724660336971283e-05, 1.3403594493865967e-05, 1.408252865076065e-05, 1.4761462807655334e-05, 1.544039696455002e-05, 1.6119331121444702e-05, 1.6798265278339386e-05, 1.747719943523407e-05, 1.8156133592128754e-05, 1.8835067749023438e-05]}, "gradients/decoder.model.decoder.layers.1.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 4.0, 5.0, 15.0, 20.0, 27.0, 26.0, 50.0, 73.0, 102.0, 211.0, 488.0, 1784.0, 30800.0, 999165.0, 13538.0, 1357.0, 410.0, 167.0, 109.0, 65.0, 34.0, 32.0, 21.0, 13.0, 11.0, 6.0, 4.0, 3.0, 3.0, 2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.958578109741211e-05, -8.698180317878723e-05, -8.437782526016235e-05, -8.177384734153748e-05, -7.91698694229126e-05, -7.656589150428772e-05, -7.396191358566284e-05, -7.135793566703796e-05, -6.875395774841309e-05, -6.614997982978821e-05, -6.354600191116333e-05, -6.094202399253845e-05, -5.8338046073913574e-05, -5.5734068155288696e-05, -5.313009023666382e-05, -5.052611231803894e-05, -4.792213439941406e-05, -4.5318156480789185e-05, -4.271417856216431e-05, -4.011020064353943e-05, -3.750622272491455e-05, -3.490224480628967e-05, -3.2298266887664795e-05, -2.9694288969039917e-05, -2.709031105041504e-05, -2.448633313179016e-05, -2.1882355213165283e-05, -1.9278377294540405e-05, -1.6674399375915527e-05, -1.407042145729065e-05, -1.1466443538665771e-05, -8.862465620040894e-06, -6.258487701416016e-06, -3.6545097827911377e-06, -1.0505318641662598e-06, 1.5534460544586182e-06, 4.157423973083496e-06, 6.761401891708374e-06, 9.365379810333252e-06, 1.196935772895813e-05, 1.4573335647583008e-05, 1.7177313566207886e-05, 1.9781291484832764e-05, 2.238526940345764e-05, 2.498924732208252e-05, 2.7593225240707397e-05, 3.0197203159332275e-05, 3.280118107795715e-05, 3.540515899658203e-05, 3.800913691520691e-05, 4.061311483383179e-05, 4.3217092752456665e-05, 4.582107067108154e-05, 4.842504858970642e-05, 5.10290265083313e-05, 5.363300442695618e-05, 5.6236982345581055e-05, 5.884096026420593e-05, 6.144493818283081e-05, 6.404891610145569e-05, 6.665289402008057e-05, 6.925687193870544e-05, 7.186084985733032e-05, 7.44648277759552e-05, 7.706880569458008e-05]}, "gradients/decoder.model.decoder.layers.1.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 5.0, 2.0, 2.0, 4.0, 11.0, 6.0, 12.0, 14.0, 18.0, 21.0, 30.0, 32.0, 54.0, 46.0, 51.0, 60.0, 60.0, 76.0, 77.0, 75.0, 53.0, 44.0, 49.0, 42.0, 40.0, 31.0, 19.0, 16.0, 13.0, 8.0, 6.0, 7.0, 7.0, 4.0, 6.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.035234451293945e-05, -3.921985626220703e-05, 
-3.808736801147461e-05, -3.695487976074219e-05, -3.5822391510009766e-05, -3.4689903259277344e-05, -3.355741500854492e-05, -3.24249267578125e-05, -3.129243850708008e-05, -3.0159950256347656e-05, -2.9027462005615234e-05, -2.7894973754882812e-05, -2.676248550415039e-05, -2.562999725341797e-05, -2.4497509002685547e-05, -2.3365020751953125e-05, -2.2232532501220703e-05, -2.110004425048828e-05, -1.996755599975586e-05, -1.8835067749023438e-05, -1.7702579498291016e-05, -1.6570091247558594e-05, -1.5437602996826172e-05, -1.430511474609375e-05, -1.3172626495361328e-05, -1.2040138244628906e-05, -1.0907649993896484e-05, -9.775161743164062e-06, -8.64267349243164e-06, -7.510185241699219e-06, -6.377696990966797e-06, -5.245208740234375e-06, -4.112720489501953e-06, -2.9802322387695312e-06, -1.8477439880371094e-06, -7.152557373046875e-07, 4.172325134277344e-07, 1.5497207641601562e-06, 2.682209014892578e-06, 3.814697265625e-06, 4.947185516357422e-06, 6.079673767089844e-06, 7.212162017822266e-06, 8.344650268554688e-06, 9.47713851928711e-06, 1.0609626770019531e-05, 1.1742115020751953e-05, 1.2874603271484375e-05, 1.4007091522216797e-05, 1.5139579772949219e-05, 1.627206802368164e-05, 1.7404556274414062e-05, 1.8537044525146484e-05, 1.9669532775878906e-05, 2.0802021026611328e-05, 2.193450927734375e-05, 2.3066997528076172e-05, 2.4199485778808594e-05, 2.5331974029541016e-05, 2.6464462280273438e-05, 2.759695053100586e-05, 2.872943878173828e-05, 2.9861927032470703e-05, 3.0994415283203125e-05, 3.212690353393555e-05]}, "gradients/decoder.model.decoder.layers.1.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 7.0, 0.0, 20.0, 0.0, 48.0, 0.0, 0.0, 415.0, 0.0, 9387.0, 0.0, 1028830.0, 0.0, 9335.0, 0.0, 431.0, 0.0, 47.0, 0.0, 22.0, 0.0, 11.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.0132789611816406e-06, -9.853392839431763e-07, -9.57399606704712e-07, -9.294599294662476e-07, -9.015202522277832e-07, -8.735805749893188e-07, -8.456408977508545e-07, -8.177012205123901e-07, -7.897615432739258e-07, -7.618218660354614e-07, -7.338821887969971e-07, -7.059425115585327e-07, -6.780028343200684e-07, -6.50063157081604e-07, -6.221234798431396e-07, -5.941838026046753e-07, -5.662441253662109e-07, -5.383044481277466e-07, -5.103647708892822e-07, -4.824250936508179e-07, -4.544854164123535e-07, -4.2654573917388916e-07, -3.986060619354248e-07, -3.7066638469696045e-07, -3.427267074584961e-07, -3.1478703022003174e-07, -2.868473529815674e-07, -2.5890767574310303e-07, -2.3096799850463867e-07, -2.0302832126617432e-07, -1.7508864402770996e-07, -1.471489667892456e-07, -1.1920928955078125e-07, -9.12696123123169e-08, -6.332993507385254e-08, -3.5390257835388184e-08, -7.450580596923828e-09, 2.0489096641540527e-08, 4.842877388000488e-08, 7.636845111846924e-08, 1.043081283569336e-07, 1.3224780559539795e-07, 1.601874828338623e-07, 1.8812716007232666e-07, 2.1606683731079102e-07, 2.4400651454925537e-07, 2.7194619178771973e-07, 2.998858690261841e-07, 3.2782554626464844e-07, 3.557652235031128e-07, 3.8370490074157715e-07, 4.116445779800415e-07, 4.3958425521850586e-07, 4.675239324569702e-07, 4.954636096954346e-07, 5.234032869338989e-07, 5.513429641723633e-07, 5.792826414108276e-07, 6.07222318649292e-07, 6.351619958877563e-07, 6.631016731262207e-07, 6.910413503646851e-07, 7.189810276031494e-07, 7.469207048416138e-07, 7.748603820800781e-07]}, 
"gradients/decoder.model.decoder.layers.1.self_attn.k_proj.bias": {"_type": "histogram", "values": [40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 946.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 35.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-5.960464477539063e-08, -5.681067705154419e-08, -5.4016709327697754e-08, -5.122274160385132e-08, -4.842877388000488e-08, -4.563480615615845e-08, -4.284083843231201e-08, -4.0046870708465576e-08, -3.725290298461914e-08, -3.4458935260772705e-08, -3.166496753692627e-08, -2.8870999813079834e-08, -2.60770320892334e-08, -2.3283064365386963e-08, -2.0489096641540527e-08, -1.7695128917694092e-08, -1.4901161193847656e-08, -1.210719347000122e-08, -9.313225746154785e-09, -6.51925802230835e-09, -3.725290298461914e-09, -9.313225746154785e-10, 1.862645149230957e-09, 4.6566128730773926e-09, 7.450580596923828e-09, 1.0244548320770264e-08, 1.30385160446167e-08, 1.5832483768463135e-08, 1.862645149230957e-08, 2.1420419216156006e-08, 2.421438694000244e-08, 2.7008354663848877e-08, 2.9802322387695312e-08, 3.259629011154175e-08, 3.5390257835388184e-08, 3.818422555923462e-08, 4.0978193283081055e-08, 4.377216100692749e-08, 4.6566128730773926e-08, 4.936009645462036e-08, 5.21540641784668e-08, 5.494803190231323e-08, 5.774199962615967e-08, 6.05359673500061e-08, 6.332993507385254e-08, 6.612390279769897e-08, 6.891787052154541e-08, 7.171183824539185e-08, 7.450580596923828e-08, 7.729977369308472e-08, 8.009374141693115e-08, 8.288770914077759e-08, 8.568167686462402e-08, 8.847564458847046e-08, 9.12696123123169e-08, 9.406358003616333e-08, 9.685754776000977e-08, 9.96515154838562e-08, 1.0244548320770264e-07, 1.0523945093154907e-07, 1.0803341865539551e-07, 1.1082738637924194e-07, 1.1362135410308838e-07, 1.1641532182693481e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.1.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 7.0, 4.0, 7.0, 0.0, 4.0, 18.0, 0.0, 69.0, 227.0, 0.0, 1174.0, 14444.0, 1016671.0, 0.0, 14435.0, 1165.0, 0.0, 233.0, 69.0, 14.0, 0.0, 7.0, 2.0, 0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2516975402832031e-06, -1.2097880244255066e-06, -1.16787850856781e-06, -1.1259689927101135e-06, -1.084059476852417e-06, -1.0421499609947205e-06, -1.000240445137024e-06, -9.583309292793274e-07, -9.164214134216309e-07, -8.745118975639343e-07, -8.326023817062378e-07, -7.906928658485413e-07, -7.487833499908447e-07, -7.068738341331482e-07, -6.649643182754517e-07, -6.230548024177551e-07, -5.811452865600586e-07, -5.392357707023621e-07, -4.973262548446655e-07, -4.55416738986969e-07, -4.1350722312927246e-07, -3.7159770727157593e-07, -3.296881914138794e-07, -2.8777867555618286e-07, -2.4586915969848633e-07, -2.039596438407898e-07, -1.6205012798309326e-07, -1.2014061212539673e-07, -7.82310962677002e-08, -3.632158041000366e-08, 5.587935447692871e-09, 4.7497451305389404e-08, 8.940696716308594e-08, 1.3131648302078247e-07, 1.73225998878479e-07, 2.1513551473617554e-07, 2.5704503059387207e-07, 2.989545464515686e-07, 3.4086406230926514e-07, 3.8277357816696167e-07, 4.246830940246582e-07, 4.6659260988235474e-07, 5.085021257400513e-07, 5.504116415977478e-07, 5.923211574554443e-07, 6.342306733131409e-07, 6.761401891708374e-07, 
7.180497050285339e-07, 7.599592208862305e-07, 8.01868736743927e-07, 8.437782526016235e-07, 8.856877684593201e-07, 9.275972843170166e-07, 9.695068001747131e-07, 1.0114163160324097e-06, 1.0533258318901062e-06, 1.0952353477478027e-06, 1.1371448636054993e-06, 1.1790543794631958e-06, 1.2209638953208923e-06, 1.2628734111785889e-06, 1.3047829270362854e-06, 1.346692442893982e-06, 1.3886019587516785e-06, 1.430511474609375e-06]}, "gradients/decoder.model.decoder.layers.1.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 12.0, 0.0, 0.0, 0.0, 0.0, 90.0, 0.0, 0.0, 0.0, 0.0, 801.0, 0.0, 0.0, 0.0, 0.0, 72.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.172325134277344e-07, -4.0512531995773315e-07, -3.9301812648773193e-07, -3.809109330177307e-07, -3.688037395477295e-07, -3.5669654607772827e-07, -3.4458935260772705e-07, -3.3248215913772583e-07, -3.203749656677246e-07, -3.082677721977234e-07, -2.9616057872772217e-07, -2.8405338525772095e-07, -2.7194619178771973e-07, -2.598389983177185e-07, -2.477318048477173e-07, -2.3562461137771606e-07, -2.2351741790771484e-07, -2.1141022443771362e-07, -1.993030309677124e-07, -1.8719583749771118e-07, -1.7508864402770996e-07, -1.6298145055770874e-07, -1.5087425708770752e-07, -1.387670636177063e-07, -1.2665987014770508e-07, -1.1455267667770386e-07, -1.0244548320770264e-07, -9.033828973770142e-08, -7.82310962677002e-08, -6.612390279769897e-08, -5.4016709327697754e-08, -4.190951585769653e-08, -2.9802322387695312e-08, -1.7695128917694092e-08, -5.587935447692871e-09, 6.51925802230835e-09, 1.862645149230957e-08, 3.073364496231079e-08, 4.284083843231201e-08, 5.494803190231323e-08, 6.705522537231445e-08, 7.916241884231567e-08, 9.12696123123169e-08, 1.0337680578231812e-07, 1.1548399925231934e-07, 1.2759119272232056e-07, 1.3969838619232178e-07, 1.51805579662323e-07, 1.6391277313232422e-07, 1.7601996660232544e-07, 1.8812716007232666e-07, 2.0023435354232788e-07, 2.123415470123291e-07, 2.2444874048233032e-07, 2.3655593395233154e-07, 2.4866312742233276e-07, 2.60770320892334e-07, 2.728775143623352e-07, 2.849847078323364e-07, 2.9709190130233765e-07, 3.0919909477233887e-07, 3.213062882423401e-07, 3.334134817123413e-07, 3.4552067518234253e-07, 3.5762786865234375e-07]}, "gradients/decoder.model.decoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 20.0, 130.0, 638.0, 182.0, 34.0, 8.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00020560156553983688, -0.00020161608699709177, -0.00019763060845434666, -0.00019364512991160154, -0.00018965965136885643, -0.0001856741582741961, -0.00018168867973145097, -0.00017770320118870586, -0.00017371772264596075, -0.00016973224410321563, -0.00016574676556047052, -0.0001617612870177254, -0.00015777579392306507, -0.00015379031538031995, -0.00014980483683757484, -0.00014581935829482973, -0.0001418338797520846, -0.0001378484012093395, -0.00013386292266659439, -0.00012987744412384927, -0.00012589196558110416, -0.00012190647976240143, -0.0001179209939436987, -0.00011393551540095359, -0.00010995003685820848, -0.00010596455831546336, -0.00010197907977271825, 
-9.799359395401552e-05, -9.400811541127041e-05, -9.00226368685253e-05, -8.603715104982257e-05, -8.205167250707746e-05, -7.806620851624757e-05, -7.408072997350246e-05, -7.009525143075734e-05, -6.610976561205462e-05, -6.21242870693095e-05, -5.813880852656439e-05, -5.415332634584047e-05, -5.016784416511655e-05, -4.618236198439263e-05, -4.219687980366871e-05, -3.8211401260923594e-05, -3.422592271817848e-05, -3.024044053745456e-05, -2.6254960175720043e-05, -2.2269479813985527e-05, -1.828399945225101e-05, -1.4298519090516493e-05, -1.0313038728781976e-05, -6.327558367047459e-06, -2.342078005312942e-06, 1.643402356421575e-06, 5.628882718156092e-06, 9.614363079890609e-06, 1.3599843441625126e-05, 1.7585323803359643e-05, 2.157080416509416e-05, 2.5556284526828676e-05, 2.9541764888563193e-05, 3.352724525029771e-05, 3.751272743102163e-05, 4.1498205973766744e-05, 4.548368451651186e-05, 4.946916669723578e-05]}, "gradients/decoder.model.decoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 9.0, 7.0, 9.0, 8.0, 19.0, 18.0, 11.0, 21.0, 19.0, 19.0, 25.0, 32.0, 37.0, 41.0, 45.0, 41.0, 49.0, 42.0, 50.0, 50.0, 50.0, 42.0, 46.0, 34.0, 32.0, 38.0, 30.0, 19.0, 27.0, 22.0, 22.0, 22.0, 21.0, 7.0, 9.0, 5.0, 4.0, 8.0, 2.0, 4.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.583987174555659e-05, -4.447176252142526e-05, -4.310365329729393e-05, -4.17355440731626e-05, -4.036743484903127e-05, -3.8999329262878746e-05, -3.763121640076861e-05, -3.6263110814616084e-05, -3.489500159048475e-05, -3.352689236635342e-05, -3.215878314222209e-05, -3.079067391809076e-05, -2.9422566512948833e-05, -2.8054457288817503e-05, -2.6686348064686172e-05, -2.5318240659544244e-05, -2.395012961642351e-05, -2.258202039229218e-05, -2.121391116816085e-05, -1.984580376301892e-05, -1.847769453888759e-05, -1.710958531475626e-05, -1.574147609062493e-05, -1.43733677759883e-05, -1.3005258551856969e-05, -1.1637149327725638e-05, -1.0269041013089009e-05, -8.900931788957678e-06, -7.532823019573698e-06, -6.164714250189718e-06, -4.796605026058387e-06, -3.428496711421758e-06, -2.060387487290427e-06, -6.922786042196094e-07, 6.758302788512083e-07, 2.0439392756088637e-06, 3.4120480449928436e-06, 4.780156814376824e-06, 6.1482660385081545e-06, 7.5163743531447835e-06, 8.884483577276114e-06, 1.0252592801407445e-05, 1.1620701116044074e-05, 1.2988810340175405e-05, 1.4356919564306736e-05, 1.5725028788438067e-05, 1.7093138012569398e-05, 1.8461245417711325e-05, 1.9829354641842656e-05, 2.1197463865973987e-05, 2.2565573090105318e-05, 2.3933680495247245e-05, 2.5301789719378576e-05, 2.6669898943509907e-05, 2.8038008167641237e-05, 2.9406117391772568e-05, 3.07742266159039e-05, 3.214233584003523e-05, 3.351044506416656e-05, 3.487855428829789e-05, 3.624666351242922e-05, 3.761477273656055e-05, 3.898287832271308e-05, 4.035098754684441e-05, 4.171909677097574e-05]}, "gradients/decoder.model.decoder.layers.0.fc2.weight": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 10.0, 1.0, 3.0, 2.0, 4.0, 8.0, 26.0, 11.0, 11.0, 32.0, 27.0, 125.0, 69.0, 79.0, 115.0, 199.0, 518.0, 490.0, 994.0, 2200.0, 6316.0, 1237207.0, 2934809.0, 6206.0, 2188.0, 962.0, 449.0, 518.0, 178.0, 107.0, 99.0, 66.0, 136.0, 31.0, 25.0, 11.0, 9.0, 11.0, 29.0, 3.0, 2.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4928321838378906e-05, -3.3836811780929565e-05, -3.2745301723480225e-05, -3.1653791666030884e-05, -3.056228160858154e-05, -2.9470771551132202e-05, 
-2.837926149368286e-05, -2.728775143623352e-05, -2.619624137878418e-05, -2.510473132133484e-05, -2.4013221263885498e-05, -2.2921711206436157e-05, -2.1830201148986816e-05, -2.0738691091537476e-05, -1.9647181034088135e-05, -1.8555670976638794e-05, -1.7464160919189453e-05, -1.6372650861740112e-05, -1.528114080429077e-05, -1.418963074684143e-05, -1.309812068939209e-05, -1.2006610631942749e-05, -1.0915100574493408e-05, -9.823590517044067e-06, -8.732080459594727e-06, -7.640570402145386e-06, -6.549060344696045e-06, -5.457550287246704e-06, -4.366040229797363e-06, -3.2745301723480225e-06, -2.1830201148986816e-06, -1.0915100574493408e-06, 0.0, 1.0915100574493408e-06, 2.1830201148986816e-06, 3.2745301723480225e-06, 4.366040229797363e-06, 5.457550287246704e-06, 6.549060344696045e-06, 7.640570402145386e-06, 8.732080459594727e-06, 9.823590517044067e-06, 1.0915100574493408e-05, 1.2006610631942749e-05, 1.309812068939209e-05, 1.418963074684143e-05, 1.528114080429077e-05, 1.6372650861740112e-05, 1.7464160919189453e-05, 1.8555670976638794e-05, 1.9647181034088135e-05, 2.0738691091537476e-05, 2.1830201148986816e-05, 2.2921711206436157e-05, 2.4013221263885498e-05, 2.510473132133484e-05, 2.619624137878418e-05, 2.728775143623352e-05, 2.837926149368286e-05, 2.9470771551132202e-05, 3.056228160858154e-05, 3.1653791666030884e-05, 3.2745301723480225e-05, 3.3836811780929565e-05, 3.4928321838378906e-05]}, "gradients/decoder.model.decoder.layers.0.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 9.0, 12.0, 11.0, 15.0, 11.0, 17.0, 26.0, 28.0, 32.0, 30.0, 30.0, 43.0, 45.0, 50.0, 37.0, 47.0, 50.0, 51.0, 67.0, 54.0, 50.0, 49.0, 34.0, 17.0, 34.0, 26.0, 21.0, 25.0, 17.0, 10.0, 12.0, 8.0, 10.0, 10.0, 2.0, 5.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0], "bins": [-6.67572021484375e-06, -6.5034255385398865e-06, -6.331130862236023e-06, -6.1588361859321594e-06, -5.986541509628296e-06, -5.814246833324432e-06, -5.641952157020569e-06, -5.469657480716705e-06, -5.297362804412842e-06, -5.125068128108978e-06, -4.952773451805115e-06, -4.780478775501251e-06, -4.608184099197388e-06, -4.435889422893524e-06, -4.263594746589661e-06, -4.091300070285797e-06, -3.919005393981934e-06, -3.74671071767807e-06, -3.5744160413742065e-06, -3.402121365070343e-06, -3.2298266887664795e-06, -3.057532012462616e-06, -2.8852373361587524e-06, -2.712942659854889e-06, -2.5406479835510254e-06, -2.368353307247162e-06, -2.1960586309432983e-06, -2.023763954639435e-06, -1.8514692783355713e-06, -1.6791746020317078e-06, -1.5068799257278442e-06, -1.3345852494239807e-06, -1.1622905731201172e-06, -9.899958968162537e-07, -8.177012205123901e-07, -6.454065442085266e-07, -4.731118679046631e-07, -3.0081719160079956e-07, -1.2852251529693604e-07, 4.377216100692749e-08, 2.1606683731079102e-07, 3.8836151361465454e-07, 5.606561899185181e-07, 7.329508662223816e-07, 9.052455425262451e-07, 1.0775402188301086e-06, 1.2498348951339722e-06, 1.4221295714378357e-06, 1.5944242477416992e-06, 1.7667189240455627e-06, 1.9390136003494263e-06, 2.11130827665329e-06, 2.2836029529571533e-06, 2.455897629261017e-06, 2.6281923055648804e-06, 2.800486981868744e-06, 2.9727816581726074e-06, 3.145076334476471e-06, 3.3173710107803345e-06, 3.489665687084198e-06, 3.6619603633880615e-06, 3.834255039691925e-06, 4.0065497159957886e-06, 4.178844392299652e-06, 4.351139068603516e-06]}, "gradients/decoder.model.decoder.layers.0.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 
4.0, 1.0, 3.0, 11.0, 21.0, 242.0, 1597.0, 4190300.0, 1822.0, 229.0, 40.0, 8.0, 6.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-9.85860824584961e-05, -9.326264262199402e-05, -8.793920278549194e-05, -8.261576294898987e-05, -7.729232311248779e-05, -7.196888327598572e-05, -6.664544343948364e-05, -6.132200360298157e-05, -5.599856376647949e-05, -5.067512392997742e-05, -4.535168409347534e-05, -4.0028244256973267e-05, -3.470480442047119e-05, -2.9381364583969116e-05, -2.405792474746704e-05, -1.8734484910964966e-05, -1.341104507446289e-05, -8.087605237960815e-06, -2.7641654014587402e-06, 2.559274435043335e-06, 7.88271427154541e-06, 1.3206154108047485e-05, 1.852959394454956e-05, 2.3853033781051636e-05, 2.917647361755371e-05, 3.4499913454055786e-05, 3.982335329055786e-05, 4.5146793127059937e-05, 5.047023296356201e-05, 5.579367280006409e-05, 6.111711263656616e-05, 6.644055247306824e-05, 7.176399230957031e-05, 7.708743214607239e-05, 8.241087198257446e-05, 8.773431181907654e-05, 9.305775165557861e-05, 9.838119149208069e-05, 0.00010370463132858276, 0.00010902807116508484, 0.00011435151100158691, 0.00011967495083808899, 0.00012499839067459106, 0.00013032183051109314, 0.00013564527034759521, 0.0001409687101840973, 0.00014629215002059937, 0.00015161558985710144, 0.00015693902969360352, 0.0001622624695301056, 0.00016758590936660767, 0.00017290934920310974, 0.00017823278903961182, 0.0001835562288761139, 0.00018887966871261597, 0.00019420310854911804, 0.00019952654838562012, 0.0002048499882221222, 0.00021017342805862427, 0.00021549686789512634, 0.00022082030773162842, 0.0002261437475681305, 0.00023146718740463257, 0.00023679062724113464, 0.00024211406707763672]}, "gradients/decoder.model.decoder.layers.0.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 8.0, 0.0, 10.0, 16.0, 25.0, 45.0, 308.0, 2635.0, 855.0, 115.0, 31.0, 16.0, 6.0, 6.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.377696990966797e-06, -6.083399057388306e-06, -5.7891011238098145e-06, -5.494803190231323e-06, -5.200505256652832e-06, -4.906207323074341e-06, -4.61190938949585e-06, -4.317611455917358e-06, -4.023313522338867e-06, -3.729015588760376e-06, -3.4347176551818848e-06, -3.1404197216033936e-06, -2.8461217880249023e-06, -2.551823854446411e-06, -2.25752592086792e-06, -1.9632279872894287e-06, -1.6689300537109375e-06, -1.3746321201324463e-06, -1.080334186553955e-06, -7.860362529754639e-07, -4.917383193969727e-07, -1.9744038581848145e-07, 9.685754776000977e-08, 3.91155481338501e-07, 6.854534149169922e-07, 9.797513484954834e-07, 1.2740492820739746e-06, 1.5683472156524658e-06, 1.862645149230957e-06, 2.1569430828094482e-06, 2.4512410163879395e-06, 2.7455389499664307e-06, 3.039836883544922e-06, 3.334134817123413e-06, 3.6284327507019043e-06, 3.9227306842803955e-06, 4.217028617858887e-06, 4.511326551437378e-06, 4.805624485015869e-06, 5.09992241859436e-06, 5.3942203521728516e-06, 5.688518285751343e-06, 5.982816219329834e-06, 6.277114152908325e-06, 6.571412086486816e-06, 6.865710020065308e-06, 7.160007953643799e-06, 7.45430588722229e-06, 7.748603820800781e-06, 8.042901754379272e-06, 8.337199687957764e-06, 8.631497621536255e-06, 8.925795555114746e-06, 9.220093488693237e-06, 
9.514391422271729e-06, 9.80868935585022e-06, 1.0102987289428711e-05, 1.0397285223007202e-05, 1.0691583156585693e-05, 1.0985881090164185e-05, 1.1280179023742676e-05, 1.1574476957321167e-05, 1.1868774890899658e-05, 1.216307282447815e-05, 1.245737075805664e-05]}, "gradients/decoder.model.decoder.layers.0.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 2.0, 6.0, 2.0, 11.0, 11.0, 31.0, 37.0, 80.0, 119.0, 200.0, 229.0, 116.0, 80.0, 40.0, 19.0, 12.0, 7.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.262283699878026e-06, -7.6266087489784695e-06, -6.990933343331562e-06, -6.355258392432006e-06, -5.719582986785099e-06, -5.083908035885543e-06, -4.448233084985986e-06, -3.812557679339079e-06, -3.176882728439523e-06, -2.541207550166291e-06, -1.9055324855798972e-06, -1.2698574209935032e-06, -6.341822427202715e-07, 1.4929355529602617e-09, 6.371678864525165e-07, 1.2728432920994237e-06, 1.90851824299898e-06, 2.5441934212722117e-06, 3.1798685995454434e-06, 3.815543550445e-06, 4.451218956091907e-06, 5.086893906991463e-06, 5.7225688578910194e-06, 6.358244263537927e-06, 6.993919214437483e-06, 7.62959462008439e-06, 8.265269570983946e-06, 8.900944521883503e-06, 9.536619472783059e-06, 1.0172294423682615e-05, 1.0807969374582171e-05, 1.144364523497643e-05, 1.2079321095370688e-05, 1.2714996046270244e-05, 1.33506709971698e-05, 1.3986345948069356e-05, 1.4622021808463614e-05, 1.525769675936317e-05, 1.589337261975743e-05, 1.6529047570656985e-05, 1.716472252155654e-05, 1.7800397472456098e-05, 1.8436072423355654e-05, 1.907174737425521e-05, 1.9707422325154766e-05, 2.0343097276054323e-05, 2.097877222695388e-05, 2.161444899684284e-05, 2.225012212875299e-05, 2.2885797079652548e-05, 2.3521472030552104e-05, 2.415714698145166e-05, 2.4792821932351217e-05, 2.5428496883250773e-05, 2.606417183415033e-05, 2.669984860403929e-05, 2.7335523554938845e-05, 2.79711985058384e-05, 2.8606873456737958e-05, 2.9242548407637514e-05, 2.987822335853707e-05, 3.051390012842603e-05, 3.1149575079325587e-05, 3.178525003022514e-05, 3.24209249811247e-05]}, "gradients/decoder.model.decoder.layers.0.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 7.0, 7.0, 14.0, 19.0, 27.0, 20.0, 41.0, 52.0, 51.0, 52.0, 75.0, 52.0, 53.0, 68.0, 50.0, 67.0, 56.0, 56.0, 54.0, 43.0, 37.0, 31.0, 16.0, 20.0, 9.0, 8.0, 10.0, 8.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.904036465333775e-06, -7.630450454598758e-06, -7.356865353358444e-06, -7.083279797370778e-06, -6.809694241383113e-06, -6.536108230648097e-06, -6.262522674660431e-06, -5.988937118672766e-06, -5.7153515626851e-06, -5.441766006697435e-06, -5.1681804507097695e-06, -4.894594894722104e-06, -4.621008883987088e-06, -4.347423782746773e-06, -4.073837772011757e-06, -3.8002522160240915e-06, -3.526666660036426e-06, -3.2530811040487606e-06, -2.979495548061095e-06, -2.7059097646997543e-06, -2.432324208712089e-06, -2.1587386527244234e-06, -1.8851529830499203e-06, -1.6115673133754171e-06, -1.3379817573877517e-06, -1.0643962014000863e-06, -7.908105317255831e-07, -5.172249188944988e-07, -2.4363930606341455e-07, 2.994624992425088e-08, 3.0353191959875403e-07, 5.771175892732572e-07, 8.507022357662208e-07, 1.1242877917538863e-06, 1.3978734614283894e-06, 
1.6714591311028926e-06, 1.945044687090558e-06, 2.2186302430782234e-06, 2.4922160264395643e-06, 2.7658015824272297e-06, 3.039387138414895e-06, 3.3129726944025606e-06, 3.586558250390226e-06, 3.8601438063778915e-06, 4.133729817112908e-06, 4.407314918353222e-06, 4.680900929088239e-06, 4.954486485075904e-06, 5.2280720410635695e-06, 5.501657597051235e-06, 5.7752431530389e-06, 6.048828709026566e-06, 6.322414265014231e-06, 6.5960002757492475e-06, 6.869585831736913e-06, 7.143171387724578e-06, 7.416756943712244e-06, 7.69034249969991e-06, 7.963928510434926e-06, 8.23751361167524e-06, 8.511099622410256e-06, 8.784684723650571e-06, 9.058270734385587e-06, 9.331855835625902e-06, 9.605441846360918e-06]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 4.0, 8.0, 19.0, 43.0, 66.0, 125.0, 350.0, 673.0, 2068.0, 54174.0, 987680.0, 1981.0, 744.0, 289.0, 197.0, 63.0, 32.0, 16.0, 10.0, 7.0, 1.0, 6.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.748603820800781e-06, -7.554888725280762e-06, -7.361173629760742e-06, -7.167458534240723e-06, -6.973743438720703e-06, -6.780028343200684e-06, -6.586313247680664e-06, -6.3925981521606445e-06, -6.198883056640625e-06, -6.0051679611206055e-06, -5.811452865600586e-06, -5.617737770080566e-06, -5.424022674560547e-06, -5.230307579040527e-06, -5.036592483520508e-06, -4.842877388000488e-06, -4.649162292480469e-06, -4.455447196960449e-06, -4.26173210144043e-06, -4.06801700592041e-06, -3.874301910400391e-06, -3.680586814880371e-06, -3.4868717193603516e-06, -3.293156623840332e-06, -3.0994415283203125e-06, -2.905726432800293e-06, -2.7120113372802734e-06, -2.518296241760254e-06, -2.3245811462402344e-06, -2.130866050720215e-06, -1.9371509552001953e-06, -1.7434358596801758e-06, -1.5497207641601562e-06, -1.3560056686401367e-06, -1.1622905731201172e-06, -9.685754776000977e-07, -7.748603820800781e-07, -5.811452865600586e-07, -3.8743019104003906e-07, -1.9371509552001953e-07, 0.0, 1.9371509552001953e-07, 3.8743019104003906e-07, 5.811452865600586e-07, 7.748603820800781e-07, 9.685754776000977e-07, 1.1622905731201172e-06, 1.3560056686401367e-06, 1.5497207641601562e-06, 1.7434358596801758e-06, 1.9371509552001953e-06, 2.130866050720215e-06, 2.3245811462402344e-06, 2.518296241760254e-06, 2.7120113372802734e-06, 2.905726432800293e-06, 3.0994415283203125e-06, 3.293156623840332e-06, 3.4868717193603516e-06, 3.680586814880371e-06, 3.874301910400391e-06, 4.06801700592041e-06, 4.26173210144043e-06, 4.455447196960449e-06, 4.649162292480469e-06]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 3.0, 15.0, 8.0, 20.0, 32.0, 62.0, 48.0, 84.0, 74.0, 99.0, 97.0, 83.0, 83.0, 78.0, 70.0, 53.0, 35.0, 18.0, 18.0, 13.0, 10.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4836273193359375e-06, -5.218200385570526e-06, -4.952773451805115e-06, -4.687346518039703e-06, -4.421919584274292e-06, -4.156492650508881e-06, -3.891065716743469e-06, -3.625638782978058e-06, -3.3602118492126465e-06, -3.094784915447235e-06, -2.8293579816818237e-06, -2.5639310479164124e-06, -2.298504114151001e-06, -2.0330771803855896e-06, -1.7676502466201782e-06, -1.5022233128547668e-06, 
-1.2367963790893555e-06, -9.71369445323944e-07, -7.059425115585327e-07, -4.4051557779312134e-07, -1.7508864402770996e-07, 9.033828973770142e-08, 3.557652235031128e-07, 6.211921572685242e-07, 8.866190910339355e-07, 1.152046024799347e-06, 1.4174729585647583e-06, 1.6828998923301697e-06, 1.948326826095581e-06, 2.2137537598609924e-06, 2.479180693626404e-06, 2.744607627391815e-06, 3.0100345611572266e-06, 3.275461494922638e-06, 3.5408884286880493e-06, 3.8063153624534607e-06, 4.071742296218872e-06, 4.3371692299842834e-06, 4.602596163749695e-06, 4.868023097515106e-06, 5.133450031280518e-06, 5.398876965045929e-06, 5.66430389881134e-06, 5.929730832576752e-06, 6.195157766342163e-06, 6.4605847001075745e-06, 6.726011633872986e-06, 6.991438567638397e-06, 7.256865501403809e-06, 7.52229243516922e-06, 7.787719368934631e-06, 8.053146302700043e-06, 8.318573236465454e-06, 8.584000170230865e-06, 8.849427103996277e-06, 9.114854037761688e-06, 9.3802809715271e-06, 9.645707905292511e-06, 9.911134839057922e-06, 1.0176561772823334e-05, 1.0441988706588745e-05, 1.0707415640354156e-05, 1.0972842574119568e-05, 1.123826950788498e-05, 1.150369644165039e-05]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 14.0, 0.0, 0.0, 43.0, 0.0, 0.0, 0.0, 131.0, 0.0, 0.0, 0.0, 351.0, 0.0, 0.0, 4971.0, 0.0, 0.0, 0.0, 1037511.0, 0.0, 0.0, 5006.0, 0.0, 0.0, 0.0, 337.0, 0.0, 0.0, 126.0, 0.0, 0.0, 0.0, 52.0, 0.0, 0.0, 19.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 1.0], "bins": [-6.556510925292969e-07, -6.388872861862183e-07, -6.221234798431396e-07, -6.05359673500061e-07, -5.885958671569824e-07, -5.718320608139038e-07, -5.550682544708252e-07, -5.383044481277466e-07, -5.21540641784668e-07, -5.047768354415894e-07, -4.880130290985107e-07, -4.7124922275543213e-07, -4.544854164123535e-07, -4.377216100692749e-07, -4.209578037261963e-07, -4.041939973831177e-07, -3.8743019104003906e-07, -3.7066638469696045e-07, -3.5390257835388184e-07, -3.371387720108032e-07, -3.203749656677246e-07, -3.03611159324646e-07, -2.868473529815674e-07, -2.7008354663848877e-07, -2.5331974029541016e-07, -2.3655593395233154e-07, -2.1979212760925293e-07, -2.0302832126617432e-07, -1.862645149230957e-07, -1.695007085800171e-07, -1.5273690223693848e-07, -1.3597309589385986e-07, -1.1920928955078125e-07, -1.0244548320770264e-07, -8.568167686462402e-08, -6.891787052154541e-08, -5.21540641784668e-08, -3.5390257835388184e-08, -1.862645149230957e-08, -1.862645149230957e-09, 1.4901161193847656e-08, 3.166496753692627e-08, 4.842877388000488e-08, 6.51925802230835e-08, 8.195638656616211e-08, 9.872019290924072e-08, 1.1548399925231934e-07, 1.3224780559539795e-07, 1.4901161193847656e-07, 1.6577541828155518e-07, 1.825392246246338e-07, 1.993030309677124e-07, 2.1606683731079102e-07, 2.3283064365386963e-07, 2.4959444999694824e-07, 2.6635825634002686e-07, 2.8312206268310547e-07, 2.998858690261841e-07, 3.166496753692627e-07, 3.334134817123413e-07, 3.501772880554199e-07, 3.6694109439849854e-07, 3.8370490074157715e-07, 4.0046870708465576e-07, 4.172325134277344e-07]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 5.0, 8.0, 8.0, 10.0, 16.0, 15.0, 24.0, 21.0, 20.0, 29.0, 38.0, 36.0, 47.0, 46.0, 49.0, 48.0, 65.0, 46.0, 63.0, 74.0, 47.0, 50.0, 38.0, 36.0, 32.0, 31.0, 14.0, 15.0, 15.0, 10.0, 
13.0, 8.0, 6.0, 9.0, 1.0, 7.0, 2.0, 6.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6748905181884766e-05, -1.6318634152412415e-05, -1.5888363122940063e-05, -1.5458092093467712e-05, -1.5027821063995361e-05, -1.459755003452301e-05, -1.416727900505066e-05, -1.3737007975578308e-05, -1.3306736946105957e-05, -1.2876465916633606e-05, -1.2446194887161255e-05, -1.2015923857688904e-05, -1.1585652828216553e-05, -1.1155381798744202e-05, -1.072511076927185e-05, -1.02948397397995e-05, -9.864568710327148e-06, -9.434297680854797e-06, -9.004026651382446e-06, -8.573755621910095e-06, -8.143484592437744e-06, -7.713213562965393e-06, -7.282942533493042e-06, -6.852671504020691e-06, -6.42240047454834e-06, -5.992129445075989e-06, -5.561858415603638e-06, -5.131587386131287e-06, -4.7013163566589355e-06, -4.2710453271865845e-06, -3.840774297714233e-06, -3.4105032682418823e-06, -2.9802322387695312e-06, -2.54996120929718e-06, -2.119690179824829e-06, -1.689419150352478e-06, -1.259148120880127e-06, -8.288770914077759e-07, -3.986060619354248e-07, 3.166496753692627e-08, 4.6193599700927734e-07, 8.922070264816284e-07, 1.3224780559539795e-06, 1.7527490854263306e-06, 2.1830201148986816e-06, 2.6132911443710327e-06, 3.043562173843384e-06, 3.473833203315735e-06, 3.904104232788086e-06, 4.334375262260437e-06, 4.764646291732788e-06, 5.194917321205139e-06, 5.62518835067749e-06, 6.055459380149841e-06, 6.485730409622192e-06, 6.9160014390945435e-06, 7.3462724685668945e-06, 7.776543498039246e-06, 8.206814527511597e-06, 8.637085556983948e-06, 9.067356586456299e-06, 9.49762761592865e-06, 9.927898645401001e-06, 1.0358169674873352e-05, 1.0788440704345703e-05]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 0.0, 25.0, 0.0, 0.0, 0.0, 0.0, 60.0, 0.0, 0.0, 0.0, 0.0, 0.0, 785.0, 0.0, 0.0, 0.0, 0.0, 64.0, 0.0, 0.0, 0.0, 0.0, 33.0, 0.0, 0.0, 0.0, 0.0, 0.0, 19.0, 0.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.5762786865234375e-07, -3.46451997756958e-07, -3.3527612686157227e-07, -3.241002559661865e-07, -3.129243850708008e-07, -3.0174851417541504e-07, -2.905726432800293e-07, -2.7939677238464355e-07, -2.682209014892578e-07, -2.5704503059387207e-07, -2.4586915969848633e-07, -2.3469328880310059e-07, -2.2351741790771484e-07, -2.123415470123291e-07, -2.0116567611694336e-07, -1.8998980522155762e-07, -1.7881393432617188e-07, -1.6763806343078613e-07, -1.564621925354004e-07, -1.4528632164001465e-07, -1.341104507446289e-07, -1.2293457984924316e-07, -1.1175870895385742e-07, -1.0058283805847168e-07, -8.940696716308594e-08, -7.82310962677002e-08, -6.705522537231445e-08, -5.587935447692871e-08, -4.470348358154297e-08, 
-3.3527612686157227e-08, -2.2351741790771484e-08, -1.1175870895385742e-08, 0.0, 1.1175870895385742e-08, 2.2351741790771484e-08, 3.3527612686157227e-08, 4.470348358154297e-08, 5.587935447692871e-08, 6.705522537231445e-08, 7.82310962677002e-08, 8.940696716308594e-08, 1.0058283805847168e-07, 1.1175870895385742e-07, 1.2293457984924316e-07, 1.341104507446289e-07, 1.4528632164001465e-07, 1.564621925354004e-07, 1.6763806343078613e-07, 1.7881393432617188e-07, 1.8998980522155762e-07, 2.0116567611694336e-07, 2.123415470123291e-07, 2.2351741790771484e-07, 2.3469328880310059e-07, 2.4586915969848633e-07, 2.5704503059387207e-07, 2.682209014892578e-07, 2.7939677238464355e-07, 2.905726432800293e-07, 3.0174851417541504e-07, 3.129243850708008e-07, 3.241002559661865e-07, 3.3527612686157227e-07, 3.46451997756958e-07, 3.5762786865234375e-07]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.0.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.0.self_attn_layer_norm.weight": {"_type": "histogram", "values": [10.0, 11.0, 58.0, 251.0, 551.0, 115.0, 15.0, 6.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.121250524098286e-06, -3.928803835151484e-06, -2.736357146204682e-06, -1.54391045725788e-06, -3.5146376831107773e-07, 8.409829206357244e-07, 2.0334296095825266e-06, 3.2258762985293288e-06, 4.418322987476131e-06, 5.610769676422933e-06, 6.803216365369735e-06, 7.995662599569187e-06, 9.18811019801069e-06, 1.0380555977462791e-05, 1.1573003575904295e-05, 1.2765449355356395e-05, 1.39578969537979e-05, 1.5150343642744701e-05, 1.6342790331691504e-05, 1.7535236111143604e-05, 1.8727683709585108e-05, 1.9920131308026612e-05, 2.1112577087478712e-05, 2.2305022866930813e-05, 2.3497470465372317e-05, 2.468991806381382e-05, 2.588236384326592e-05, 2.707480962271802e-05, 2.8267257221159525e-05, 2.945970481960103e-05, 3.065215059905313e-05, 
3.184459637850523e-05, 3.303704579593614e-05, 3.422949157538824e-05, 3.542193735484034e-05, 3.6614386772271246e-05, 3.7806832551723346e-05, 3.8999278331175447e-05, 4.0191727748606354e-05, 4.1384173528058454e-05, 4.2576619307510555e-05, 4.3769065086962655e-05, 4.4961510866414756e-05, 4.615396028384566e-05, 4.7346406063297763e-05, 4.8538851842749864e-05, 4.973130126018077e-05, 5.092374703963287e-05, 5.211619281908497e-05, 5.330863859853707e-05, 5.450108437798917e-05, 5.569353379542008e-05, 5.688597957487218e-05, 5.807842535432428e-05, 5.927087477175519e-05, 6.046332055120729e-05, 6.165576633065939e-05, 6.28482157480903e-05, 6.404065788956359e-05, 6.52331073069945e-05, 6.642554944846779e-05, 6.76179988658987e-05, 6.88104482833296e-05, 7.00028904248029e-05, 7.119533984223381e-05]}, "gradients/decoder.model.decoder.layers.0.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 11.0, 14.0, 28.0, 38.0, 49.0, 67.0, 73.0, 85.0, 80.0, 84.0, 77.0, 86.0, 82.0, 66.0, 48.0, 40.0, 23.0, 21.0, 13.0, 6.0, 11.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.19500508744386e-06, -4.954760242981138e-06, -4.714515853265766e-06, -4.4742710088030435e-06, -4.234026164340321e-06, -3.9937813198775984e-06, -3.7535367027885513e-06, -3.5132920856995042e-06, -3.2730472412367817e-06, -3.032802396774059e-06, -2.792557779685012e-06, -2.552313162595965e-06, -2.3120683181332424e-06, -2.07182347367052e-06, -1.8315788565814728e-06, -1.591334125805588e-06, -1.3510893950297032e-06, -1.1108446642538183e-06, -8.705999334779335e-07, -6.303552027020487e-07, -3.901104719261639e-07, -1.4986574115027906e-07, 9.037898962560575e-08, 3.3062372040149057e-07, 5.708684511773754e-07, 8.111131819532602e-07, 1.051357912729145e-06, 1.2916026435050298e-06, 1.5318473742809147e-06, 1.7720921050567995e-06, 2.0123368358326843e-06, 2.2525814529217314e-06, 2.492825842637103e-06, 2.7330706870998256e-06, 2.9733153041888727e-06, 3.2135599212779198e-06, 3.4538047657406423e-06, 3.694049610203365e-06, 3.9342939999187365e-06, 4.174538844381459e-06, 4.414783688844182e-06, 4.655028533306904e-06, 4.895273377769627e-06, 5.135517767484998e-06, 5.375762611947721e-06, 5.616007456410443e-06, 5.856251846125815e-06, 6.096496690588538e-06, 6.33674153505126e-06, 6.576986379513983e-06, 6.817231223976705e-06, 7.057475613692077e-06, 7.297720458154799e-06, 7.537965302617522e-06, 7.778209692332894e-06, 8.018454536795616e-06, 8.258699381258339e-06, 8.498944225721061e-06, 8.739189070183784e-06, 8.979433914646506e-06, 9.219678759109229e-06, 9.45992269407725e-06, 9.700167538539972e-06, 9.940412383002695e-06, 1.0180657227465417e-05]}, "gradients/decoder.model.decoder.layers.0.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 5.0, 0.0, 8.0, 5.0, 2.0, 12.0, 16.0, 27.0, 33.0, 37.0, 45.0, 88.0, 154.0, 299.0, 488.0, 903.0, 2315.0, 6421.0, 17750.0, 109745.0, 771189.0, 104755.0, 23349.0, 5928.0, 2681.0, 1069.0, 470.0, 299.0, 163.0, 84.0, 67.0, 48.0, 21.0, 17.0, 23.0, 8.0, 10.0, 10.0, 6.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0], "bins": [-1.4066696166992188e-05, -1.3670884072780609e-05, -1.327507197856903e-05, -1.2879259884357452e-05, -1.2483447790145874e-05, -1.2087635695934296e-05, -1.1691823601722717e-05, -1.1296011507511139e-05, -1.090019941329956e-05, -1.0504387319087982e-05, -1.0108575224876404e-05, 
-9.712763130664825e-06, -9.316951036453247e-06, -8.921138942241669e-06, -8.52532684803009e-06, -8.129514753818512e-06, -7.733702659606934e-06, -7.337890565395355e-06, -6.942078471183777e-06, -6.5462663769721985e-06, -6.15045428276062e-06, -5.754642188549042e-06, -5.358830094337463e-06, -4.963018000125885e-06, -4.567205905914307e-06, -4.171393811702728e-06, -3.77558171749115e-06, -3.3797696232795715e-06, -2.983957529067993e-06, -2.588145434856415e-06, -2.1923333406448364e-06, -1.796521246433258e-06, -1.4007091522216797e-06, -1.0048970580101013e-06, -6.09084963798523e-07, -2.1327286958694458e-07, 1.825392246246338e-07, 5.783513188362122e-07, 9.741634130477905e-07, 1.369975507259369e-06, 1.7657876014709473e-06, 2.1615996956825256e-06, 2.557411789894104e-06, 2.9532238841056824e-06, 3.3490359783172607e-06, 3.744848072528839e-06, 4.1406601667404175e-06, 4.536472260951996e-06, 4.932284355163574e-06, 5.328096449375153e-06, 5.723908543586731e-06, 6.119720637798309e-06, 6.515532732009888e-06, 6.911344826221466e-06, 7.3071569204330444e-06, 7.702969014644623e-06, 8.098781108856201e-06, 8.49459320306778e-06, 8.890405297279358e-06, 9.286217391490936e-06, 9.682029485702515e-06, 1.0077841579914093e-05, 1.0473653674125671e-05, 1.086946576833725e-05, 1.1265277862548828e-05]}, "gradients/decoder.model.decoder.layers.0.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 5.0, 7.0, 9.0, 13.0, 20.0, 23.0, 18.0, 24.0, 41.0, 40.0, 34.0, 42.0, 59.0, 72.0, 64.0, 57.0, 62.0, 64.0, 51.0, 33.0, 49.0, 26.0, 38.0, 33.0, 16.0, 22.0, 21.0, 18.0, 6.0, 9.0, 8.0, 7.0, 5.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.543231964111328e-06, -5.311332643032074e-06, -5.07943332195282e-06, -4.847534000873566e-06, -4.6156346797943115e-06, -4.383735358715057e-06, -4.151836037635803e-06, -3.919936716556549e-06, -3.688037395477295e-06, -3.4561380743980408e-06, -3.2242387533187866e-06, -2.9923394322395325e-06, -2.7604401111602783e-06, -2.528540790081024e-06, -2.29664146900177e-06, -2.064742147922516e-06, -1.8328428268432617e-06, -1.6009435057640076e-06, -1.3690441846847534e-06, -1.1371448636054993e-06, -9.052455425262451e-07, -6.73346221446991e-07, -4.414469003677368e-07, -2.0954757928848267e-07, 2.2351741790771484e-08, 2.5425106287002563e-07, 4.861503839492798e-07, 7.180497050285339e-07, 9.499490261077881e-07, 1.1818483471870422e-06, 1.4137476682662964e-06, 1.6456469893455505e-06, 1.8775463104248047e-06, 2.109445631504059e-06, 2.341344952583313e-06, 2.573244273662567e-06, 2.8051435947418213e-06, 3.0370429158210754e-06, 3.2689422369003296e-06, 3.5008415579795837e-06, 3.732740879058838e-06, 3.964640200138092e-06, 4.196539521217346e-06, 4.4284388422966e-06, 4.6603381633758545e-06, 4.892237484455109e-06, 5.124136805534363e-06, 5.356036126613617e-06, 5.587935447692871e-06, 5.819834768772125e-06, 6.051734089851379e-06, 6.2836334109306335e-06, 6.515532732009888e-06, 6.747432053089142e-06, 6.979331374168396e-06, 7.21123069524765e-06, 7.443130016326904e-06, 7.675029337406158e-06, 7.906928658485413e-06, 8.138827979564667e-06, 8.370727300643921e-06, 8.602626621723175e-06, 8.83452594280243e-06, 9.066425263881683e-06, 9.298324584960938e-06]}, "gradients/decoder.model.decoder.layers.0.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 3.0, 1.0, 3.0, 3.0, 6.0, 2.0, 11.0, 8.0, 13.0, 35.0, 38.0, 80.0, 156.0, 195.0, 450.0, 944.0, 1967.0, 9476.0, 129111.0, 853443.0, 44422.0, 5265.0, 
1359.0, 748.0, 318.0, 212.0, 124.0, 53.0, 39.0, 21.0, 15.0, 13.0, 9.0, 6.0, 3.0, 4.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.3589859008789062e-05, -1.3073906302452087e-05, -1.2557953596115112e-05, -1.2042000889778137e-05, -1.1526048183441162e-05, -1.1010095477104187e-05, -1.0494142770767212e-05, -9.978190064430237e-06, -9.462237358093262e-06, -8.946284651756287e-06, -8.430331945419312e-06, -7.914379239082336e-06, -7.398426532745361e-06, -6.882473826408386e-06, -6.366521120071411e-06, -5.850568413734436e-06, -5.334615707397461e-06, -4.818663001060486e-06, -4.302710294723511e-06, -3.7867575883865356e-06, -3.2708048820495605e-06, -2.7548521757125854e-06, -2.2388994693756104e-06, -1.7229467630386353e-06, -1.2069940567016602e-06, -6.910413503646851e-07, -1.7508864402770996e-07, 3.4086406230926514e-07, 8.568167686462402e-07, 1.3727694749832153e-06, 1.8887221813201904e-06, 2.4046748876571655e-06, 2.9206275939941406e-06, 3.4365803003311157e-06, 3.952533006668091e-06, 4.468485713005066e-06, 4.984438419342041e-06, 5.500391125679016e-06, 6.016343832015991e-06, 6.532296538352966e-06, 7.048249244689941e-06, 7.5642019510269165e-06, 8.080154657363892e-06, 8.596107363700867e-06, 9.112060070037842e-06, 9.628012776374817e-06, 1.0143965482711792e-05, 1.0659918189048767e-05, 1.1175870895385742e-05, 1.1691823601722717e-05, 1.2207776308059692e-05, 1.2723729014396667e-05, 1.3239681720733643e-05, 1.3755634427070618e-05, 1.4271587133407593e-05, 1.4787539839744568e-05, 1.5303492546081543e-05, 1.5819445252418518e-05, 1.6335397958755493e-05, 1.6851350665092468e-05, 1.7367303371429443e-05, 1.788325607776642e-05, 1.8399208784103394e-05, 1.891516149044037e-05, 1.9431114196777344e-05]}, "gradients/decoder.model.decoder.layers.0.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 4.0, 4.0, 6.0, 5.0, 8.0, 16.0, 15.0, 20.0, 24.0, 29.0, 30.0, 35.0, 60.0, 49.0, 50.0, 54.0, 69.0, 73.0, 70.0, 61.0, 58.0, 51.0, 40.0, 30.0, 28.0, 19.0, 25.0, 17.0, 9.0, 7.0, 9.0, 10.0, 5.0, 3.0, 4.0, 5.0, 2.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.344650268554688e-06, -8.05780291557312e-06, -7.770955562591553e-06, -7.484108209609985e-06, -7.197260856628418e-06, -6.910413503646851e-06, -6.623566150665283e-06, -6.336718797683716e-06, -6.0498714447021484e-06, -5.763024091720581e-06, -5.476176738739014e-06, -5.189329385757446e-06, -4.902482032775879e-06, -4.6156346797943115e-06, -4.328787326812744e-06, -4.041939973831177e-06, -3.7550926208496094e-06, -3.468245267868042e-06, -3.1813979148864746e-06, -2.8945505619049072e-06, -2.60770320892334e-06, -2.3208558559417725e-06, -2.034008502960205e-06, -1.7471611499786377e-06, -1.4603137969970703e-06, -1.173466444015503e-06, -8.866190910339355e-07, -5.997717380523682e-07, -3.129243850708008e-07, -2.60770320892334e-08, 2.60770320892334e-07, 5.476176738739014e-07, 8.344650268554688e-07, 1.1213123798370361e-06, 1.4081597328186035e-06, 1.695007085800171e-06, 1.9818544387817383e-06, 2.2687017917633057e-06, 2.555549144744873e-06, 2.8423964977264404e-06, 3.129243850708008e-06, 3.416091203689575e-06, 3.7029385566711426e-06, 3.98978590965271e-06, 4.276633262634277e-06, 4.563480615615845e-06, 4.850327968597412e-06, 5.1371753215789795e-06, 5.424022674560547e-06, 5.710870027542114e-06, 5.997717380523682e-06, 6.284564733505249e-06, 6.571412086486816e-06, 6.858259439468384e-06, 7.145106792449951e-06, 7.4319541454315186e-06, 
7.718801498413086e-06, 8.005648851394653e-06, 8.29249620437622e-06, 8.579343557357788e-06, 8.866190910339355e-06, 9.153038263320923e-06, 9.43988561630249e-06, 9.726732969284058e-06, 1.0013580322265625e-05]}, "gradients/decoder.model.decoder.layers.0.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 25.0, 0.0, 0.0, 64.0, 0.0, 0.0, 0.0, 200.0, 0.0, 0.0, 1883.0, 0.0, 0.0, 0.0, 29661.0, 0.0, 0.0, 0.0, 984997.0, 0.0, 0.0, 29519.0, 0.0, 0.0, 0.0, 1909.0, 0.0, 0.0, 200.0, 0.0, 0.0, 0.0, 72.0, 0.0, 0.0, 22.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.364418029785156e-07, -5.19677996635437e-07, -5.029141902923584e-07, -4.861503839492798e-07, -4.6938657760620117e-07, -4.5262277126312256e-07, -4.3585896492004395e-07, -4.1909515857696533e-07, -4.023313522338867e-07, -3.855675458908081e-07, -3.688037395477295e-07, -3.520399332046509e-07, -3.3527612686157227e-07, -3.1851232051849365e-07, -3.0174851417541504e-07, -2.849847078323364e-07, -2.682209014892578e-07, -2.514570951461792e-07, -2.3469328880310059e-07, -2.1792948246002197e-07, -2.0116567611694336e-07, -1.8440186977386475e-07, -1.6763806343078613e-07, -1.5087425708770752e-07, -1.341104507446289e-07, -1.1734664440155029e-07, -1.0058283805847168e-07, -8.381903171539307e-08, -6.705522537231445e-08, -5.029141902923584e-08, -3.3527612686157227e-08, -1.6763806343078613e-08, 0.0, 1.6763806343078613e-08, 3.3527612686157227e-08, 5.029141902923584e-08, 6.705522537231445e-08, 8.381903171539307e-08, 1.0058283805847168e-07, 1.1734664440155029e-07, 1.341104507446289e-07, 1.5087425708770752e-07, 1.6763806343078613e-07, 1.8440186977386475e-07, 2.0116567611694336e-07, 2.1792948246002197e-07, 2.3469328880310059e-07, 2.514570951461792e-07, 2.682209014892578e-07, 2.849847078323364e-07, 3.0174851417541504e-07, 3.1851232051849365e-07, 3.3527612686157227e-07, 3.520399332046509e-07, 3.688037395477295e-07, 3.855675458908081e-07, 4.023313522338867e-07, 4.1909515857696533e-07, 4.3585896492004395e-07, 4.5262277126312256e-07, 4.6938657760620117e-07, 4.861503839492798e-07, 5.029141902923584e-07, 5.19677996635437e-07, 5.364418029785156e-07]}, "gradients/decoder.model.decoder.layers.0.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 75.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 889.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 54.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0], "bins": [-1.1920928955078125e-07, -1.1548399925231934e-07, -1.1175870895385742e-07, -1.0803341865539551e-07, -1.043081283569336e-07, -1.0058283805847168e-07, -9.685754776000977e-08, -9.313225746154785e-08, -8.940696716308594e-08, -8.568167686462402e-08, -8.195638656616211e-08, -7.82310962677002e-08, -7.450580596923828e-08, -7.078051567077637e-08, -6.705522537231445e-08, -6.332993507385254e-08, -5.960464477539063e-08, -5.587935447692871e-08, -5.21540641784668e-08, -4.842877388000488e-08, -4.470348358154297e-08, -4.0978193283081055e-08, -3.725290298461914e-08, -3.3527612686157227e-08, -2.9802322387695312e-08, -2.60770320892334e-08, -2.2351741790771484e-08, -1.862645149230957e-08, -1.4901161193847656e-08, -1.1175870895385742e-08, -7.450580596923828e-09, -3.725290298461914e-09, 0.0, 3.725290298461914e-09, 7.450580596923828e-09, 1.1175870895385742e-08, 1.4901161193847656e-08, 1.862645149230957e-08, 
2.2351741790771484e-08, 2.60770320892334e-08, 2.9802322387695312e-08, 3.3527612686157227e-08, 3.725290298461914e-08, 4.0978193283081055e-08, 4.470348358154297e-08, 4.842877388000488e-08, 5.21540641784668e-08, 5.587935447692871e-08, 5.960464477539063e-08, 6.332993507385254e-08, 6.705522537231445e-08, 7.078051567077637e-08, 7.450580596923828e-08, 7.82310962677002e-08, 8.195638656616211e-08, 8.568167686462402e-08, 8.940696716308594e-08, 9.313225746154785e-08, 9.685754776000977e-08, 1.0058283805847168e-07, 1.043081283569336e-07, 1.0803341865539551e-07, 1.1175870895385742e-07, 1.1548399925231934e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.0.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 6.0, 0.0, 9.0, 0.0, 15.0, 0.0, 0.0, 17.0, 0.0, 29.0, 0.0, 58.0, 0.0, 0.0, 105.0, 0.0, 320.0, 0.0, 0.0, 1140.0, 0.0, 5721.0, 0.0, 48809.0, 0.0, 0.0, 935941.0, 0.0, 48926.0, 0.0, 5833.0, 0.0, 0.0, 1056.0, 0.0, 307.0, 0.0, 0.0, 148.0, 0.0, 64.0, 0.0, 29.0, 0.0, 0.0, 12.0, 0.0, 8.0, 0.0, 10.0, 0.0, 0.0, 5.0, 0.0, 3.0, 0.0, 1.0], "bins": [-8.344650268554688e-07, -8.093193173408508e-07, -7.841736078262329e-07, -7.59027898311615e-07, -7.338821887969971e-07, -7.087364792823792e-07, -6.835907697677612e-07, -6.584450602531433e-07, -6.332993507385254e-07, -6.081536412239075e-07, -5.830079317092896e-07, -5.578622221946716e-07, -5.327165126800537e-07, -5.075708031654358e-07, -4.824250936508179e-07, -4.5727938413619995e-07, -4.3213367462158203e-07, -4.069879651069641e-07, -3.818422555923462e-07, -3.5669654607772827e-07, -3.3155083656311035e-07, -3.0640512704849243e-07, -2.812594175338745e-07, -2.561137080192566e-07, -2.3096799850463867e-07, -2.0582228899002075e-07, -1.8067657947540283e-07, -1.555308699607849e-07, -1.30385160446167e-07, -1.0523945093154907e-07, -8.009374141693115e-08, -5.494803190231323e-08, -2.9802322387695312e-08, -4.6566128730773926e-09, 2.0489096641540527e-08, 4.563480615615845e-08, 7.078051567077637e-08, 9.592622518539429e-08, 1.210719347000122e-07, 1.4621764421463013e-07, 1.7136335372924805e-07, 1.9650906324386597e-07, 2.2165477275848389e-07, 2.468004822731018e-07, 2.7194619178771973e-07, 2.9709190130233765e-07, 3.2223761081695557e-07, 3.473833203315735e-07, 3.725290298461914e-07, 3.976747393608093e-07, 4.2282044887542725e-07, 4.4796615839004517e-07, 4.731118679046631e-07, 4.98257577419281e-07, 5.234032869338989e-07, 5.485489964485168e-07, 5.736947059631348e-07, 5.988404154777527e-07, 6.239861249923706e-07, 6.491318345069885e-07, 6.742775440216064e-07, 6.994232535362244e-07, 7.245689630508423e-07, 7.497146725654602e-07, 7.748603820800781e-07]}, "gradients/decoder.model.decoder.layers.0.self_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 17.0, 0.0, 0.0, 37.0, 0.0, 0.0, 0.0, 87.0, 0.0, 0.0, 709.0, 0.0, 0.0, 99.0, 0.0, 0.0, 0.0, 29.0, 0.0, 0.0, 19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.172325134277344e-07, -3.995373845100403e-07, -3.818422555923462e-07, -3.641471266746521e-07, -3.46451997756958e-07, -3.287568688392639e-07, -3.110617399215698e-07, -2.9336661100387573e-07, -2.7567148208618164e-07, -2.5797635316848755e-07, -2.4028122425079346e-07, -2.2258609533309937e-07, -2.0489096641540527e-07, -1.8719583749771118e-07, -1.695007085800171e-07, -1.51805579662323e-07, -1.341104507446289e-07, -1.1641532182693481e-07, -9.872019290924072e-08, 
-8.102506399154663e-08, -6.332993507385254e-08, -4.563480615615845e-08, -2.7939677238464355e-08, -1.0244548320770264e-08, 7.450580596923828e-09, 2.514570951461792e-08, 4.284083843231201e-08, 6.05359673500061e-08, 7.82310962677002e-08, 9.592622518539429e-08, 1.1362135410308838e-07, 1.3131648302078247e-07, 1.4901161193847656e-07, 1.6670674085617065e-07, 1.8440186977386475e-07, 2.0209699869155884e-07, 2.1979212760925293e-07, 2.3748725652694702e-07, 2.551823854446411e-07, 2.728775143623352e-07, 2.905726432800293e-07, 3.082677721977234e-07, 3.259629011154175e-07, 3.4365803003311157e-07, 3.6135315895080566e-07, 3.7904828786849976e-07, 3.9674341678619385e-07, 4.1443854570388794e-07, 4.3213367462158203e-07, 4.498288035392761e-07, 4.675239324569702e-07, 4.852190613746643e-07, 5.029141902923584e-07, 5.206093192100525e-07, 5.383044481277466e-07, 5.559995770454407e-07, 5.736947059631348e-07, 5.913898348808289e-07, 6.09084963798523e-07, 6.26780092716217e-07, 6.444752216339111e-07, 6.621703505516052e-07, 6.798654794692993e-07, 6.975606083869934e-07, 7.152557373046875e-07]}, "gradients/decoder.model.decoder.layernorm_embedding.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 18.0, 45.0, 299.0, 526.0, 87.0, 20.0, 7.0, 1.0, 6.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.909733226872049e-05, -5.714019789593294e-05, -5.518306352314539e-05, -5.322592915035784e-05, -5.126879477757029e-05, -4.931166040478274e-05, -4.7354522394016385e-05, -4.539739165920764e-05, -4.3440253648441285e-05, -4.1483119275653735e-05, -3.9525984902866185e-05, -3.7568850530078635e-05, -3.5611716157291085e-05, -3.3654581784503534e-05, -3.1697447411715984e-05, -2.974031121993903e-05, -2.7783178666140884e-05, -2.5826044293353334e-05, -2.3868909920565784e-05, -2.1911775547778234e-05, -1.9954641174990684e-05, -1.7997506802203134e-05, -1.604037061042618e-05, -1.408323623763863e-05, -1.212610186485108e-05, -1.016896749206353e-05, -8.21183311927598e-06, -6.254697836993728e-06, -4.297563464206178e-06, -2.3404290914186276e-06, -3.832938091363758e-07, 1.5738405636511743e-06, 3.5309712984599173e-06, 5.488105671247467e-06, 7.445240498782368e-06, 9.40237532631727e-06, 1.135950969910482e-05, 1.331664407189237e-05, 1.527377935417462e-05, 1.723091372696217e-05, 1.918804809974972e-05, 2.114518247253727e-05, 2.3102316845324822e-05, 2.5059453037101775e-05, 2.7016587409889325e-05, 2.8973721782676876e-05, 3.0930856155464426e-05, 3.2887990528251976e-05, 3.4845124901039526e-05, 3.6802259273827076e-05, 3.8759393646614626e-05, 4.0716528019402176e-05, 4.2673662392189726e-05, 4.4630796764977276e-05, 4.6587934775743634e-05, 4.8545065510552377e-05, 5.0502203521318734e-05, 5.2459337894106284e-05, 5.4416472266893834e-05, 5.6373606639681384e-05, 5.8330741012468934e-05, 6.0287875385256484e-05, 6.224500975804403e-05, 6.420214776881039e-05, 6.615927850361913e-05]}, "gradients/decoder.model.decoder.layernorm_embedding.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 2.0, 1.0, 2.0, 2.0, 10.0, 6.0, 10.0, 8.0, 13.0, 12.0, 9.0, 23.0, 21.0, 15.0, 15.0, 31.0, 22.0, 29.0, 39.0, 37.0, 33.0, 39.0, 36.0, 45.0, 30.0, 44.0, 37.0, 42.0, 26.0, 42.0, 36.0, 31.0, 27.0, 38.0, 21.0, 20.0, 19.0, 19.0, 20.0, 11.0, 7.0, 15.0, 6.0, 10.0, 9.0, 8.0, 7.0, 8.0, 2.0, 1.0, 5.0, 1.0, 0.0, 2.0, 2.0, 2.0], "bins": 
[-9.717604370962363e-06, -9.422691618965473e-06, -9.127777957473882e-06, -8.832865205476992e-06, -8.537952453480102e-06, -8.243039701483212e-06, -7.948126949486323e-06, -7.653213287994731e-06, -7.3583005359978415e-06, -7.063387784000952e-06, -6.768474577256711e-06, -6.473561370512471e-06, -6.178648618515581e-06, -5.883735866518691e-06, -5.588822659774451e-06, -5.29390945303021e-06, -4.99899670103332e-06, -4.7040839490364306e-06, -4.40917074229219e-06, -4.114257535547949e-06, -3.81934478355106e-06, -3.5244318041804945e-06, -3.2295188248099294e-06, -2.934605845439364e-06, -2.639692866068799e-06, -2.344779886698234e-06, -2.0498669073276687e-06, -1.7549539279571036e-06, -1.4600409485865384e-06, -1.1651279692159733e-06, -8.702149898454081e-07, -5.75302010474843e-07, -2.803890311042778e-07, 1.4523948266287334e-08, 3.094369276368525e-07, 6.043499070074176e-07, 8.992628863779828e-07, 1.194175865748548e-06, 1.489088845119113e-06, 1.7840018244896783e-06, 2.0789148038602434e-06, 2.3738277832308086e-06, 2.6687407626013737e-06, 2.963653741971939e-06, 3.258566721342504e-06, 3.553479700713069e-06, 3.848392680083634e-06, 4.143305886827875e-06, 4.438218638824765e-06, 4.733131390821654e-06, 5.028044597565895e-06, 5.3229578043101355e-06, 5.617870556307025e-06, 5.912783308303915e-06, 6.2076965150481556e-06, 6.502609721792396e-06, 6.797522473789286e-06, 7.092435225786176e-06, 7.387348432530416e-06, 7.682261639274657e-06, 7.977174391271546e-06, 8.272087143268436e-06, 8.566999895265326e-06, 8.861913556756917e-06, 9.156826308753807e-06]}, "gradients/decoder.model.decoder.embed_positions.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 0.0, 4.0, 4.0, 3.0, 5.0, 4.0, 8.0, 7.0, 5.0, 5.0, 13.0, 17.0, 16.0, 24.0, 30.0, 41.0, 34.0, 68.0, 66.0, 84.0, 97.0, 116.0, 153.0, 200.0, 279.0, 657.0, 1047011.0, 463.0, 234.0, 174.0, 165.0, 129.0, 104.0, 81.0, 57.0, 53.0, 30.0, 34.0, 27.0, 13.0, 21.0, 12.0, 14.0, 7.0, 11.0, 6.0, 6.0, 3.0, 2.0, 2.0, 6.0, 2.0, 5.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.7621650840737857e-05, -1.7041518731275573e-05, -1.6461384802823886e-05, -1.5881252693361603e-05, -1.530112058389932e-05, -1.4720988474437036e-05, -1.414085545548005e-05, -1.3560722436523065e-05, -1.2980590327060781e-05, -1.2400458217598498e-05, -1.1820325198641513e-05, -1.1240192179684527e-05, -1.0660060070222244e-05, -1.007992796075996e-05, -9.499794941802975e-06, -8.91966192284599e-06, -8.339529813383706e-06, -7.759397703921422e-06, -7.179264684964437e-06, -6.599132120754803e-06, -6.018999556545168e-06, -5.438866992335534e-06, -4.858734428125899e-06, -4.278601863916265e-06, -3.6984692997066304e-06, -3.118336735496996e-06, -2.5382041712873615e-06, -1.958071607077727e-06, -1.3779390428680927e-06, -7.978064786584582e-07, -2.1767391444882378e-07, 3.6245864976081066e-07, 9.425912139704451e-07, 1.5227237781800795e-06, 2.102856342389714e-06, 2.6829889065993484e-06, 3.263121470808983e-06, 3.843254035018617e-06, 4.423386599228252e-06, 5.003519163437886e-06, 5.583651727647521e-06, 6.163784291857155e-06, 6.7439168560667895e-06, 7.324049420276424e-06, 7.904181984486058e-06, 8.484314093948342e-06, 9.064447112905327e-06, 9.644580131862313e-06, 1.0224712241324596e-05, 1.080484435078688e-05, 1.1384977369743865e-05, 1.196511038870085e-05, 1.2545242498163134e-05, 1.3125374607625417e-05, 1.3705507626582403e-05, 1.4285640645539388e-05, 1.4865772755001672e-05, 1.5445904864463955e-05, 1.6026038792915642e-05, 1.6606170902377926e-05, 1.718630301184021e-05, 1.7766435121302493e-05, 1.8346567230764776e-05, 1.8926701159216464e-05, 
1.9506833268678747e-05]}, "gradients/decoder.model.decoder.embed_tokens.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 1.0, 25.0, 45.0, 233.0, 7237.0, 51462480.0, 1030.0, 194.0, 45.0, 24.0, 7.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-122.0, -119.56494140625, -117.1298828125, -114.69482421875, -112.259765625, -109.82470703125, -107.3896484375, -104.95458984375, -102.51953125, -100.08447265625, -97.6494140625, -95.21435546875, -92.779296875, -90.34423828125, -87.9091796875, -85.47412109375, -83.0390625, -80.60400390625, -78.1689453125, -75.73388671875, -73.298828125, -70.86376953125, -68.4287109375, -65.99365234375, -63.55859375, -61.12353515625, -58.6884765625, -56.25341796875, -53.818359375, -51.38330078125, -48.9482421875, -46.51318359375, -44.078125, -41.64306640625, -39.2080078125, -36.77294921875, -34.337890625, -31.90283203125, -29.4677734375, -27.03271484375, -24.59765625, -22.16259765625, -19.7275390625, -17.29248046875, -14.857421875, -12.42236328125, -9.9873046875, -7.55224609375, -5.1171875, -2.68212890625, -0.2470703125, 2.18798828125, 4.623046875, 7.05810546875, 9.4931640625, 11.92822265625, 14.36328125, 16.79833984375, 19.2333984375, 21.66845703125, 24.103515625, 26.53857421875, 28.9736328125, 31.40869140625, 33.84375]}, "gradients/encoder.adapter.layers.2.conv.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 5.0, 2.0, 3.0, 6.0, 5.0, 13.0, 15.0, 20.0, 20.0, 28.0, 26.0, 49.0, 65.0, 91.0, 108.0, 140.0, 223.0, 292.0, 415.0, 611.0, 1068.0, 2380.0, 80631.0, 6197942.0, 3363.0, 1346.0, 764.0, 504.0, 337.0, 253.0, 176.0, 147.0, 92.0, 72.0, 45.0, 43.0, 31.0, 24.0, 18.0, 12.0, 14.0, 10.0, 9.0, 6.0, 5.0, 4.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.047332763671875, -0.04581117630004883, -0.044289588928222656, -0.042768001556396484, -0.04124641418457031, -0.03972482681274414, -0.03820323944091797, -0.0366816520690918, -0.035160064697265625, -0.03363847732543945, -0.03211688995361328, -0.03059530258178711, -0.029073715209960938, -0.027552127838134766, -0.026030540466308594, -0.024508953094482422, -0.02298736572265625, -0.021465778350830078, -0.019944190979003906, -0.018422603607177734, -0.016901016235351562, -0.01537942886352539, -0.013857841491699219, -0.012336254119873047, -0.010814666748046875, -0.009293079376220703, -0.007771492004394531, -0.006249904632568359, -0.0047283172607421875, -0.0032067298889160156, -0.0016851425170898438, -0.00016355514526367188, 0.0013580322265625, 0.002879619598388672, 0.004401206970214844, 0.005922794342041016, 0.0074443817138671875, 0.00896596908569336, 0.010487556457519531, 0.012009143829345703, 0.013530731201171875, 0.015052318572998047, 0.01657390594482422, 0.01809549331665039, 0.019617080688476562, 0.021138668060302734, 0.022660255432128906, 0.024181842803955078, 0.02570343017578125, 0.027225017547607422, 0.028746604919433594, 0.030268192291259766, 0.03178977966308594, 0.03331136703491211, 0.03483295440673828, 0.03635454177856445, 0.037876129150390625, 0.0393977165222168, 0.04091930389404297, 0.04244089126586914, 0.04396247863769531, 0.045484066009521484, 0.047005653381347656, 0.04852724075317383, 0.050048828125]}, "gradients/encoder.adapter.layers.2.conv.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 5.0, 2.0, 2.0, 5.0, 6.0, 9.0, 8.0, 11.0, 8.0, 11.0, 
16.0, 22.0, 23.0, 21.0, 29.0, 32.0, 39.0, 46.0, 46.0, 48.0, 47.0, 53.0, 55.0, 1076.0, 48.0, 32.0, 41.0, 42.0, 39.0, 36.0, 32.0, 24.0, 31.0, 16.0, 9.0, 16.0, 10.0, 9.0, 4.0, 9.0, 6.0, 6.0, 0.0, 5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.21875, -6.954345703125, -6.68994140625, -6.425537109375, -6.1611328125, -5.896728515625, -5.63232421875, -5.367919921875, -5.103515625, -4.839111328125, -4.57470703125, -4.310302734375, -4.0458984375, -3.781494140625, -3.51708984375, -3.252685546875, -2.98828125, -2.723876953125, -2.45947265625, -2.195068359375, -1.9306640625, -1.666259765625, -1.40185546875, -1.137451171875, -0.873046875, -0.608642578125, -0.34423828125, -0.079833984375, 0.1845703125, 0.448974609375, 0.71337890625, 0.977783203125, 1.2421875, 1.506591796875, 1.77099609375, 2.035400390625, 2.2998046875, 2.564208984375, 2.82861328125, 3.093017578125, 3.357421875, 3.621826171875, 3.88623046875, 4.150634765625, 4.4150390625, 4.679443359375, 4.94384765625, 5.208251953125, 5.47265625, 5.737060546875, 6.00146484375, 6.265869140625, 6.5302734375, 6.794677734375, 7.05908203125, 7.323486328125, 7.587890625, 7.852294921875, 8.11669921875, 8.381103515625, 8.6455078125, 8.909912109375, 9.17431640625, 9.438720703125, 9.703125]}, "gradients/encoder.adapter.layers.1.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 0.0, 2.0, 3.0, 4.0, 2.0, 1.0, 9.0, 10.0, 9.0, 11.0, 19.0, 28.0, 38.0, 45.0, 94.0, 141.0, 304.0, 620.0, 2217.0, 6250531.0, 34712.0, 1526.0, 507.0, 229.0, 119.0, 82.0, 55.0, 34.0, 28.0, 14.0, 8.0, 10.0, 5.0, 6.0, 6.0, 5.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1107177734375, -0.10701370239257812, -0.10330963134765625, -0.09960556030273438, -0.0959014892578125, -0.09219741821289062, -0.08849334716796875, -0.08478927612304688, -0.081085205078125, -0.07738113403320312, -0.07367706298828125, -0.06997299194335938, -0.0662689208984375, -0.06256484985351562, -0.05886077880859375, -0.055156707763671875, -0.05145263671875, -0.047748565673828125, -0.04404449462890625, -0.040340423583984375, -0.0366363525390625, -0.032932281494140625, -0.02922821044921875, -0.025524139404296875, -0.021820068359375, -0.018115997314453125, -0.01441192626953125, -0.010707855224609375, -0.0070037841796875, -0.003299713134765625, 0.00040435791015625, 0.004108428955078125, 0.0078125, 0.011516571044921875, 0.01522064208984375, 0.018924713134765625, 0.0226287841796875, 0.026332855224609375, 0.03003692626953125, 0.033740997314453125, 0.037445068359375, 0.041149139404296875, 0.04485321044921875, 0.048557281494140625, 0.0522613525390625, 0.055965423583984375, 0.05966949462890625, 0.06337356567382812, 0.06707763671875, 0.07078170776367188, 0.07448577880859375, 0.07818984985351562, 0.0818939208984375, 0.08559799194335938, 0.08930206298828125, 0.09300613403320312, 0.096710205078125, 0.10041427612304688, 0.10411834716796875, 0.10782241821289062, 0.1115264892578125, 0.11523056030273438, 0.11893463134765625, 0.12263870239257812, 0.1263427734375]}, "gradients/encoder.adapter.layers.1.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 5.0, 8.0, 31.0, 1754.0, 215.0, 7.0, 8.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05267333984375, 
-0.051412343978881836, -0.05015134811401367, -0.04889035224914551, -0.047629356384277344, -0.04636836051940918, -0.045107364654541016, -0.04384636878967285, -0.04258537292480469, -0.04132437705993652, -0.04006338119506836, -0.038802385330200195, -0.03754138946533203, -0.03628039360046387, -0.0350193977355957, -0.03375840187072754, -0.032497406005859375, -0.03123641014099121, -0.029975414276123047, -0.028714418411254883, -0.02745342254638672, -0.026192426681518555, -0.02493143081665039, -0.023670434951782227, -0.022409439086914062, -0.0211484432220459, -0.019887447357177734, -0.01862645149230957, -0.017365455627441406, -0.016104459762573242, -0.014843463897705078, -0.013582468032836914, -0.01232147216796875, -0.011060476303100586, -0.009799480438232422, -0.008538484573364258, -0.007277488708496094, -0.00601649284362793, -0.004755496978759766, -0.0034945011138916016, -0.0022335052490234375, -0.0009725093841552734, 0.0002884864807128906, 0.0015494823455810547, 0.0028104782104492188, 0.004071474075317383, 0.005332469940185547, 0.006593465805053711, 0.007854461669921875, 0.009115457534790039, 0.010376453399658203, 0.011637449264526367, 0.012898445129394531, 0.014159440994262695, 0.01542043685913086, 0.016681432723999023, 0.017942428588867188, 0.01920342445373535, 0.020464420318603516, 0.02172541618347168, 0.022986412048339844, 0.024247407913208008, 0.025508403778076172, 0.026769399642944336, 0.0280303955078125]}, "gradients/encoder.adapter.layers.0.conv.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 5.0, 5.0, 8.0, 9.0, 17.0, 19.0, 29.0, 41.0, 63.0, 99.0, 103.0, 196.0, 353.0, 589.0, 999.0, 1848.0, 3580.0, 8158.0, 22301.0, 96306.0, 5103252.0, 955721.0, 66450.0, 17511.0, 6739.0, 3148.0, 1571.0, 932.0, 529.0, 303.0, 198.0, 111.0, 87.0, 42.0, 34.0, 37.0, 19.0, 11.0, 3.0, 7.0, 8.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.005123138427734375, -0.004967391490936279, -0.004811644554138184, -0.004655897617340088, -0.004500150680541992, -0.0043444037437438965, -0.004188656806945801, -0.004032909870147705, -0.0038771629333496094, -0.0037214159965515137, -0.003565669059753418, -0.0034099221229553223, -0.0032541751861572266, -0.003098428249359131, -0.002942681312561035, -0.0027869343757629395, -0.0026311874389648438, -0.002475440502166748, -0.0023196935653686523, -0.0021639466285705566, -0.002008199691772461, -0.0018524527549743652, -0.0016967058181762695, -0.0015409588813781738, -0.0013852119445800781, -0.0012294650077819824, -0.0010737180709838867, -0.000917971134185791, -0.0007622241973876953, -0.0006064772605895996, -0.0004507303237915039, -0.0002949833869934082, -0.0001392364501953125, 1.6510486602783203e-05, 0.0001722574234008789, 0.0003280043601989746, 0.0004837512969970703, 0.000639498233795166, 0.0007952451705932617, 0.0009509921073913574, 0.0011067390441894531, 0.0012624859809875488, 0.0014182329177856445, 0.0015739798545837402, 0.001729726791381836, 0.0018854737281799316, 0.0020412206649780273, 0.002196967601776123, 0.0023527145385742188, 0.0025084614753723145, 0.00266420841217041, 0.002819955348968506, 0.0029757022857666016, 0.0031314492225646973, 0.003287196159362793, 0.0034429430961608887, 0.0035986900329589844, 0.00375443696975708, 0.003910183906555176, 0.0040659308433532715, 0.004221677780151367, 0.004377424716949463, 0.004533171653747559, 0.004688918590545654, 0.00484466552734375]}, "gradients/encoder.adapter.layers.0.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 
1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 3.0, 5.0, 6.0, 7.0, 3.0, 6.0, 7.0, 9.0, 14.0, 26.0, 18.0, 43.0, 65.0, 97.0, 187.0, 769.0, 312.0, 147.0, 113.0, 66.0, 43.0, 21.0, 19.0, 16.0, 2.0, 2.0, 8.0, 5.0, 1.0, 5.0, 2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00858306884765625, -0.008327007293701172, -0.008070945739746094, -0.007814884185791016, -0.0075588226318359375, -0.007302761077880859, -0.007046699523925781, -0.006790637969970703, -0.006534576416015625, -0.006278514862060547, -0.006022453308105469, -0.005766391754150391, -0.0055103302001953125, -0.005254268646240234, -0.004998207092285156, -0.004742145538330078, -0.004486083984375, -0.004230022430419922, -0.003973960876464844, -0.0037178993225097656, -0.0034618377685546875, -0.0032057762145996094, -0.0029497146606445312, -0.002693653106689453, -0.002437591552734375, -0.002181529998779297, -0.0019254684448242188, -0.0016694068908691406, -0.0014133453369140625, -0.0011572837829589844, -0.0009012222290039062, -0.0006451606750488281, -0.00038909912109375, -0.00013303756713867188, 0.00012302398681640625, 0.0003790855407714844, 0.0006351470947265625, 0.0008912086486816406, 0.0011472702026367188, 0.0014033317565917969, 0.001659393310546875, 0.0019154548645019531, 0.0021715164184570312, 0.0024275779724121094, 0.0026836395263671875, 0.0029397010803222656, 0.0031957626342773438, 0.003451824188232422, 0.0037078857421875, 0.003963947296142578, 0.004220008850097656, 0.004476070404052734, 0.0047321319580078125, 0.004988193511962891, 0.005244255065917969, 0.005500316619873047, 0.005756378173828125, 0.006012439727783203, 0.006268501281738281, 0.006524562835693359, 0.0067806243896484375, 0.007036685943603516, 0.007292747497558594, 0.007548809051513672, 0.00780487060546875]}, "gradients/encoder.encoder.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 6.0, 20.0, 438.0, 276.0, 124.0, 67.0, 32.0, 27.0, 6.0, 8.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.09881081432104111, -0.09678125381469727, -0.09475168585777283, -0.09272212535142899, -0.09069256484508514, -0.0886630043387413, -0.08663344383239746, -0.08460387587547302, -0.08257431536912918, -0.08054475486278534, -0.0785151869058609, -0.07648562639951706, -0.07445606589317322, -0.07242650538682938, -0.07039694488048553, -0.0683673769235611, -0.06633781641721725, -0.06430825591087341, -0.06227869167923927, -0.06024912744760513, -0.05821956694126129, -0.05619000643491745, -0.05416044220328331, -0.05213087797164917, -0.05010131746530533, -0.04807175695896149, -0.04604219272732735, -0.04401262849569321, -0.041983067989349365, -0.039953507483005524, -0.037923943251371384, -0.035894379019737244, -0.0338648185133934, -0.03183525800704956, -0.02980569377541542, -0.02777613140642643, -0.02574656903743744, -0.023717006668448448, -0.021687444299459457, -0.019657881930470467, -0.017628321424126625, -0.015598759055137634, -0.013569196686148643, -0.011539634317159653, -0.009510071948170662, -0.007480509579181671, -0.00545094721019268, -0.0034213848412036896, -0.0013918224722146988, 0.000637739896774292, 0.0026673022657632828, 0.0046968646347522736, 0.006726427003741264, 0.008755989372730255, 0.010785551741719246, 0.012815114110708237, 0.014844676479697227, 0.016874238848686218, 0.01890380121767521, 0.0209333635866642, 
0.02296292595565319, 0.02499248832464218, 0.027022050693631172, 0.029051613062620163, 0.031081175431609154]}, "gradients/encoder.encoder.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 4.0, 4.0, 3.0, 6.0, 4.0, 8.0, 6.0, 8.0, 9.0, 19.0, 11.0, 16.0, 18.0, 29.0, 22.0, 21.0, 34.0, 34.0, 28.0, 23.0, 40.0, 44.0, 35.0, 53.0, 40.0, 34.0, 46.0, 26.0, 38.0, 37.0, 44.0, 30.0, 33.0, 30.0, 17.0, 20.0, 23.0, 26.0, 19.0, 17.0, 11.0, 5.0, 9.0, 4.0, 6.0, 11.0, 1.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.014644205570220947, -0.014194274321198463, -0.01374434307217598, -0.013294411823153496, -0.012844480574131012, -0.012394549325108528, -0.011944618076086044, -0.01149468682706356, -0.011044755578041077, -0.010594824329018593, -0.010144893079996109, -0.009694961830973625, -0.009245030581951141, -0.008795099332928658, -0.008345168083906174, -0.00789523683488369, -0.007445305585861206, -0.006995374336838722, -0.006545443087816238, -0.006095511838793755, -0.005645580589771271, -0.005195649340748787, -0.004745718091726303, -0.004295786842703819, -0.0038458555936813354, -0.0033959243446588516, -0.002945993095636368, -0.002496061846613884, -0.0020461305975914, -0.0015961993485689163, -0.0011462680995464325, -0.0006963368505239487, -0.00024640560150146484, 0.00020352564752101898, 0.0006534568965435028, 0.0011033881455659866, 0.0015533193945884705, 0.0020032506436109543, 0.002453181892633438, 0.002903113141655922, 0.0033530443906784058, 0.0038029756397008896, 0.004252906888723373, 0.004702838137745857, 0.005152769386768341, 0.005602700635790825, 0.006052631884813309, 0.0065025631338357925, 0.006952494382858276, 0.00740242563188076, 0.007852356880903244, 0.008302288129925728, 0.008752219378948212, 0.009202150627970695, 0.00965208187699318, 0.010102013126015663, 0.010551944375038147, 0.01100187562406063, 0.011451806873083115, 0.011901738122105598, 0.012351669371128082, 0.012801600620150566, 0.01325153186917305, 0.013701463118195534, 0.014151394367218018]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 5.0, 4.0, 5.0, 8.0, 6.0, 16.0, 15.0, 32.0, 75.0, 113.0, 270.0, 578.0, 1235.0, 2622.0, 6667.0, 21208.0, 88121.0, 3928760.0, 107876.0, 22585.0, 7630.0, 3004.0, 1344.0, 738.0, 429.0, 311.0, 178.0, 138.0, 109.0, 58.0, 42.0, 34.0, 29.0, 14.0, 11.0, 5.0, 3.0, 5.0, 6.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00024199485778808594, -0.0002317279577255249, -0.00022146105766296387, -0.00021119415760040283, -0.0002009272575378418, -0.00019066035747528076, -0.00018039345741271973, -0.0001701265573501587, -0.00015985965728759766, -0.00014959275722503662, -0.00013932585716247559, -0.00012905895709991455, -0.00011879205703735352, -0.00010852515697479248, -9.825825691223145e-05, -8.799135684967041e-05, -7.772445678710938e-05, -6.745755672454834e-05, -5.7190656661987305e-05, -4.692375659942627e-05, -3.6656856536865234e-05, -2.63899564743042e-05, -1.6123056411743164e-05, -5.856156349182129e-06, 4.410743713378906e-06, 1.4677643775939941e-05, 2.4944543838500977e-05, 3.521144390106201e-05, 4.547834396362305e-05, 5.574524402618408e-05, 6.601214408874512e-05, 7.627904415130615e-05, 8.654594421386719e-05, 9.681284427642822e-05, 0.00010707974433898926, 0.00011734664440155029, 0.00012761354446411133, 0.00013788044452667236, 0.0001481473445892334, 0.00015841424465179443, 0.00016868114471435547, 0.0001789480447769165, 
0.00018921494483947754, 0.00019948184490203857, 0.0002097487449645996, 0.00022001564502716064, 0.00023028254508972168, 0.00024054944515228271, 0.00025081634521484375, 0.0002610832452774048, 0.0002713501453399658, 0.00028161704540252686, 0.0002918839454650879, 0.0003021508455276489, 0.00031241774559020996, 0.000322684645652771, 0.00033295154571533203, 0.00034321844577789307, 0.0003534853458404541, 0.00036375224590301514, 0.00037401914596557617, 0.0003842860460281372, 0.00039455294609069824, 0.0004048198461532593, 0.0004150867462158203]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 5.0, 1.0, 2.0, 3.0, 8.0, 9.0, 14.0, 21.0, 30.0, 30.0, 49.0, 67.0, 81.0, 130.0, 137.0, 130.0, 84.0, 59.0, 43.0, 26.0, 21.0, 14.0, 10.0, 8.0, 11.0, 6.0, 4.0, 0.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3053417205810547e-05, -1.2495554983615875e-05, -1.1937692761421204e-05, -1.1379830539226532e-05, -1.082196831703186e-05, -1.0264106094837189e-05, -9.706243872642517e-06, -9.148381650447845e-06, -8.590519428253174e-06, -8.032657206058502e-06, -7.4747949838638306e-06, -6.916932761669159e-06, -6.359070539474487e-06, -5.801208317279816e-06, -5.243346095085144e-06, -4.685483872890472e-06, -4.127621650695801e-06, -3.569759428501129e-06, -3.0118972063064575e-06, -2.454034984111786e-06, -1.8961727619171143e-06, -1.3383105397224426e-06, -7.80448317527771e-07, -2.2258609533309937e-07, 3.3527612686157227e-07, 8.931383490562439e-07, 1.4510005712509155e-06, 2.008862793445587e-06, 2.566725015640259e-06, 3.1245872378349304e-06, 3.682449460029602e-06, 4.240311682224274e-06, 4.798173904418945e-06, 5.356036126613617e-06, 5.9138983488082886e-06, 6.47176057100296e-06, 7.029622793197632e-06, 7.5874850153923035e-06, 8.145347237586975e-06, 8.703209459781647e-06, 9.261071681976318e-06, 9.81893390417099e-06, 1.0376796126365662e-05, 1.0934658348560333e-05, 1.1492520570755005e-05, 1.2050382792949677e-05, 1.2608245015144348e-05, 1.316610723733902e-05, 1.3723969459533691e-05, 1.4281831681728363e-05, 1.4839693903923035e-05, 1.5397556126117706e-05, 1.5955418348312378e-05, 1.651328057050705e-05, 1.707114279270172e-05, 1.7629005014896393e-05, 1.8186867237091064e-05, 1.8744729459285736e-05, 1.9302591681480408e-05, 1.986045390367508e-05, 2.041831612586975e-05, 2.0976178348064423e-05, 2.1534040570259094e-05, 2.2091902792453766e-05, 2.2649765014648438e-05]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 3.0, 0.0, 2.0, 7.0, 8.0, 25.0, 27.0, 47.0, 99.0, 175.0, 527.0, 1430.0, 5334.0, 23879.0, 198091.0, 3873905.0, 74218.0, 12133.0, 2811.0, 745.0, 295.0, 133.0, 94.0, 50.0, 49.0, 45.0, 25.0, 28.0, 29.0, 24.0, 10.0, 12.0, 6.0, 9.0, 7.0, 3.0, 2.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-8.83936882019043e-05, -8.29668715596199e-05, -7.754005491733551e-05, -7.211323827505112e-05, -6.668642163276672e-05, -6.125960499048233e-05, -5.583278834819794e-05, -5.0405971705913544e-05, -4.497915506362915e-05, -3.955233842134476e-05, -3.4125521779060364e-05, -2.869870513677597e-05, -2.3271888494491577e-05, -1.7845071852207184e-05, -1.241825520992279e-05, -6.991438567638397e-06, -1.564621925354004e-06, 3.862194716930389e-06, 9.289011359214783e-06, 1.4715828001499176e-05, 2.014264464378357e-05, 
2.5569461286067963e-05, 3.0996277928352356e-05, 3.642309457063675e-05, 4.184991121292114e-05, 4.7276727855205536e-05, 5.270354449748993e-05, 5.813036113977432e-05, 6.355717778205872e-05, 6.898399442434311e-05, 7.44108110666275e-05, 7.98376277089119e-05, 8.526444435119629e-05, 9.069126099348068e-05, 9.611807763576508e-05, 0.00010154489427804947, 0.00010697171092033386, 0.00011239852756261826, 0.00011782534420490265, 0.00012325216084718704, 0.00012867897748947144, 0.00013410579413175583, 0.00013953261077404022, 0.00014495942741632462, 0.000150386244058609, 0.0001558130607008934, 0.0001612398773431778, 0.0001666666939854622, 0.00017209351062774658, 0.00017752032727003098, 0.00018294714391231537, 0.00018837396055459976, 0.00019380077719688416, 0.00019922759383916855, 0.00020465441048145294, 0.00021008122712373734, 0.00021550804376602173, 0.00022093486040830612, 0.00022636167705059052, 0.0002317884936928749, 0.0002372153103351593, 0.0002426421269774437, 0.0002480689436197281, 0.0002534957602620125, 0.0002589225769042969]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 6.0, 2.0, 4.0, 8.0, 6.0, 8.0, 13.0, 10.0, 26.0, 26.0, 21.0, 32.0, 30.0, 46.0, 45.0, 38.0, 55.0, 58.0, 56.0, 81.0, 109.0, 329.0, 2057.0, 590.0, 138.0, 79.0, 74.0, 38.0, 27.0, 20.0, 22.0, 9.0, 6.0, 10.0, 4.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.598234176635742e-05, -6.457231938838959e-05, -6.316229701042175e-05, -6.175227463245392e-05, -6.0342252254486084e-05, -5.893222987651825e-05, -5.7522207498550415e-05, -5.611218512058258e-05, -5.4702162742614746e-05, -5.329214036464691e-05, -5.188211798667908e-05, -5.047209560871124e-05, -4.906207323074341e-05, -4.7652050852775574e-05, -4.624202847480774e-05, -4.4832006096839905e-05, -4.342198371887207e-05, -4.2011961340904236e-05, -4.06019389629364e-05, -3.919191658496857e-05, -3.778189420700073e-05, -3.63718718290329e-05, -3.4961849451065063e-05, -3.355182707309723e-05, -3.2141804695129395e-05, -3.073178231716156e-05, -2.9321759939193726e-05, -2.791173756122589e-05, -2.6501715183258057e-05, -2.5091692805290222e-05, -2.3681670427322388e-05, -2.2271648049354553e-05, -2.086162567138672e-05, -1.9451603293418884e-05, -1.804158091545105e-05, -1.6631558537483215e-05, -1.5221536159515381e-05, -1.3811513781547546e-05, -1.2401491403579712e-05, -1.0991469025611877e-05, -9.581446647644043e-06, -8.171424269676208e-06, -6.761401891708374e-06, -5.3513795137405396e-06, -3.941357135772705e-06, -2.5313347578048706e-06, -1.1213123798370361e-06, 2.8870999813079834e-07, 1.6987323760986328e-06, 3.1087547540664673e-06, 4.518777132034302e-06, 5.928799510002136e-06, 7.338821887969971e-06, 8.748844265937805e-06, 1.015886664390564e-05, 1.1568889021873474e-05, 1.2978911399841309e-05, 1.4388933777809143e-05, 1.5798956155776978e-05, 1.7208978533744812e-05, 1.8619000911712646e-05, 2.002902328968048e-05, 2.1439045667648315e-05, 2.284906804561615e-05, 2.4259090423583984e-05]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 5.0, 37.0, 295.0, 622.0, 54.0, 5.0], "bins": [-0.012877784669399261, 
-0.012667235918343067, -0.012456687167286873, -0.012246138416230679, -0.012035589665174484, -0.01182504091411829, -0.011614492163062096, -0.011403942480683327, -0.011193393729627132, -0.010982844978570938, -0.010772296227514744, -0.01056174747645855, -0.010351198725402355, -0.010140649974346161, -0.009930100291967392, -0.009719552472233772, -0.009509003721177578, -0.009298454970121384, -0.00908790621906519, -0.008877357468008995, -0.0086668087169528, -0.008456259965896606, -0.008245710283517838, -0.008035162463784218, -0.007824612781405449, -0.007614064030349255, -0.00740351527929306, -0.007192966528236866, -0.006982417311519384, -0.00677186856046319, -0.006561319809406996, -0.0063507710583508015, -0.006140222307294607, -0.005929673556238413, -0.0057191248051822186, -0.005508575588464737, -0.005298026837408543, -0.005087478086352348, -0.004876929335296154, -0.00466638058423996, -0.004455831833183765, -0.004245283082127571, -0.004034734331071377, -0.003824185347184539, -0.0036136365961283445, -0.0034030876122415066, -0.0031925388611853123, -0.002981990110129118, -0.0027714408934116364, -0.002560892142355442, -0.002350343158468604, -0.0021397944074124098, -0.0019292456563562155, -0.0017186967888846993, -0.0015081479214131832, -0.001297599170356989, -0.0010870504193007946, -0.0008765016100369394, -0.0006659528007730842, -0.00045540393330156803, -0.0002448551240377128, -3.4306314773857594e-05, 0.00017624255269765854, 0.00038679130375385284, 0.000597340171225369]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 2.0, 4.0, 6.0, 10.0, 6.0, 18.0, 29.0, 38.0, 39.0, 51.0, 50.0, 62.0, 51.0, 68.0, 66.0, 67.0, 69.0, 58.0, 60.0, 49.0, 48.0, 42.0, 38.0, 26.0, 18.0, 10.0, 8.0, 8.0, 0.0, 6.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.001138925552368164, -0.0011098375543951988, -0.0010807495564222336, -0.0010516615584492683, -0.001022573560476303, -0.0009934855625033379, -0.0009643975645303726, -0.0009353095665574074, -0.0009062215685844421, -0.0008771335706114769, -0.0008480455726385117, -0.0008189575746655464, -0.0007898695766925812, -0.0007607815787196159, -0.0007316935807466507, -0.0007026055827736855, -0.0006735175848007202, -0.000644429586827755, -0.0006153415888547897, -0.0005862535908818245, -0.0005571655929088593, -0.000528077594935894, -0.0004989895969629288, -0.00046990159898996353, -0.0004408136010169983, -0.00041172560304403305, -0.0003826376050710678, -0.00035354960709810257, -0.00032446160912513733, -0.0002953736111521721, -0.00026628561317920685, -0.0002371976152062416, -0.00020810961723327637, -0.00017902161926031113, -0.00014993362128734589, -0.00012084562331438065, -9.17576253414154e-05, -6.266962736845016e-05, -3.3581629395484924e-05, -4.493631422519684e-06, 2.4594366550445557e-05, 5.36823645234108e-05, 8.277036249637604e-05, 0.00011185836046934128, 0.00014094635844230652, 0.00017003435641527176, 0.000199122354388237, 0.00022821035236120224, 0.0002572983503341675, 0.0002863863483071327, 0.00031547434628009796, 0.0003445623442530632, 0.00037365034222602844, 0.0004027383401989937, 0.0004318263381719589, 0.00046091433614492416, 0.0004900023341178894, 0.0005190903320908546, 0.0005481783300638199, 0.0005772663280367851, 0.0006063543260097504, 0.0006354423239827156, 0.0006645303219556808, 0.0006936183199286461, 0.0007227063179016113]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": 
"histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 6.0, 7.0, 7.0, 12.0, 14.0, 9.0, 19.0, 16.0, 20.0, 59.0, 82.0, 138.0, 207.0, 324.0, 580.0, 1166.0, 2408.0, 6487.0, 23937.0, 217109.0, 740107.0, 39996.0, 9100.0, 3292.0, 1516.0, 770.0, 412.0, 250.0, 176.0, 98.0, 72.0, 46.0, 34.0, 23.0, 13.0, 13.0, 12.0, 6.0, 6.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0004649162292480469, -0.00045304372906684875, -0.00044117122888565063, -0.0004292987287044525, -0.0004174262285232544, -0.0004055537283420563, -0.00039368122816085815, -0.00038180872797966003, -0.0003699362277984619, -0.0003580637276172638, -0.0003461912274360657, -0.00033431872725486755, -0.00032244622707366943, -0.0003105737268924713, -0.0002987012267112732, -0.0002868287265300751, -0.00027495622634887695, -0.00026308372616767883, -0.0002512112259864807, -0.0002393387258052826, -0.00022746622562408447, -0.00021559372544288635, -0.00020372122526168823, -0.0001918487250804901, -0.000179976224899292, -0.00016810372471809387, -0.00015623122453689575, -0.00014435872435569763, -0.0001324862241744995, -0.00012061372399330139, -0.00010874122381210327, -9.686872363090515e-05, -8.499622344970703e-05, -7.312372326850891e-05, -6.125122308731079e-05, -4.937872290611267e-05, -3.750622272491455e-05, -2.563372254371643e-05, -1.376122236251831e-05, -1.8887221813201904e-06, 9.98377799987793e-06, 2.185627818107605e-05, 3.372877836227417e-05, 4.560127854347229e-05, 5.747377872467041e-05, 6.934627890586853e-05, 8.121877908706665e-05, 9.309127926826477e-05, 0.00010496377944946289, 0.00011683627963066101, 0.00012870877981185913, 0.00014058127999305725, 0.00015245378017425537, 0.0001643262803554535, 0.0001761987805366516, 0.00018807128071784973, 0.00019994378089904785, 0.00021181628108024597, 0.0002236887812614441, 0.0002355612814426422, 0.00024743378162384033, 0.00025930628180503845, 0.00027117878198623657, 0.0002830512821674347, 0.0002949237823486328]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 5.0, 1.0, 3.0, 4.0, 7.0, 10.0, 16.0, 14.0, 18.0, 24.0, 39.0, 38.0, 76.0, 91.0, 99.0, 113.0, 131.0, 86.0, 75.0, 31.0, 39.0, 17.0, 16.0, 9.0, 10.0, 12.0, 9.0, 5.0, 2.0, 4.0, 0.0, 1.0, 0.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0609626770019531e-05, -1.0116957128047943e-05, -9.624287486076355e-06, -9.131617844104767e-06, -8.638948202133179e-06, -8.14627856016159e-06, -7.653608918190002e-06, -7.160939276218414e-06, -6.668269634246826e-06, -6.175599992275238e-06, -5.68293035030365e-06, -5.190260708332062e-06, -4.697591066360474e-06, -4.2049214243888855e-06, -3.7122517824172974e-06, -3.2195821404457092e-06, -2.726912498474121e-06, -2.234242856502533e-06, -1.7415732145309448e-06, -1.2489035725593567e-06, -7.562339305877686e-07, -2.635642886161804e-07, 2.2910535335540771e-07, 7.217749953269958e-07, 1.214444637298584e-06, 1.7071142792701721e-06, 2.1997839212417603e-06, 2.6924535632133484e-06, 3.1851232051849365e-06, 3.6777928471565247e-06, 4.170462489128113e-06, 4.663132131099701e-06, 5.155801773071289e-06, 5.648471415042877e-06, 6.141141057014465e-06, 6.6338106989860535e-06, 7.126480340957642e-06, 7.61914998292923e-06, 8.111819624900818e-06, 8.604489266872406e-06, 9.097158908843994e-06, 9.589828550815582e-06, 1.008249819278717e-05, 1.0575167834758759e-05, 1.1067837476730347e-05, 1.1560507118701935e-05, 1.2053176760673523e-05, 
1.2545846402645111e-05, 1.30385160446167e-05, 1.3531185686588287e-05, 1.4023855328559875e-05, 1.4516524970531464e-05, 1.5009194612503052e-05, 1.550186425447464e-05, 1.5994533896446228e-05, 1.6487203538417816e-05, 1.6979873180389404e-05, 1.7472542822360992e-05, 1.796521246433258e-05, 1.845788210630417e-05, 1.8950551748275757e-05, 1.9443221390247345e-05, 1.9935891032218933e-05, 2.042856067419052e-05, 2.092123031616211e-05]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 4.0, 12.0, 14.0, 12.0, 15.0, 37.0, 49.0, 65.0, 86.0, 119.0, 203.0, 298.0, 396.0, 642.0, 1029.0, 1740.0, 2964.0, 5177.0, 9187.0, 17679.0, 39275.0, 103831.0, 523896.0, 218035.0, 65685.0, 27705.0, 13201.0, 7078.0, 3925.0, 2347.0, 1378.0, 867.0, 514.0, 340.0, 240.0, 162.0, 96.0, 79.0, 63.0, 29.0, 22.0, 17.0, 13.0, 11.0, 10.0, 8.0, 3.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-9.316205978393555e-05, -9.013526141643524e-05, -8.710846304893494e-05, -8.408166468143463e-05, -8.105486631393433e-05, -7.802806794643402e-05, -7.500126957893372e-05, -7.197447121143341e-05, -6.89476728439331e-05, -6.59208744764328e-05, -6.28940761089325e-05, -5.986727774143219e-05, -5.6840479373931885e-05, -5.381368100643158e-05, -5.0786882638931274e-05, -4.776008427143097e-05, -4.4733285903930664e-05, -4.170648753643036e-05, -3.8679689168930054e-05, -3.565289080142975e-05, -3.262609243392944e-05, -2.9599294066429138e-05, -2.6572495698928833e-05, -2.3545697331428528e-05, -2.0518898963928223e-05, -1.7492100596427917e-05, -1.4465302228927612e-05, -1.1438503861427307e-05, -8.411705493927002e-06, -5.384907126426697e-06, -2.3581087589263916e-06, 6.686896085739136e-07, 3.6954879760742188e-06, 6.722286343574524e-06, 9.749084711074829e-06, 1.2775883078575134e-05, 1.580268144607544e-05, 1.8829479813575745e-05, 2.185627818107605e-05, 2.4883076548576355e-05, 2.790987491607666e-05, 3.0936673283576965e-05, 3.396347165107727e-05, 3.6990270018577576e-05, 4.001706838607788e-05, 4.3043866753578186e-05, 4.607066512107849e-05, 4.9097463488578796e-05, 5.21242618560791e-05, 5.515106022357941e-05, 5.817785859107971e-05, 6.120465695858002e-05, 6.423145532608032e-05, 6.725825369358063e-05, 7.028505206108093e-05, 7.331185042858124e-05, 7.633864879608154e-05, 7.936544716358185e-05, 8.239224553108215e-05, 8.541904389858246e-05, 8.844584226608276e-05, 9.147264063358307e-05, 9.449943900108337e-05, 9.752623736858368e-05, 0.00010055303573608398]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 2.0, 7.0, 8.0, 7.0, 5.0, 15.0, 15.0, 23.0, 18.0, 26.0, 26.0, 30.0, 25.0, 48.0, 36.0, 41.0, 61.0, 60.0, 83.0, 43.0, 58.0, 58.0, 46.0, 39.0, 40.0, 32.0, 24.0, 23.0, 20.0, 16.0, 13.0, 12.0, 9.0, 12.0, 6.0, 3.0, 4.0, 0.0, 6.0, 2.0, 0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.6133995056152344e-05, -4.434678703546524e-05, -4.255957901477814e-05, -4.0772370994091034e-05, -3.898516297340393e-05, -3.719795495271683e-05, -3.5410746932029724e-05, -3.362353891134262e-05, -3.183633089065552e-05, -3.0049122869968414e-05, -2.826191484928131e-05, -2.6474706828594208e-05, -2.4687498807907104e-05, -2.290029078722e-05, -2.1113082766532898e-05, -1.9325874745845795e-05, -1.753866672515869e-05, -1.5751458704471588e-05, -1.3964250683784485e-05, -1.2177042663097382e-05, -1.0389834642410278e-05, -8.602626621723175e-06, -6.815418601036072e-06, -5.0282105803489685e-06, 
-3.2410025596618652e-06, -1.453794538974762e-06, 3.334134817123413e-07, 2.1206215023994446e-06, 3.907829523086548e-06, 5.695037543773651e-06, 7.482245564460754e-06, 9.269453585147858e-06, 1.1056661605834961e-05, 1.2843869626522064e-05, 1.4631077647209167e-05, 1.641828566789627e-05, 1.8205493688583374e-05, 1.9992701709270477e-05, 2.177990972995758e-05, 2.3567117750644684e-05, 2.5354325771331787e-05, 2.714153379201889e-05, 2.8928741812705994e-05, 3.07159498333931e-05, 3.25031578540802e-05, 3.4290365874767303e-05, 3.607757389545441e-05, 3.786478191614151e-05, 3.965198993682861e-05, 4.1439197957515717e-05, 4.322640597820282e-05, 4.501361399888992e-05, 4.6800822019577026e-05, 4.858803004026413e-05, 5.037523806095123e-05, 5.2162446081638336e-05, 5.394965410232544e-05, 5.573686212301254e-05, 5.7524070143699646e-05, 5.931127816438675e-05, 6.109848618507385e-05, 6.288569420576096e-05, 6.467290222644806e-05, 6.646011024713516e-05, 6.824731826782227e-05]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 7.0, 6.0, 10.0, 14.0, 25.0, 34.0, 36.0, 72.0, 97.0, 175.0, 231.0, 508.0, 967.0, 1990.0, 4257.0, 10529.0, 1004324.0, 14991.0, 5331.0, 2385.0, 1153.0, 667.0, 288.0, 186.0, 84.0, 55.0, 37.0, 26.0, 13.0, 18.0, 9.0, 7.0, 2.0, 5.0, 5.0, 3.0, 2.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.0011816024780273438, -0.001145392656326294, -0.0011091828346252441, -0.0010729730129241943, -0.0010367631912231445, -0.0010005533695220947, -0.0009643435478210449, -0.0009281337261199951, -0.0008919239044189453, -0.0008557140827178955, -0.0008195042610168457, -0.0007832944393157959, -0.0007470846176147461, -0.0007108747959136963, -0.0006746649742126465, -0.0006384551525115967, -0.0006022453308105469, -0.0005660355091094971, -0.0005298256874084473, -0.0004936158657073975, -0.00045740604400634766, -0.00042119622230529785, -0.00038498640060424805, -0.00034877657890319824, -0.00031256675720214844, -0.00027635693550109863, -0.00024014711380004883, -0.00020393729209899902, -0.00016772747039794922, -0.00013151764869689941, -9.530782699584961e-05, -5.9098005294799805e-05, -2.288818359375e-05, 1.3321638107299805e-05, 4.953145980834961e-05, 8.574128150939941e-05, 0.00012195110321044922, 0.00015816092491149902, 0.00019437074661254883, 0.00023058056831359863, 0.00026679039001464844, 0.00030300021171569824, 0.00033921003341674805, 0.00037541985511779785, 0.00041162967681884766, 0.00044783949851989746, 0.00048404932022094727, 0.0005202591419219971, 0.0005564689636230469, 0.0005926787853240967, 0.0006288886070251465, 0.0006650984287261963, 0.0007013082504272461, 0.0007375180721282959, 0.0007737278938293457, 0.0008099377155303955, 0.0008461475372314453, 0.0008823573589324951, 0.0009185671806335449, 0.0009547770023345947, 0.0009909868240356445, 0.0010271966457366943, 0.0010634064674377441, 0.001099616289138794, 0.0011358261108398438]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 3.0, 3.0, 2.0, 6.0, 8.0, 9.0, 7.0, 9.0, 11.0, 14.0, 10.0, 11.0, 33.0, 79.0, 101.0, 175.0, 168.0, 142.0, 66.0, 40.0, 16.0, 8.0, 9.0, 15.0, 7.0, 7.0, 9.0, 4.0, 9.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-9.357929229736328e-06, -9.074807167053223e-06, -8.791685104370117e-06, -8.508563041687012e-06, -8.225440979003906e-06, 
-7.9423189163208e-06, -7.659196853637695e-06, -7.37607479095459e-06, -7.092952728271484e-06, -6.809830665588379e-06, -6.5267086029052734e-06, -6.243586540222168e-06, -5.9604644775390625e-06, -5.677342414855957e-06, -5.3942203521728516e-06, -5.111098289489746e-06, -4.827976226806641e-06, -4.544854164123535e-06, -4.26173210144043e-06, -3.978610038757324e-06, -3.6954879760742188e-06, -3.4123659133911133e-06, -3.129243850708008e-06, -2.8461217880249023e-06, -2.562999725341797e-06, -2.2798776626586914e-06, -1.996755599975586e-06, -1.7136335372924805e-06, -1.430511474609375e-06, -1.1473894119262695e-06, -8.642673492431641e-07, -5.811452865600586e-07, -2.980232238769531e-07, -1.4901161193847656e-08, 2.682209014892578e-07, 5.513429641723633e-07, 8.344650268554688e-07, 1.1175870895385742e-06, 1.4007091522216797e-06, 1.6838312149047852e-06, 1.9669532775878906e-06, 2.250075340270996e-06, 2.5331974029541016e-06, 2.816319465637207e-06, 3.0994415283203125e-06, 3.382563591003418e-06, 3.6656856536865234e-06, 3.948807716369629e-06, 4.231929779052734e-06, 4.51505184173584e-06, 4.798173904418945e-06, 5.081295967102051e-06, 5.364418029785156e-06, 5.647540092468262e-06, 5.930662155151367e-06, 6.213784217834473e-06, 6.496906280517578e-06, 6.780028343200684e-06, 7.063150405883789e-06, 7.3462724685668945e-06, 7.62939453125e-06, 7.912516593933105e-06, 8.195638656616211e-06, 8.478760719299316e-06, 8.761882781982422e-06]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0, 5.0, 9.0, 16.0, 12.0, 20.0, 20.0, 30.0, 34.0, 53.0, 80.0, 99.0, 163.0, 188.0, 237.0, 380.0, 569.0, 757.0, 1265.0, 1925.0, 3391.0, 8252.0, 95008.0, 911695.0, 13507.0, 4190.0, 2263.0, 1356.0, 943.0, 635.0, 386.0, 277.0, 217.0, 129.0, 110.0, 90.0, 79.0, 41.0, 37.0, 17.0, 24.0, 12.0, 9.0, 7.0, 7.0, 3.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-9.226799011230469e-05, -8.910335600376129e-05, -8.59387218952179e-05, -8.27740877866745e-05, -7.96094536781311e-05, -7.644481956958771e-05, -7.328018546104431e-05, -7.011555135250092e-05, -6.695091724395752e-05, -6.378628313541412e-05, -6.062164902687073e-05, -5.745701491832733e-05, -5.4292380809783936e-05, -5.112774670124054e-05, -4.7963112592697144e-05, -4.479847848415375e-05, -4.163384437561035e-05, -3.8469210267066956e-05, -3.530457615852356e-05, -3.2139942049980164e-05, -2.8975307941436768e-05, -2.581067383289337e-05, -2.2646039724349976e-05, -1.948140561580658e-05, -1.6316771507263184e-05, -1.3152137398719788e-05, -9.987503290176392e-06, -6.822869181632996e-06, -3.6582350730895996e-06, -4.936009645462036e-07, 2.6710331439971924e-06, 5.835667252540588e-06, 9.000301361083984e-06, 1.216493546962738e-05, 1.5329569578170776e-05, 1.8494203686714172e-05, 2.165883779525757e-05, 2.4823471903800964e-05, 2.798810601234436e-05, 3.1152740120887756e-05, 3.431737422943115e-05, 3.748200833797455e-05, 4.0646642446517944e-05, 4.381127655506134e-05, 4.6975910663604736e-05, 5.014054477214813e-05, 5.330517888069153e-05, 5.6469812989234924e-05, 5.963444709777832e-05, 6.279908120632172e-05, 6.596371531486511e-05, 6.912834942340851e-05, 7.22929835319519e-05, 7.54576176404953e-05, 7.86222517490387e-05, 8.178688585758209e-05, 8.495151996612549e-05, 8.811615407466888e-05, 9.128078818321228e-05, 9.444542229175568e-05, 9.761005640029907e-05, 0.00010077469050884247, 0.00010393932461738586, 0.00010710395872592926, 0.00011026859283447266]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", 
"values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 1.0, 6.0, 5.0, 19.0, 67.0, 200.0, 581.0, 71.0, 23.0, 7.0, 1.0, 2.0, 0.0, 2.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.783008575439453e-05, -6.550364196300507e-05, -6.31771981716156e-05, -6.0850754380226135e-05, -5.852431058883667e-05, -5.6197866797447205e-05, -5.387142300605774e-05, -5.1544979214668274e-05, -4.921853542327881e-05, -4.689209163188934e-05, -4.456564784049988e-05, -4.223920404911041e-05, -3.991276025772095e-05, -3.758631646633148e-05, -3.5259872674942017e-05, -3.293342888355255e-05, -3.0606985092163086e-05, -2.828054130077362e-05, -2.5954097509384155e-05, -2.362765371799469e-05, -2.1301209926605225e-05, -1.897476613521576e-05, -1.6648322343826294e-05, -1.4321878552436829e-05, -1.1995434761047363e-05, -9.668990969657898e-06, -7.342547178268433e-06, -5.016103386878967e-06, -2.689659595489502e-06, -3.632158041000366e-07, 1.9632279872894287e-06, 4.289671778678894e-06, 6.616115570068359e-06, 8.942559361457825e-06, 1.126900315284729e-05, 1.3595446944236755e-05, 1.592189073562622e-05, 1.8248334527015686e-05, 2.057477831840515e-05, 2.2901222109794617e-05, 2.5227665901184082e-05, 2.7554109692573547e-05, 2.9880553483963013e-05, 3.220699727535248e-05, 3.453344106674194e-05, 3.685988485813141e-05, 3.9186328649520874e-05, 4.151277244091034e-05, 4.3839216232299805e-05, 4.616566002368927e-05, 4.8492103815078735e-05, 5.08185476064682e-05, 5.3144991397857666e-05, 5.547143518924713e-05, 5.77978789806366e-05, 6.012432277202606e-05, 6.245076656341553e-05, 6.477721035480499e-05, 6.710365414619446e-05, 6.943009793758392e-05, 7.175654172897339e-05, 7.408298552036285e-05, 7.640942931175232e-05, 7.873587310314178e-05, 8.106231689453125e-05]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 5.0, 3.0, 3.0, 3.0, 5.0, 5.0, 9.0, 20.0, 24.0, 31.0, 46.0, 50.0, 71.0, 127.0, 156.0, 186.0, 101.0, 63.0, 31.0, 21.0, 14.0, 9.0, 7.0, 6.0, 6.0, 3.0, 4.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0025740095879882574, -0.0024893530644476414, -0.002404696773737669, -0.002320040250197053, -0.002235383726656437, -0.002150727203115821, -0.002066070679575205, -0.0019814143888652325, -0.0018967578653246164, -0.0018121013417840004, -0.0017274449346587062, -0.001642788527533412, -0.001558132003992796, -0.00147347548045218, -0.0013888190733268857, -0.0013041626662015915, -0.0012195061426609755, -0.0011348496191203594, -0.0010501932119950652, -0.0009655367466621101, -0.000880880281329155, -0.0007962238159961998, -0.0007115673506632447, -0.0006269108853302896, -0.0005422544199973345, -0.00045759795466437936, -0.00037294148933142424, -0.0002882850239984691, -0.000203628558665514, -0.00011897209333255887, -3.431562799960375e-05, 5.0340837333351374e-05, 0.00013499753549695015, 0.00021965400082990527, 0.0003043104661628604, 0.0003889669314958155, 0.00047362339682877064, 0.0005582798621617258, 0.0006429363274946809, 0.000727592792827636, 0.0008122492581605911, 0.0008969057234935462, 0.0009815621888265014, 0.0010662185959517956, 0.0011508751194924116, 0.0012355316430330276, 0.0013201880501583219, 0.001404844457283616, 0.001489500980824232, 0.0015741575043648481, 0.0016588139114901423, 0.0017434703186154366, 
0.0018281268421560526, 0.0019127833656966686, 0.001997439656406641, 0.002082096179947257, 0.002166752703487873, 0.002251409227028489, 0.002336065750569105, 0.0024207220412790775, 0.0025053785648196936, 0.0025900350883603096, 0.002674691379070282, 0.002759347902610898, 0.002844004426151514]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 4.0, 7.0, 12.0, 12.0, 18.0, 23.0, 21.0, 23.0, 24.0, 24.0, 23.0, 30.0, 22.0, 29.0, 29.0, 24.0, 35.0, 32.0, 28.0, 17.0, 24.0, 30.0, 24.0, 24.0, 31.0, 27.0, 43.0, 25.0, 28.0, 23.0, 34.0, 25.0, 36.0, 20.0, 19.0, 21.0, 18.0, 19.0, 22.0, 16.0, 12.0, 15.0, 10.0, 8.0, 6.0, 4.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0], "bins": [-0.000493168830871582, -0.0004785340279340744, -0.00046389922499656677, -0.00044926442205905914, -0.0004346296191215515, -0.0004199948161840439, -0.00040536001324653625, -0.0003907252103090286, -0.000376090407371521, -0.00036145560443401337, -0.00034682080149650574, -0.0003321859985589981, -0.0003175511956214905, -0.00030291639268398285, -0.0002882815897464752, -0.0002736467868089676, -0.00025901198387145996, -0.00024437718093395233, -0.0002297423779964447, -0.00021510757505893707, -0.00020047277212142944, -0.00018583796918392181, -0.00017120316624641418, -0.00015656836330890656, -0.00014193356037139893, -0.0001272987574338913, -0.00011266395449638367, -9.802915155887604e-05, -8.339434862136841e-05, -6.875954568386078e-05, -5.412474274635315e-05, -3.948993980884552e-05, -2.485513687133789e-05, -1.0220333933830261e-05, 4.414469003677368e-06, 1.9049271941184998e-05, 3.368407487869263e-05, 4.8318877816200256e-05, 6.295368075370789e-05, 7.758848369121552e-05, 9.222328662872314e-05, 0.00010685808956623077, 0.0001214928925037384, 0.00013612769544124603, 0.00015076249837875366, 0.0001653973013162613, 0.00018003210425376892, 0.00019466690719127655, 0.00020930171012878418, 0.0002239365130662918, 0.00023857131600379944, 0.00025320611894130707, 0.0002678409218788147, 0.0002824757248163223, 0.00029711052775382996, 0.0003117453306913376, 0.0003263801336288452, 0.00034101493656635284, 0.0003556497395038605, 0.0003702845424413681, 0.00038491934537887573, 0.00039955414831638336, 0.000414188951253891, 0.0004288237541913986, 0.00044345855712890625]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 4.0, 11.0, 17.0, 19.0, 46.0, 96.0, 325.0, 1180.0, 8011.0, 255423.0, 3913558.0, 12531.0, 1913.0, 601.0, 275.0, 110.0, 65.0, 37.0, 18.0, 13.0, 9.0, 4.0, 9.0, 4.0, 5.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007367134094238281, -0.0007033571600914001, -0.0006700009107589722, -0.0006366446614265442, -0.0006032884120941162, -0.0005699321627616882, -0.0005365759134292603, -0.0005032196640968323, -0.0004698634147644043, -0.0004365071654319763, -0.00040315091609954834, -0.00036979466676712036, -0.0003364384174346924, -0.0003030821681022644, -0.0002697259187698364, -0.00023636966943740845, -0.00020301342010498047, -0.0001696571707725525, -0.0001363009214401245, -0.00010294467210769653, -6.958842277526855e-05, -3.6232173442840576e-05, -2.8759241104125977e-06, 3.048032522201538e-05, 6.383657455444336e-05, 9.719282388687134e-05, 0.00013054907321929932, 0.0001639053225517273, 0.00019726157188415527, 0.00023061782121658325, 0.00026397407054901123, 
0.0002973303198814392, 0.0003306865692138672, 0.00036404281854629517, 0.00039739906787872314, 0.0004307553172111511, 0.0004641115665435791, 0.0004974678158760071, 0.0005308240652084351, 0.000564180314540863, 0.000597536563873291, 0.000630892813205719, 0.000664249062538147, 0.000697605311870575, 0.0007309615612030029, 0.0007643178105354309, 0.0007976740598678589, 0.0008310303092002869, 0.0008643865585327148, 0.0008977428078651428, 0.0009310990571975708, 0.0009644553065299988, 0.0009978115558624268, 0.0010311678051948547, 0.0010645240545272827, 0.0010978803038597107, 0.0011312365531921387, 0.0011645928025245667, 0.0011979490518569946, 0.0012313053011894226, 0.0012646615505218506, 0.0012980177998542786, 0.0013313740491867065, 0.0013647302985191345, 0.0013980865478515625]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 4.0, 8.0, 5.0, 11.0, 16.0, 23.0, 35.0, 66.0, 83.0, 120.0, 165.0, 163.0, 112.0, 56.0, 47.0, 43.0, 16.0, 8.0, 8.0, 6.0, 4.0, 3.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8596649169921875e-05, -1.7746351659297943e-05, -1.689605414867401e-05, -1.604575663805008e-05, -1.5195459127426147e-05, -1.4345161616802216e-05, -1.3494864106178284e-05, -1.2644566595554352e-05, -1.179426908493042e-05, -1.0943971574306488e-05, -1.0093674063682556e-05, -9.243376553058624e-06, -8.393079042434692e-06, -7.5427815318107605e-06, -6.692484021186829e-06, -5.842186510562897e-06, -4.991888999938965e-06, -4.141591489315033e-06, -3.291293978691101e-06, -2.440996468067169e-06, -1.5906989574432373e-06, -7.404014468193054e-07, 1.0989606380462646e-07, 9.601935744285583e-07, 1.8104910850524902e-06, 2.660788595676422e-06, 3.511086106300354e-06, 4.361383616924286e-06, 5.211681127548218e-06, 6.06197863817215e-06, 6.9122761487960815e-06, 7.762573659420013e-06, 8.612871170043945e-06, 9.463168680667877e-06, 1.0313466191291809e-05, 1.1163763701915741e-05, 1.2014061212539673e-05, 1.2864358723163605e-05, 1.3714656233787537e-05, 1.4564953744411469e-05, 1.54152512550354e-05, 1.6265548765659332e-05, 1.7115846276283264e-05, 1.7966143786907196e-05, 1.8816441297531128e-05, 1.966673880815506e-05, 2.0517036318778992e-05, 2.1367333829402924e-05, 2.2217631340026855e-05, 2.3067928850650787e-05, 2.391822636127472e-05, 2.476852387189865e-05, 2.5618821382522583e-05, 2.6469118893146515e-05, 2.7319416403770447e-05, 2.816971391439438e-05, 2.902001142501831e-05, 2.9870308935642242e-05, 3.0720606446266174e-05, 3.1570903956890106e-05, 3.242120146751404e-05, 3.327149897813797e-05, 3.41217964887619e-05, 3.4972093999385834e-05, 3.5822391510009766e-05]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 3.0, 7.0, 2.0, 4.0, 5.0, 12.0, 21.0, 24.0, 32.0, 77.0, 144.0, 262.0, 531.0, 963.0, 2154.0, 4291.0, 9908.0, 25244.0, 81560.0, 1179580.0, 2757720.0, 86927.0, 26060.0, 10055.0, 4393.0, 2121.0, 1011.0, 540.0, 296.0, 139.0, 62.0, 36.0, 26.0, 29.0, 6.0, 6.0, 7.0, 7.0, 2.0, 5.0, 6.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00023829936981201172, -0.000231083482503891, -0.00022386759519577026, -0.00021665170788764954, -0.0002094358205795288, -0.00020221993327140808, -0.00019500404596328735, -0.00018778815865516663, -0.0001805722713470459, -0.00017335638403892517, 
-0.00016614049673080444, -0.00015892460942268372, -0.000151708722114563, -0.00014449283480644226, -0.00013727694749832153, -0.0001300610601902008, -0.00012284517288208008, -0.00011562928557395935, -0.00010841339826583862, -0.0001011975109577179, -9.398162364959717e-05, -8.676573634147644e-05, -7.954984903335571e-05, -7.233396172523499e-05, -6.511807441711426e-05, -5.790218710899353e-05, -5.06862998008728e-05, -4.3470412492752075e-05, -3.625452518463135e-05, -2.903863787651062e-05, -2.1822750568389893e-05, -1.4606863260269165e-05, -7.3909759521484375e-06, -1.7508864402770996e-07, 7.040798664093018e-06, 1.4256685972213745e-05, 2.1472573280334473e-05, 2.86884605884552e-05, 3.590434789657593e-05, 4.3120235204696655e-05, 5.033612251281738e-05, 5.755200982093811e-05, 6.476789712905884e-05, 7.198378443717957e-05, 7.919967174530029e-05, 8.641555905342102e-05, 9.363144636154175e-05, 0.00010084733366966248, 0.0001080632209777832, 0.00011527910828590393, 0.00012249499559402466, 0.00012971088290214539, 0.0001369267702102661, 0.00014414265751838684, 0.00015135854482650757, 0.0001585744321346283, 0.00016579031944274902, 0.00017300620675086975, 0.00018022209405899048, 0.0001874379813671112, 0.00019465386867523193, 0.00020186975598335266, 0.0002090856432914734, 0.00021630153059959412, 0.00022351741790771484]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 1.0, 3.0, 7.0, 3.0, 7.0, 4.0, 7.0, 4.0, 14.0, 9.0, 24.0, 28.0, 42.0, 69.0, 88.0, 595.0, 2218.0, 256.0, 137.0, 100.0, 90.0, 90.0, 62.0, 60.0, 42.0, 32.0, 23.0, 20.0, 14.0, 14.0, 9.0, 3.0, 3.0, 5.0, 3.0, 3.0], "bins": [-0.0001010894775390625, -9.881891310214996e-05, -9.654834866523743e-05, -9.427778422832489e-05, -9.200721979141235e-05, -8.973665535449982e-05, -8.746609091758728e-05, -8.519552648067474e-05, -8.292496204376221e-05, -8.065439760684967e-05, -7.838383316993713e-05, -7.61132687330246e-05, -7.384270429611206e-05, -7.157213985919952e-05, -6.930157542228699e-05, -6.703101098537445e-05, -6.476044654846191e-05, -6.248988211154938e-05, -6.021931767463684e-05, -5.7948753237724304e-05, -5.567818880081177e-05, -5.340762436389923e-05, -5.1137059926986694e-05, -4.886649549007416e-05, -4.659593105316162e-05, -4.4325366616249084e-05, -4.205480217933655e-05, -3.978423774242401e-05, -3.7513673305511475e-05, -3.524310886859894e-05, -3.29725444316864e-05, -3.0701979994773865e-05, -2.8431415557861328e-05, -2.616085112094879e-05, -2.3890286684036255e-05, -2.1619722247123718e-05, -1.934915781021118e-05, -1.7078593373298645e-05, -1.4808028936386108e-05, -1.2537464499473572e-05, -1.0266900062561035e-05, -7.996335625648499e-06, -5.725771188735962e-06, -3.4552067518234253e-06, -1.1846423149108887e-06, 1.085922122001648e-06, 3.3564865589141846e-06, 5.627050995826721e-06, 7.897615432739258e-06, 1.0168179869651794e-05, 1.2438744306564331e-05, 1.4709308743476868e-05, 1.6979873180389404e-05, 1.925043761730194e-05, 2.1521002054214478e-05, 2.3791566491127014e-05, 2.606213092803955e-05, 2.8332695364952087e-05, 3.0603259801864624e-05, 3.287382423877716e-05, 3.51443886756897e-05, 3.7414953112602234e-05, 3.968551754951477e-05, 4.195608198642731e-05, 4.4226646423339844e-05]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 6.0, 30.0, 152.0, 600.0, 173.0, 44.0, 13.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005533511284738779, -0.0004520610673353076, -0.00035077103530056775, -0.0002494810032658279, -0.00014819094212725759, -4.690088098868728e-05, 5.438912194222212e-05, 0.00015567918308079243, 0.00025696924421936274, 0.00035825930535793304, 0.0004595493373926729, 0.0005608393694274127, 0.0006621294305659831, 0.0007634194917045534, 0.0008647094946354628, 0.0009659995557740331, 0.0010672896169126034, 0.0011685796780511737, 0.001269869739189744, 0.0013711596839129925, 0.0014724498614668846, 0.001573739806190133, 0.0016750298673287034, 0.0017763199284672737, 0.001877609989605844, 0.0019789000507444143, 0.002080189995467663, 0.002181480173021555, 0.0022827701177448034, 0.0023840602952986956, 0.002485350240021944, 0.002586640417575836, 0.0026879305951297283, 0.002789220539852977, 0.002890510717406869, 0.0029918006621301174, 0.0030930908396840096, 0.003194380784407258, 0.0032956707291305065, 0.0033969609066843987, 0.003498251084238291, 0.0035995410289615393, 0.0037008312065154314, 0.00380212115123868, 0.003903411328792572, 0.004004701506346464, 0.004105991218239069, 0.004207281395792961, 0.004308571107685566, 0.004409861285239458, 0.004511150997132063, 0.004612441174685955, 0.004713731352239847, 0.004815021529793739, 0.004916311241686344, 0.005017601419240236, 0.005118891596794128, 0.0052201817743480206, 0.005321471486240625, 0.0054227616637945175, 0.00552405184134841, 0.005625342018902302, 0.005726631730794907, 0.005827921908348799, 0.005929212085902691]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 10.0, 8.0, 8.0, 11.0, 14.0, 21.0, 22.0, 25.0, 32.0, 30.0, 48.0, 33.0, 48.0, 57.0, 57.0, 40.0, 48.0, 59.0, 47.0, 38.0, 39.0, 47.0, 43.0, 41.0, 38.0, 26.0, 21.0, 21.0, 15.0, 14.0, 12.0, 6.0, 10.0, 8.0, 6.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0004980564117431641, -0.00048445630818605423, -0.0004708562046289444, -0.00045725610107183456, -0.00044365599751472473, -0.0004300558939576149, -0.00041645579040050507, -0.00040285568684339523, -0.0003892555832862854, -0.00037565547972917557, -0.00036205537617206573, -0.0003484552726149559, -0.00033485516905784607, -0.00032125506550073624, -0.0003076549619436264, -0.00029405485838651657, -0.00028045475482940674, -0.0002668546512722969, -0.00025325454771518707, -0.00023965444415807724, -0.0002260543406009674, -0.00021245423704385757, -0.00019885413348674774, -0.0001852540299296379, -0.00017165392637252808, -0.00015805382281541824, -0.0001444537192583084, -0.00013085361570119858, -0.00011725351214408875, -0.00010365340858697891, -9.005330502986908e-05, -7.645320147275925e-05, -6.285309791564941e-05, -4.925299435853958e-05, -3.565289080142975e-05, -2.2052787244319916e-05, -8.452683687210083e-06, 5.14741986989975e-06, 1.8747523427009583e-05, 3.2347626984119415e-05, 4.594773054122925e-05, 5.954783409833908e-05, 7.314793765544891e-05, 8.674804121255875e-05, 0.00010034814476966858, 0.00011394824832677841, 0.00012754835188388824, 0.00014114845544099808, 0.0001547485589981079, 0.00016834866255521774, 0.00018194876611232758, 0.0001955488696694374, 0.00020914897322654724, 0.00022274907678365707, 0.0002363491803407669, 0.00024994928389787674, 0.00026354938745498657, 
0.0002771494910120964, 0.00029074959456920624, 0.00030434969812631607, 0.0003179498016834259, 0.00033154990524053574, 0.00034515000879764557, 0.0003587501123547554, 0.00037235021591186523]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 4.0, 2.0, 2.0, 7.0, 6.0, 10.0, 12.0, 12.0, 19.0, 24.0, 39.0, 27.0, 82.0, 114.0, 172.0, 318.0, 612.0, 1247.0, 2584.0, 6195.0, 16373.0, 55942.0, 432263.0, 448723.0, 56115.0, 16228.0, 6108.0, 2655.0, 1234.0, 604.0, 315.0, 189.0, 116.0, 62.0, 35.0, 29.0, 17.0, 20.0, 9.0, 6.0, 10.0, 5.0, 5.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00028586387634277344, -0.00027693063020706177, -0.0002679973840713501, -0.00025906413793563843, -0.00025013089179992676, -0.0002411976456642151, -0.00023226439952850342, -0.00022333115339279175, -0.00021439790725708008, -0.0002054646611213684, -0.00019653141498565674, -0.00018759816884994507, -0.0001786649227142334, -0.00016973167657852173, -0.00016079843044281006, -0.0001518651843070984, -0.00014293193817138672, -0.00013399869203567505, -0.00012506544589996338, -0.00011613219976425171, -0.00010719895362854004, -9.826570749282837e-05, -8.93324613571167e-05, -8.039921522140503e-05, -7.146596908569336e-05, -6.253272294998169e-05, -5.359947681427002e-05, -4.466623067855835e-05, -3.573298454284668e-05, -2.679973840713501e-05, -1.786649227142334e-05, -8.93324613571167e-06, 0.0, 8.93324613571167e-06, 1.786649227142334e-05, 2.679973840713501e-05, 3.573298454284668e-05, 4.466623067855835e-05, 5.359947681427002e-05, 6.253272294998169e-05, 7.146596908569336e-05, 8.039921522140503e-05, 8.93324613571167e-05, 9.826570749282837e-05, 0.00010719895362854004, 0.00011613219976425171, 0.00012506544589996338, 0.00013399869203567505, 0.00014293193817138672, 0.0001518651843070984, 0.00016079843044281006, 0.00016973167657852173, 0.0001786649227142334, 0.00018759816884994507, 0.00019653141498565674, 0.0002054646611213684, 0.00021439790725708008, 0.00022333115339279175, 0.00023226439952850342, 0.0002411976456642151, 0.00025013089179992676, 0.00025906413793563843, 0.0002679973840713501, 0.00027693063020706177, 0.00028586387634277344]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 4.0, 3.0, 8.0, 5.0, 18.0, 18.0, 24.0, 36.0, 43.0, 73.0, 102.0, 117.0, 112.0, 124.0, 101.0, 58.0, 49.0, 29.0, 20.0, 11.0, 16.0, 13.0, 8.0, 2.0, 3.0, 5.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.239776611328125e-05, -1.1721625924110413e-05, -1.1045485734939575e-05, -1.0369345545768738e-05, -9.6932053565979e-06, -9.017065167427063e-06, -8.340924978256226e-06, -7.664784789085388e-06, -6.988644599914551e-06, -6.312504410743713e-06, -5.636364221572876e-06, -4.9602240324020386e-06, -4.284083843231201e-06, -3.6079436540603638e-06, -2.9318034648895264e-06, -2.255663275718689e-06, -1.5795230865478516e-06, -9.033828973770142e-07, -2.2724270820617676e-07, 4.4889748096466064e-07, 1.125037670135498e-06, 1.8011778593063354e-06, 2.477318048477173e-06, 3.1534582376480103e-06, 3.829598426818848e-06, 4.505738615989685e-06, 5.1818788051605225e-06, 5.85801899433136e-06, 6.534159183502197e-06, 7.210299372673035e-06, 7.886439561843872e-06, 8.56257975101471e-06, 9.238719940185547e-06, 9.914860129356384e-06, 1.0591000318527222e-05, 1.1267140507698059e-05, 
1.1943280696868896e-05, 1.2619420886039734e-05, 1.3295561075210571e-05, 1.3971701264381409e-05, 1.4647841453552246e-05, 1.5323981642723083e-05, 1.600012183189392e-05, 1.6676262021064758e-05, 1.7352402210235596e-05, 1.8028542399406433e-05, 1.870468258857727e-05, 1.9380822777748108e-05, 2.0056962966918945e-05, 2.0733103156089783e-05, 2.140924334526062e-05, 2.2085383534431458e-05, 2.2761523723602295e-05, 2.3437663912773132e-05, 2.411380410194397e-05, 2.4789944291114807e-05, 2.5466084480285645e-05, 2.6142224669456482e-05, 2.681836485862732e-05, 2.7494505047798157e-05, 2.8170645236968994e-05, 2.884678542613983e-05, 2.952292561531067e-05, 3.0199065804481506e-05, 3.0875205993652344e-05]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 6.0, 9.0, 7.0, 15.0, 18.0, 23.0, 28.0, 42.0, 61.0, 77.0, 136.0, 169.0, 260.0, 362.0, 598.0, 1004.0, 1503.0, 2718.0, 4672.0, 8749.0, 17790.0, 40270.0, 116271.0, 545894.0, 200835.0, 57545.0, 23875.0, 11311.0, 5890.0, 3184.0, 1895.0, 1156.0, 731.0, 473.0, 300.0, 217.0, 120.0, 92.0, 75.0, 54.0, 35.0, 25.0, 23.0, 11.0, 10.0, 4.0, 3.0, 6.0, 1.0, 5.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.00014519691467285156, -0.0001406148076057434, -0.00013603270053863525, -0.0001314505934715271, -0.00012686848640441895, -0.0001222863793373108, -0.00011770427227020264, -0.00011312216520309448, -0.00010854005813598633, -0.00010395795106887817, -9.937584400177002e-05, -9.479373693466187e-05, -9.021162986755371e-05, -8.562952280044556e-05, -8.10474157333374e-05, -7.646530866622925e-05, -7.18832015991211e-05, -6.730109453201294e-05, -6.271898746490479e-05, -5.813688039779663e-05, -5.3554773330688477e-05, -4.897266626358032e-05, -4.439055919647217e-05, -3.9808452129364014e-05, -3.522634506225586e-05, -3.0644237995147705e-05, -2.606213092803955e-05, -2.1480023860931396e-05, -1.6897916793823242e-05, -1.2315809726715088e-05, -7.733702659606934e-06, -3.1515955924987793e-06, 1.430511474609375e-06, 6.012618541717529e-06, 1.0594725608825684e-05, 1.5176832675933838e-05, 1.9758939743041992e-05, 2.4341046810150146e-05, 2.89231538772583e-05, 3.3505260944366455e-05, 3.808736801147461e-05, 4.2669475078582764e-05, 4.725158214569092e-05, 5.183368921279907e-05, 5.6415796279907227e-05, 6.099790334701538e-05, 6.558001041412354e-05, 7.016211748123169e-05, 7.474422454833984e-05, 7.9326331615448e-05, 8.390843868255615e-05, 8.84905457496643e-05, 9.307265281677246e-05, 9.765475988388062e-05, 0.00010223686695098877, 0.00010681897401809692, 0.00011140108108520508, 0.00011598318815231323, 0.00012056529521942139, 0.00012514740228652954, 0.0001297295093536377, 0.00013431161642074585, 0.000138893723487854, 0.00014347583055496216, 0.0001480579376220703]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 5.0, 4.0, 6.0, 5.0, 6.0, 7.0, 4.0, 13.0, 16.0, 17.0, 16.0, 25.0, 26.0, 25.0, 20.0, 21.0, 37.0, 35.0, 50.0, 58.0, 41.0, 57.0, 46.0, 50.0, 43.0, 46.0, 42.0, 37.0, 42.0, 22.0, 23.0, 20.0, 21.0, 22.0, 15.0, 14.0, 11.0, 12.0, 9.0, 4.0, 11.0, 2.0, 4.0, 6.0, 2.0, 2.0, 3.0, 2.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.4405460357666016e-05, -4.303455352783203e-05, -4.166364669799805e-05, -4.029273986816406e-05, -3.892183303833008e-05, -3.7550926208496094e-05, -3.618001937866211e-05, -3.4809112548828125e-05, -3.343820571899414e-05, -3.2067298889160156e-05, -3.069639205932617e-05, -2.9325485229492188e-05, -2.7954578399658203e-05, 
-2.658367156982422e-05, -2.5212764739990234e-05, -2.384185791015625e-05, -2.2470951080322266e-05, -2.110004425048828e-05, -1.9729137420654297e-05, -1.8358230590820312e-05, -1.6987323760986328e-05, -1.5616416931152344e-05, -1.424551010131836e-05, -1.2874603271484375e-05, -1.150369644165039e-05, -1.0132789611816406e-05, -8.761882781982422e-06, -7.3909759521484375e-06, -6.020069122314453e-06, -4.649162292480469e-06, -3.2782554626464844e-06, -1.9073486328125e-06, -5.364418029785156e-07, 8.344650268554688e-07, 2.205371856689453e-06, 3.5762786865234375e-06, 4.947185516357422e-06, 6.318092346191406e-06, 7.68899917602539e-06, 9.059906005859375e-06, 1.043081283569336e-05, 1.1801719665527344e-05, 1.3172626495361328e-05, 1.4543533325195312e-05, 1.5914440155029297e-05, 1.728534698486328e-05, 1.8656253814697266e-05, 2.002716064453125e-05, 2.1398067474365234e-05, 2.276897430419922e-05, 2.4139881134033203e-05, 2.5510787963867188e-05, 2.6881694793701172e-05, 2.8252601623535156e-05, 2.962350845336914e-05, 3.0994415283203125e-05, 3.236532211303711e-05, 3.3736228942871094e-05, 3.510713577270508e-05, 3.647804260253906e-05, 3.784894943237305e-05, 3.921985626220703e-05, 4.0590763092041016e-05, 4.1961669921875e-05, 4.3332576751708984e-05]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 3.0, 4.0, 5.0, 7.0, 10.0, 21.0, 30.0, 63.0, 75.0, 163.0, 272.0, 449.0, 949.0, 1842.0, 3723.0, 8496.0, 24603.0, 106719.0, 748640.0, 110788.0, 24911.0, 9065.0, 3735.0, 1865.0, 938.0, 507.0, 292.0, 153.0, 81.0, 51.0, 34.0, 19.0, 14.0, 8.0, 10.0, 2.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001081228256225586, -0.00010498985648155212, -0.00010185688734054565, -9.872391819953918e-05, -9.559094905853271e-05, -9.245797991752625e-05, -8.932501077651978e-05, -8.61920416355133e-05, -8.305907249450684e-05, -7.992610335350037e-05, -7.67931342124939e-05, -7.366016507148743e-05, -7.052719593048096e-05, -6.739422678947449e-05, -6.426125764846802e-05, -6.112828850746155e-05, -5.799531936645508e-05, -5.486235022544861e-05, -5.172938108444214e-05, -4.859641194343567e-05, -4.54634428024292e-05, -4.233047366142273e-05, -3.919750452041626e-05, -3.606453537940979e-05, -3.293156623840332e-05, -2.979859709739685e-05, -2.666562795639038e-05, -2.353265881538391e-05, -2.039968967437744e-05, -1.726672053337097e-05, -1.4133751392364502e-05, -1.1000782251358032e-05, -7.867813110351562e-06, -4.734843969345093e-06, -1.601874828338623e-06, 1.5310943126678467e-06, 4.664063453674316e-06, 7.797032594680786e-06, 1.0930001735687256e-05, 1.4062970876693726e-05, 1.7195940017700195e-05, 2.0328909158706665e-05, 2.3461878299713135e-05, 2.6594847440719604e-05, 2.9727816581726074e-05, 3.2860785722732544e-05, 3.5993754863739014e-05, 3.9126724004745483e-05, 4.225969314575195e-05, 4.539266228675842e-05, 4.852563142776489e-05, 5.165860056877136e-05, 5.479156970977783e-05, 5.79245388507843e-05, 6.105750799179077e-05, 6.419047713279724e-05, 6.732344627380371e-05, 7.045641541481018e-05, 7.358938455581665e-05, 7.672235369682312e-05, 7.985532283782959e-05, 8.298829197883606e-05, 8.612126111984253e-05, 8.9254230260849e-05, 9.238719940185547e-05]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 7.0, 2.0, 12.0, 11.0, 12.0, 14.0, 21.0, 27.0, 34.0, 43.0, 51.0, 78.0, 184.0, 200.0, 73.0, 42.0, 
32.0, 31.0, 22.0, 28.0, 9.0, 16.0, 9.0, 15.0, 4.0, 3.0, 4.0, 3.0, 3.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0907649993896484e-05, -1.0545365512371063e-05, -1.0183081030845642e-05, -9.820796549320221e-06, -9.4585120677948e-06, -9.096227586269379e-06, -8.733943104743958e-06, -8.371658623218536e-06, -8.009374141693115e-06, -7.647089660167694e-06, -7.284805178642273e-06, -6.922520697116852e-06, -6.560236215591431e-06, -6.1979517340660095e-06, -5.835667252540588e-06, -5.473382771015167e-06, -5.111098289489746e-06, -4.748813807964325e-06, -4.386529326438904e-06, -4.024244844913483e-06, -3.6619603633880615e-06, -3.2996758818626404e-06, -2.9373914003372192e-06, -2.575106918811798e-06, -2.212822437286377e-06, -1.8505379557609558e-06, -1.4882534742355347e-06, -1.1259689927101135e-06, -7.636845111846924e-07, -4.0140002965927124e-07, -3.91155481338501e-08, 3.2316893339157104e-07, 6.854534149169922e-07, 1.0477378964424133e-06, 1.4100223779678345e-06, 1.7723068594932556e-06, 2.1345913410186768e-06, 2.496875822544098e-06, 2.859160304069519e-06, 3.22144478559494e-06, 3.5837292671203613e-06, 3.9460137486457825e-06, 4.308298230171204e-06, 4.670582711696625e-06, 5.032867193222046e-06, 5.395151674747467e-06, 5.757436156272888e-06, 6.119720637798309e-06, 6.4820051193237305e-06, 6.844289600849152e-06, 7.206574082374573e-06, 7.568858563899994e-06, 7.931143045425415e-06, 8.293427526950836e-06, 8.655712008476257e-06, 9.017996490001678e-06, 9.3802809715271e-06, 9.74256545305252e-06, 1.0104849934577942e-05, 1.0467134416103363e-05, 1.0829418897628784e-05, 1.1191703379154205e-05, 1.1553987860679626e-05, 1.1916272342205048e-05, 1.2278556823730469e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 5.0, 5.0, 7.0, 6.0, 18.0, 28.0, 25.0, 31.0, 43.0, 50.0, 93.0, 105.0, 175.0, 241.0, 400.0, 576.0, 923.0, 1481.0, 2548.0, 4556.0, 8986.0, 21773.0, 59975.0, 472824.0, 382663.0, 52448.0, 19100.0, 8544.0, 4559.0, 2362.0, 1435.0, 897.0, 568.0, 352.0, 238.0, 150.0, 123.0, 63.0, 43.0, 43.0, 28.0, 16.0, 11.0, 9.0, 12.0, 7.0, 8.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.594160079956055e-05, -3.485102206468582e-05, -3.3760443329811096e-05, -3.266986459493637e-05, -3.1579285860061646e-05, -3.048870712518692e-05, -2.9398128390312195e-05, -2.830754965543747e-05, -2.7216970920562744e-05, -2.612639218568802e-05, -2.5035813450813293e-05, -2.3945234715938568e-05, -2.2854655981063843e-05, -2.1764077246189117e-05, -2.0673498511314392e-05, -1.9582919776439667e-05, -1.849234104156494e-05, -1.7401762306690216e-05, -1.631118357181549e-05, -1.5220604836940765e-05, -1.413002610206604e-05, -1.3039447367191315e-05, -1.194886863231659e-05, -1.0858289897441864e-05, -9.767711162567139e-06, -8.677132427692413e-06, -7.586553692817688e-06, -6.495974957942963e-06, -5.405396223068237e-06, -4.314817488193512e-06, -3.2242387533187866e-06, -2.1336600184440613e-06, -1.043081283569336e-06, 4.7497451305389404e-08, 1.1380761861801147e-06, 2.22865492105484e-06, 3.3192336559295654e-06, 4.409812390804291e-06, 5.500391125679016e-06, 6.5909698605537415e-06, 7.681548595428467e-06, 8.772127330303192e-06, 9.862706065177917e-06, 1.0953284800052643e-05, 1.2043863534927368e-05, 1.3134442269802094e-05, 1.4225021004676819e-05, 1.5315599739551544e-05, 1.640617847442627e-05, 1.7496757209300995e-05, 1.858733594417572e-05, 1.9677914679050446e-05, 2.076849341392517e-05, 2.1859072148799896e-05, 
2.294965088367462e-05, 2.4040229618549347e-05, 2.5130808353424072e-05, 2.6221387088298798e-05, 2.7311965823173523e-05, 2.8402544558048248e-05, 2.9493123292922974e-05, 3.05837020277977e-05, 3.1674280762672424e-05, 3.276485949754715e-05, 3.3855438232421875e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 4.0, 1.0, 6.0, 2.0, 4.0, 8.0, 6.0, 12.0, 7.0, 14.0, 13.0, 19.0, 21.0, 39.0, 32.0, 37.0, 37.0, 64.0, 69.0, 107.0, 107.0, 71.0, 61.0, 37.0, 49.0, 24.0, 11.0, 19.0, 25.0, 21.0, 10.0, 13.0, 7.0, 4.0, 7.0, 1.0, 4.0, 7.0, 3.0, 3.0, 5.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.055002212524414e-05, -1.0229647159576416e-05, -9.909272193908691e-06, -9.588897228240967e-06, -9.268522262573242e-06, -8.948147296905518e-06, -8.627772331237793e-06, -8.307397365570068e-06, -7.987022399902344e-06, -7.666647434234619e-06, -7.3462724685668945e-06, -7.02589750289917e-06, -6.705522537231445e-06, -6.385147571563721e-06, -6.064772605895996e-06, -5.7443976402282715e-06, -5.424022674560547e-06, -5.103647708892822e-06, -4.783272743225098e-06, -4.462897777557373e-06, -4.1425228118896484e-06, -3.822147846221924e-06, -3.5017728805541992e-06, -3.1813979148864746e-06, -2.86102294921875e-06, -2.5406479835510254e-06, -2.2202730178833008e-06, -1.8998980522155762e-06, -1.5795230865478516e-06, -1.259148120880127e-06, -9.387731552124023e-07, -6.183981895446777e-07, -2.980232238769531e-07, 2.2351741790771484e-08, 3.427267074584961e-07, 6.631016731262207e-07, 9.834766387939453e-07, 1.30385160446167e-06, 1.6242265701293945e-06, 1.944601535797119e-06, 2.2649765014648438e-06, 2.5853514671325684e-06, 2.905726432800293e-06, 3.2261013984680176e-06, 3.546476364135742e-06, 3.866851329803467e-06, 4.187226295471191e-06, 4.507601261138916e-06, 4.827976226806641e-06, 5.148351192474365e-06, 5.46872615814209e-06, 5.7891011238098145e-06, 6.109476089477539e-06, 6.429851055145264e-06, 6.750226020812988e-06, 7.070600986480713e-06, 7.3909759521484375e-06, 7.711350917816162e-06, 8.031725883483887e-06, 8.352100849151611e-06, 8.672475814819336e-06, 8.99285078048706e-06, 9.313225746154785e-06, 9.63360071182251e-06, 9.953975677490234e-06]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 7.0, 19.0, 41.0, 100.0, 184.0, 437.0, 162.0, 32.0, 19.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0011511954944580793, -0.001085653668269515, -0.0010201118420809507, -0.0009545700158923864, -0.0008890281897038221, -0.0008234863635152578, -0.0007579445373266935, -0.0006924027111381292, -0.0006268608849495649, -0.0005613190587610006, -0.0004957772325724363, -0.00043023540638387203, -0.00036469358019530773, -0.00029915175400674343, -0.00023360992781817913, -0.00016806810162961483, -0.00010252627544105053, -3.698444925248623e-05, 2.855737693607807e-05, 9.409920312464237e-05, 0.00015964102931320667, 0.00022518285550177097, 0.0002907246816903353, 0.0003562665078788996, 0.0004218083340674639, 0.0004873501602560282, 0.0005528919864445925, 0.0006184338126331568, 0.0006839756388217211, 0.0007495174650102854, 0.0008150592911988497, 0.000880601117387414, 0.0009461429435759783, 0.0010116847697645426, 0.0010772265959531069, 0.0011427684221416712, 
0.0012083102483302355, 0.0012738520745187998, 0.001339393900707364, 0.0014049357268959284, 0.0014704775530844927, 0.001536019379273057, 0.0016015612054616213, 0.0016671030316501856, 0.0017326448578387499, 0.0017981866840273142, 0.0018637285102158785, 0.0019292703364044428, 0.001994812162593007, 0.0020603539887815714, 0.0021258958149701357, 0.0021914376411587, 0.0022569794673472643, 0.0023225212935358286, 0.002388063119724393, 0.002453604945912957, 0.0025191467721015215, 0.002584688598290086, 0.00265023042447865, 0.0027157722506672144, 0.0027813140768557787, 0.002846855903044343, 0.0029123977292329073, 0.0029779395554214716, 0.003043481381610036]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0, 8.0, 6.0, 15.0, 22.0, 13.0, 31.0, 36.0, 36.0, 40.0, 52.0, 56.0, 61.0, 54.0, 54.0, 55.0, 65.0, 40.0, 47.0, 46.0, 52.0, 43.0, 33.0, 46.0, 26.0, 17.0, 23.0, 7.0, 11.0, 8.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006004571914672852, -0.000584891065955162, -0.0005693249404430389, -0.0005537588149309158, -0.0005381926894187927, -0.0005226265639066696, -0.0005070604383945465, -0.0004914943128824234, -0.0004759281873703003, -0.0004603620618581772, -0.0004447959363460541, -0.00042922981083393097, -0.00041366368532180786, -0.00039809755980968475, -0.00038253143429756165, -0.00036696530878543854, -0.00035139918327331543, -0.0003358330577611923, -0.0003202669322490692, -0.0003047008067369461, -0.000289134681224823, -0.0002735685557126999, -0.0002580024302005768, -0.00024243630468845367, -0.00022687017917633057, -0.00021130405366420746, -0.00019573792815208435, -0.00018017180263996124, -0.00016460567712783813, -0.00014903955161571503, -0.00013347342610359192, -0.00011790730059146881, -0.0001023411750793457, -8.67750495672226e-05, -7.120892405509949e-05, -5.564279854297638e-05, -4.007667303085327e-05, -2.4510547518730164e-05, -8.944422006607056e-06, 6.621703505516052e-06, 2.218782901763916e-05, 3.775395452976227e-05, 5.3320080041885376e-05, 6.888620555400848e-05, 8.445233106613159e-05, 0.0001000184565782547, 0.00011558458209037781, 0.00013115070760250092, 0.00014671683311462402, 0.00016228295862674713, 0.00017784908413887024, 0.00019341520965099335, 0.00020898133516311646, 0.00022454746067523956, 0.00024011358618736267, 0.0002556797116994858, 0.0002712458372116089, 0.000286811962723732, 0.0003023780882358551, 0.0003179442137479782, 0.0003335103392601013, 0.0003490764647722244, 0.00036464259028434753, 0.00038020871579647064, 0.00039577484130859375]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 5.0, 3.0, 7.0, 5.0, 9.0, 9.0, 9.0, 21.0, 27.0, 42.0, 81.0, 136.0, 227.0, 329.0, 610.0, 1052.0, 1685.0, 3028.0, 5094.0, 9606.0, 19833.0, 46664.0, 145987.0, 3454658.0, 364133.0, 78681.0, 29952.0, 13936.0, 7334.0, 4103.0, 2410.0, 1583.0, 957.0, 708.0, 452.0, 276.0, 194.0, 131.0, 94.0, 66.0, 41.0, 27.0, 22.0, 14.0, 15.0, 9.0, 3.0, 4.0, 4.0, 7.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0001628398895263672, -0.00015698187053203583, -0.00015112385153770447, -0.0001452658325433731, -0.00013940781354904175, -0.0001335497945547104, -0.00012769177556037903, -0.00012183375656604767, -0.00011597573757171631, -0.00011011771857738495, -0.00010425969958305359, -9.840168058872223e-05, -9.254366159439087e-05, -8.668564260005951e-05, 
-8.082762360572815e-05, -7.496960461139679e-05, -6.911158561706543e-05, -6.325356662273407e-05, -5.739554762840271e-05, -5.153752863407135e-05, -4.567950963973999e-05, -3.982149064540863e-05, -3.396347165107727e-05, -2.810545265674591e-05, -2.224743366241455e-05, -1.638941466808319e-05, -1.0531395673751831e-05, -4.673376679420471e-06, 1.1846423149108887e-06, 7.0426613092422485e-06, 1.2900680303573608e-05, 1.8758699297904968e-05, 2.4616718292236328e-05, 3.0474737286567688e-05, 3.633275628089905e-05, 4.219077527523041e-05, 4.804879426956177e-05, 5.390681326389313e-05, 5.976483225822449e-05, 6.562285125255585e-05, 7.148087024688721e-05, 7.733888924121857e-05, 8.319690823554993e-05, 8.905492722988129e-05, 9.491294622421265e-05, 0.000100770965218544, 0.00010662898421287537, 0.00011248700320720673, 0.00011834502220153809, 0.00012420304119586945, 0.0001300610601902008, 0.00013591907918453217, 0.00014177709817886353, 0.00014763511717319489, 0.00015349313616752625, 0.0001593511551618576, 0.00016520917415618896, 0.00017106719315052032, 0.00017692521214485168, 0.00018278323113918304, 0.0001886412501335144, 0.00019449926912784576, 0.00020035728812217712, 0.00020621530711650848, 0.00021207332611083984]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 5.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 6.0, 3.0, 10.0, 4.0, 9.0, 7.0, 10.0, 27.0, 21.0, 38.0, 31.0, 52.0, 70.0, 86.0, 76.0, 96.0, 73.0, 66.0, 87.0, 47.0, 31.0, 28.0, 28.0, 18.0, 9.0, 14.0, 12.0, 7.0, 3.0, 6.0, 4.0, 4.0, 1.0, 2.0, 1.0, 5.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2755393981933594e-05, -1.2277625501155853e-05, -1.1799857020378113e-05, -1.1322088539600372e-05, -1.0844320058822632e-05, -1.0366551578044891e-05, -9.888783097267151e-06, -9.41101461648941e-06, -8.93324613571167e-06, -8.45547765493393e-06, -7.977709174156189e-06, -7.4999406933784485e-06, -7.022172212600708e-06, -6.5444037318229675e-06, -6.066635251045227e-06, -5.5888667702674866e-06, -5.111098289489746e-06, -4.633329808712006e-06, -4.155561327934265e-06, -3.6777928471565247e-06, -3.200024366378784e-06, -2.7222558856010437e-06, -2.2444874048233032e-06, -1.7667189240455627e-06, -1.2889504432678223e-06, -8.111819624900818e-07, -3.334134817123413e-07, 1.4435499906539917e-07, 6.221234798431396e-07, 1.0998919606208801e-06, 1.5776604413986206e-06, 2.055428922176361e-06, 2.5331974029541016e-06, 3.010965883731842e-06, 3.4887343645095825e-06, 3.966502845287323e-06, 4.4442713260650635e-06, 4.922039806842804e-06, 5.3998082876205444e-06, 5.877576768398285e-06, 6.355345249176025e-06, 6.833113729953766e-06, 7.310882210731506e-06, 7.788650691509247e-06, 8.266419172286987e-06, 8.744187653064728e-06, 9.221956133842468e-06, 9.699724614620209e-06, 1.017749309539795e-05, 1.065526157617569e-05, 1.113303005695343e-05, 1.161079853773117e-05, 1.2088567018508911e-05, 1.2566335499286652e-05, 1.3044103980064392e-05, 1.3521872460842133e-05, 1.3999640941619873e-05, 1.4477409422397614e-05, 1.4955177903175354e-05, 1.5432946383953094e-05, 1.5910714864730835e-05, 1.6388483345508575e-05, 1.6866251826286316e-05, 1.7344020307064056e-05, 1.7821788787841797e-05]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 2.0, 5.0, 12.0, 6.0, 12.0, 12.0, 25.0, 29.0, 33.0, 55.0, 109.0, 197.0, 322.0, 636.0, 1192.0, 2552.0, 5954.0, 15044.0, 46396.0, 202722.0, 3523628.0, 303526.0, 60033.0, 18596.0, 7026.0, 3096.0, 1373.0, 
775.0, 380.0, 204.0, 123.0, 76.0, 47.0, 28.0, 19.0, 12.0, 8.0, 8.0, 2.0, 2.0, 5.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.00015246868133544922, -0.00014627166092395782, -0.00014007464051246643, -0.00013387762010097504, -0.00012768059968948364, -0.00012148357927799225, -0.00011528655886650085, -0.00010908953845500946, -0.00010289251804351807, -9.669549763202667e-05, -9.049847722053528e-05, -8.430145680904388e-05, -7.810443639755249e-05, -7.19074159860611e-05, -6.57103955745697e-05, -5.951337516307831e-05, -5.3316354751586914e-05, -4.711933434009552e-05, -4.0922313928604126e-05, -3.472529351711273e-05, -2.8528273105621338e-05, -2.2331252694129944e-05, -1.613423228263855e-05, -9.937211871147156e-06, -3.7401914596557617e-06, 2.4568289518356323e-06, 8.653849363327026e-06, 1.485086977481842e-05, 2.1047890186309814e-05, 2.724491059780121e-05, 3.34419310092926e-05, 3.9638951420783997e-05, 4.583597183227539e-05, 5.2032992243766785e-05, 5.823001265525818e-05, 6.442703306674957e-05, 7.062405347824097e-05, 7.682107388973236e-05, 8.301809430122375e-05, 8.921511471271515e-05, 9.541213512420654e-05, 0.00010160915553569794, 0.00010780617594718933, 0.00011400319635868073, 0.00012020021677017212, 0.0001263972371816635, 0.0001325942575931549, 0.0001387912780046463, 0.0001449882984161377, 0.0001511853188276291, 0.00015738233923912048, 0.00016357935965061188, 0.00016977638006210327, 0.00017597340047359467, 0.00018217042088508606, 0.00018836744129657745, 0.00019456446170806885, 0.00020076148211956024, 0.00020695850253105164, 0.00021315552294254303, 0.00021935254335403442, 0.00022554956376552582, 0.0002317465841770172, 0.0002379436045885086, 0.000244140625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 4.0, 0.0, 11.0, 8.0, 6.0, 12.0, 19.0, 25.0, 24.0, 36.0, 31.0, 45.0, 46.0, 88.0, 109.0, 146.0, 146.0, 407.0, 1672.0, 470.0, 162.0, 107.0, 82.0, 70.0, 64.0, 60.0, 46.0, 35.0, 28.0, 24.0, 18.0, 16.0, 11.0, 12.0, 8.0, 10.0, 4.0, 4.0, 3.0, 7.0, 3.0, 0.0, 3.0], "bins": [-5.602836608886719e-05, -5.4613687098026276e-05, -5.3199008107185364e-05, -5.178432911634445e-05, -5.036965012550354e-05, -4.895497113466263e-05, -4.7540292143821716e-05, -4.6125613152980804e-05, -4.471093416213989e-05, -4.329625517129898e-05, -4.188157618045807e-05, -4.046689718961716e-05, -3.9052218198776245e-05, -3.763753920793533e-05, -3.622286021709442e-05, -3.480818122625351e-05, -3.33935022354126e-05, -3.1978823244571686e-05, -3.0564144253730774e-05, -2.9149465262889862e-05, -2.773478627204895e-05, -2.632010728120804e-05, -2.4905428290367126e-05, -2.3490749299526215e-05, -2.2076070308685303e-05, -2.066139131784439e-05, -1.924671232700348e-05, -1.7832033336162567e-05, -1.6417354345321655e-05, -1.5002675354480743e-05, -1.3587996363639832e-05, -1.217331737279892e-05, -1.0758638381958008e-05, -9.343959391117096e-06, -7.929280400276184e-06, -6.514601409435272e-06, -5.09992241859436e-06, -3.6852434277534485e-06, -2.2705644369125366e-06, -8.558854460716248e-07, 5.587935447692871e-07, 1.973472535610199e-06, 3.388151526451111e-06, 4.802830517292023e-06, 6.2175095081329346e-06, 7.632188498973846e-06, 9.046867489814758e-06, 1.046154648065567e-05, 1.1876225471496582e-05, 1.3290904462337494e-05, 1.4705583453178406e-05, 1.6120262444019318e-05, 1.753494143486023e-05, 1.894962042570114e-05, 2.0364299416542053e-05, 
2.1778978407382965e-05, 2.3193657398223877e-05, 2.460833638906479e-05, 2.60230153799057e-05, 2.7437694370746613e-05, 2.8852373361587524e-05, 3.0267052352428436e-05, 3.168173134326935e-05, 3.309641033411026e-05, 3.451108932495117e-05]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 5.0, 5.0, 15.0, 38.0, 104.0, 355.0, 253.0, 127.0, 54.0, 34.0, 16.0, 11.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002162230375688523, -0.0001843953796196729, -0.00015256772167049348, -0.00012074004916939884, -8.891239122021943e-05, -5.708473327104002e-05, -2.5257060769945383e-05, 6.570597179234028e-06, 3.839825512841344e-05, 7.022591307759285e-05, 0.00010205357830272987, 0.0001338812435278669, 0.0001657089014770463, 0.00019753655942622572, 0.00022936423192732036, 0.00026119188987649977, 0.0002930195478256792, 0.0003248472057748586, 0.000356674863724038, 0.00038850255077704787, 0.0004203302087262273, 0.0004521578666754067, 0.0004839855246245861, 0.0005158131825737655, 0.0005476408405229449, 0.0005794684984721243, 0.0006112961564213037, 0.0006431238143704832, 0.0006749514723196626, 0.000706779130268842, 0.0007386067882180214, 0.0007704345043748617, 0.000802262220531702, 0.0008340898784808815, 0.0008659175364300609, 0.0008977451943792403, 0.0009295728523284197, 0.0009614005102775991, 0.0009932281682267785, 0.0010250558843836188, 0.0010568834841251373, 0.0010887112002819777, 0.0011205388000234962, 0.0011523665161803365, 0.001184194115921855, 0.0012160218320786953, 0.0012478494318202138, 0.0012796771479770541, 0.0013115047477185726, 0.001343332463875413, 0.0013751600636169314, 0.0014069877797737718, 0.0014388153795152903, 0.0014706430956721306, 0.001502470695413649, 0.0015342984115704894, 0.0015661261277273297, 0.00159795384388417, 0.0016297814436256886, 0.0016616091597825289, 0.0016934367595240474, 0.0017252644756808877, 0.0017570920754224062, 0.0017889197915792465, 0.001820747391320765]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 4.0, 7.0, 5.0, 10.0, 15.0, 8.0, 16.0, 9.0, 31.0, 21.0, 21.0, 23.0, 31.0, 35.0, 36.0, 35.0, 44.0, 42.0, 33.0, 42.0, 49.0, 49.0, 41.0, 56.0, 38.0, 39.0, 46.0, 26.0, 28.0, 29.0, 25.0, 21.0, 12.0, 10.0, 16.0, 14.0, 11.0, 7.0, 7.0, 2.0, 5.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00018328428268432617, -0.00017753895372152328, -0.0001717936247587204, -0.0001660482957959175, -0.00016030296683311462, -0.00015455763787031174, -0.00014881230890750885, -0.00014306697994470596, -0.00013732165098190308, -0.0001315763220191002, -0.0001258309930562973, -0.00012008566409349442, -0.00011434033513069153, -0.00010859500616788864, -0.00010284967720508575, -9.710434824228287e-05, -9.135901927947998e-05, -8.56136903166771e-05, -7.98683613538742e-05, -7.412303239107132e-05, -6.837770342826843e-05, -6.263237446546555e-05, -5.688704550266266e-05, -5.114171653985977e-05, -4.5396387577056885e-05, -3.9651058614254e-05, -3.390572965145111e-05, -2.8160400688648224e-05, -2.2415071725845337e-05, -1.666974276304245e-05, -1.0924413800239563e-05, -5.179084837436676e-06, 5.662441253662109e-07, 6.311573088169098e-06, 1.2056902050971985e-05, 1.7802231013774872e-05, 2.354755997657776e-05, 
2.9292888939380646e-05, 3.503821790218353e-05, 4.078354686498642e-05, 4.652887582778931e-05, 5.2274204790592194e-05, 5.801953375339508e-05, 6.376486271619797e-05, 6.951019167900085e-05, 7.525552064180374e-05, 8.100084960460663e-05, 8.674617856740952e-05, 9.24915075302124e-05, 9.823683649301529e-05, 0.00010398216545581818, 0.00010972749441862106, 0.00011547282338142395, 0.00012121815234422684, 0.00012696348130702972, 0.0001327088102698326, 0.0001384541392326355, 0.00014419946819543839, 0.00014994479715824127, 0.00015569012612104416, 0.00016143545508384705, 0.00016718078404664993, 0.00017292611300945282, 0.0001786714419722557, 0.0001844167709350586]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 3.0, 3.0, 8.0, 13.0, 11.0, 24.0, 30.0, 49.0, 53.0, 89.0, 142.0, 194.0, 345.0, 517.0, 862.0, 1465.0, 2612.0, 4923.0, 10058.0, 22259.0, 63283.0, 270451.0, 525485.0, 89735.0, 29815.0, 12435.0, 5996.0, 3147.0, 1777.0, 1015.0, 628.0, 375.0, 236.0, 158.0, 113.0, 79.0, 59.0, 41.0, 18.0, 15.0, 11.0, 7.0, 6.0, 6.0, 1.0, 3.0, 2.0, 3.0, 3.0, 2.0], "bins": [-0.0001475811004638672, -0.00014349818229675293, -0.00013941526412963867, -0.00013533234596252441, -0.00013124942779541016, -0.0001271665096282959, -0.00012308359146118164, -0.00011900067329406738, -0.00011491775512695312, -0.00011083483695983887, -0.00010675191879272461, -0.00010266900062561035, -9.85860824584961e-05, -9.450316429138184e-05, -9.042024612426758e-05, -8.633732795715332e-05, -8.225440979003906e-05, -7.81714916229248e-05, -7.408857345581055e-05, -7.000565528869629e-05, -6.592273712158203e-05, -6.183981895446777e-05, -5.7756900787353516e-05, -5.367398262023926e-05, -4.9591064453125e-05, -4.550814628601074e-05, -4.1425228118896484e-05, -3.7342309951782227e-05, -3.325939178466797e-05, -2.917647361755371e-05, -2.5093555450439453e-05, -2.1010637283325195e-05, -1.6927719116210938e-05, -1.284480094909668e-05, -8.761882781982422e-06, -4.678964614868164e-06, -5.960464477539062e-07, 3.4868717193603516e-06, 7.569789886474609e-06, 1.1652708053588867e-05, 1.5735626220703125e-05, 1.9818544387817383e-05, 2.390146255493164e-05, 2.79843807220459e-05, 3.2067298889160156e-05, 3.6150217056274414e-05, 4.023313522338867e-05, 4.431605339050293e-05, 4.839897155761719e-05, 5.2481889724731445e-05, 5.65648078918457e-05, 6.064772605895996e-05, 6.473064422607422e-05, 6.881356239318848e-05, 7.289648056030273e-05, 7.697939872741699e-05, 8.106231689453125e-05, 8.514523506164551e-05, 8.922815322875977e-05, 9.331107139587402e-05, 9.739398956298828e-05, 0.00010147690773010254, 0.0001055598258972168, 0.00010964274406433105, 0.00011372566223144531]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 4.0, 4.0, 3.0, 9.0, 10.0, 11.0, 20.0, 14.0, 28.0, 34.0, 43.0, 72.0, 73.0, 94.0, 90.0, 89.0, 85.0, 76.0, 53.0, 50.0, 28.0, 26.0, 19.0, 11.0, 14.0, 6.0, 6.0, 11.0, 3.0, 5.0, 3.0, 3.0, 1.0, 3.0, 3.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.5079975128173828e-05, -1.4542602002620697e-05, -1.4005228877067566e-05, -1.3467855751514435e-05, -1.2930482625961304e-05, -1.2393109500408173e-05, -1.1855736374855042e-05, -1.131836324930191e-05, -1.078099012374878e-05, -1.0243616998195648e-05, -9.706243872642517e-06, -9.168870747089386e-06, -8.631497621536255e-06, -8.094124495983124e-06, -7.556751370429993e-06, 
-7.0193782448768616e-06, -6.4820051193237305e-06, -5.944631993770599e-06, -5.407258868217468e-06, -4.869885742664337e-06, -4.332512617111206e-06, -3.795139491558075e-06, -3.257766366004944e-06, -2.7203932404518127e-06, -2.1830201148986816e-06, -1.6456469893455505e-06, -1.1082738637924194e-06, -5.709007382392883e-07, -3.3527612686157227e-08, 5.038455128669739e-07, 1.041218638420105e-06, 1.578591763973236e-06, 2.115964889526367e-06, 2.6533380150794983e-06, 3.1907111406326294e-06, 3.7280842661857605e-06, 4.265457391738892e-06, 4.802830517292023e-06, 5.340203642845154e-06, 5.877576768398285e-06, 6.414949893951416e-06, 6.952323019504547e-06, 7.489696145057678e-06, 8.02706927061081e-06, 8.56444239616394e-06, 9.101815521717072e-06, 9.639188647270203e-06, 1.0176561772823334e-05, 1.0713934898376465e-05, 1.1251308023929596e-05, 1.1788681149482727e-05, 1.2326054275035858e-05, 1.286342740058899e-05, 1.340080052614212e-05, 1.3938173651695251e-05, 1.4475546777248383e-05, 1.5012919902801514e-05, 1.5550293028354645e-05, 1.6087666153907776e-05, 1.6625039279460907e-05, 1.7162412405014038e-05, 1.769978553056717e-05, 1.82371586561203e-05, 1.877453178167343e-05, 1.9311904907226562e-05]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 11.0, 11.0, 15.0, 26.0, 26.0, 37.0, 66.0, 73.0, 156.0, 163.0, 309.0, 442.0, 575.0, 894.0, 1281.0, 2104.0, 3176.0, 5045.0, 7822.0, 12889.0, 21676.0, 38914.0, 75417.0, 181056.0, 403687.0, 142927.0, 64379.0, 33859.0, 19670.0, 11588.0, 7068.0, 4581.0, 2921.0, 1893.0, 1256.0, 854.0, 538.0, 337.0, 257.0, 191.0, 92.0, 103.0, 44.0, 39.0, 35.0, 19.0, 15.0, 9.0, 5.0, 5.0, 2.0, 2.0, 1.0, 3.0, 1.0], "bins": [-7.56978988647461e-05, -7.343105971813202e-05, -7.116422057151794e-05, -6.889738142490387e-05, -6.66305422782898e-05, -6.436370313167572e-05, -6.209686398506165e-05, -5.983002483844757e-05, -5.7563185691833496e-05, -5.529634654521942e-05, -5.302950739860535e-05, -5.076266825199127e-05, -4.84958291053772e-05, -4.622898995876312e-05, -4.396215081214905e-05, -4.169531166553497e-05, -3.94284725189209e-05, -3.7161633372306824e-05, -3.489479422569275e-05, -3.2627955079078674e-05, -3.03611159324646e-05, -2.8094276785850525e-05, -2.582743763923645e-05, -2.3560598492622375e-05, -2.12937593460083e-05, -1.9026920199394226e-05, -1.676008105278015e-05, -1.4493241906166077e-05, -1.2226402759552002e-05, -9.959563612937927e-06, -7.692724466323853e-06, -5.425885319709778e-06, -3.159046173095703e-06, -8.922070264816284e-07, 1.3746321201324463e-06, 3.641471266746521e-06, 5.908310413360596e-06, 8.17514955997467e-06, 1.0441988706588745e-05, 1.270882785320282e-05, 1.4975666999816895e-05, 1.724250614643097e-05, 1.9509345293045044e-05, 2.177618443965912e-05, 2.4043023586273193e-05, 2.6309862732887268e-05, 2.8576701879501343e-05, 3.084354102611542e-05, 3.311038017272949e-05, 3.537721931934357e-05, 3.764405846595764e-05, 3.9910897612571716e-05, 4.217773675918579e-05, 4.4444575905799866e-05, 4.671141505241394e-05, 4.8978254199028015e-05, 5.124509334564209e-05, 5.3511932492256165e-05, 5.577877163887024e-05, 5.8045610785484314e-05, 6.031244993209839e-05, 6.257928907871246e-05, 6.484612822532654e-05, 6.711296737194061e-05, 6.937980651855469e-05]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 2.0, 2.0, 2.0, 6.0, 3.0, 6.0, 4.0, 6.0, 19.0, 10.0, 10.0, 11.0, 20.0, 17.0, 37.0, 32.0, 32.0, 39.0, 35.0, 33.0, 60.0, 36.0, 47.0, 60.0, 38.0, 48.0, 
47.0, 39.0, 47.0, 26.0, 24.0, 29.0, 32.0, 10.0, 17.0, 16.0, 17.0, 15.0, 7.0, 21.0, 9.0, 12.0, 4.0, 11.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.3974647521972656e-05, -3.2862648367881775e-05, -3.1750649213790894e-05, -3.063865005970001e-05, -2.952665090560913e-05, -2.841465175151825e-05, -2.7302652597427368e-05, -2.6190653443336487e-05, -2.5078654289245605e-05, -2.3966655135154724e-05, -2.2854655981063843e-05, -2.174265682697296e-05, -2.063065767288208e-05, -1.95186585187912e-05, -1.8406659364700317e-05, -1.7294660210609436e-05, -1.6182661056518555e-05, -1.5070661902427673e-05, -1.3958662748336792e-05, -1.284666359424591e-05, -1.173466444015503e-05, -1.0622665286064148e-05, -9.510666131973267e-06, -8.398666977882385e-06, -7.286667823791504e-06, -6.1746686697006226e-06, -5.062669515609741e-06, -3.95067036151886e-06, -2.8386712074279785e-06, -1.7266720533370972e-06, -6.146728992462158e-07, 4.973262548446655e-07, 1.6093254089355469e-06, 2.7213245630264282e-06, 3.8333237171173096e-06, 4.945322871208191e-06, 6.057322025299072e-06, 7.169321179389954e-06, 8.281320333480835e-06, 9.393319487571716e-06, 1.0505318641662598e-05, 1.1617317795753479e-05, 1.272931694984436e-05, 1.3841316103935242e-05, 1.4953315258026123e-05, 1.6065314412117004e-05, 1.7177313566207886e-05, 1.8289312720298767e-05, 1.940131187438965e-05, 2.051331102848053e-05, 2.162531018257141e-05, 2.2737309336662292e-05, 2.3849308490753174e-05, 2.4961307644844055e-05, 2.6073306798934937e-05, 2.7185305953025818e-05, 2.82973051071167e-05, 2.940930426120758e-05, 3.052130341529846e-05, 3.163330256938934e-05, 3.2745301723480225e-05, 3.3857300877571106e-05, 3.496930003166199e-05, 3.608129918575287e-05, 3.719329833984375e-05]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 6.0, 2.0, 5.0, 13.0, 9.0, 11.0, 29.0, 30.0, 45.0, 53.0, 97.0, 124.0, 177.0, 313.0, 466.0, 616.0, 1114.0, 1914.0, 2919.0, 6415.0, 14211.0, 31290.0, 129829.0, 629828.0, 161251.0, 36182.0, 15995.0, 7078.0, 3230.0, 2084.0, 1164.0, 601.0, 475.0, 314.0, 211.0, 128.0, 107.0, 57.0, 51.0, 41.0, 26.0, 12.0, 18.0, 4.0, 6.0, 4.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.537799835205078e-05, -1.4919787645339966e-05, -1.446157693862915e-05, -1.4003366231918335e-05, -1.354515552520752e-05, -1.3086944818496704e-05, -1.2628734111785889e-05, -1.2170523405075073e-05, -1.1712312698364258e-05, -1.1254101991653442e-05, -1.0795891284942627e-05, -1.0337680578231812e-05, -9.879469871520996e-06, -9.42125916481018e-06, -8.963048458099365e-06, -8.50483775138855e-06, -8.046627044677734e-06, -7.588416337966919e-06, -7.1302056312561035e-06, -6.671994924545288e-06, -6.213784217834473e-06, -5.755573511123657e-06, -5.297362804412842e-06, -4.839152097702026e-06, -4.380941390991211e-06, -3.9227306842803955e-06, -3.46451997756958e-06, -3.0063092708587646e-06, -2.5480985641479492e-06, -2.089887857437134e-06, -1.6316771507263184e-06, -1.173466444015503e-06, -7.152557373046875e-07, -2.5704503059387207e-07, 2.0116567611694336e-07, 6.593763828277588e-07, 1.1175870895385742e-06, 1.5757977962493896e-06, 2.034008502960205e-06, 2.4922192096710205e-06, 2.950429916381836e-06, 3.4086406230926514e-06, 3.866851329803467e-06, 4.325062036514282e-06, 4.783272743225098e-06, 5.241483449935913e-06, 5.6996941566467285e-06, 6.157904863357544e-06, 6.616115570068359e-06, 7.074326276779175e-06, 7.53253698348999e-06, 7.990747690200806e-06, 8.448958396911621e-06, 8.907169103622437e-06, 
9.365379810333252e-06, 9.823590517044067e-06, 1.0281801223754883e-05, 1.0740011930465698e-05, 1.1198222637176514e-05, 1.1656433343887329e-05, 1.2114644050598145e-05, 1.257285475730896e-05, 1.3031065464019775e-05, 1.348927617073059e-05, 1.3947486877441406e-05]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 4.0, 5.0, 5.0, 4.0, 6.0, 14.0, 10.0, 16.0, 32.0, 17.0, 52.0, 45.0, 91.0, 78.0, 159.0, 93.0, 100.0, 55.0, 64.0, 29.0, 36.0, 13.0, 23.0, 11.0, 10.0, 4.0, 6.0, 8.0, 3.0, 4.0, 1.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-5.7220458984375e-06, -5.574896931648254e-06, -5.427747964859009e-06, -5.280598998069763e-06, -5.133450031280518e-06, -4.986301064491272e-06, -4.839152097702026e-06, -4.692003130912781e-06, -4.544854164123535e-06, -4.3977051973342896e-06, -4.250556230545044e-06, -4.103407263755798e-06, -3.956258296966553e-06, -3.809109330177307e-06, -3.6619603633880615e-06, -3.514811396598816e-06, -3.3676624298095703e-06, -3.2205134630203247e-06, -3.073364496231079e-06, -2.9262155294418335e-06, -2.779066562652588e-06, -2.6319175958633423e-06, -2.4847686290740967e-06, -2.337619662284851e-06, -2.1904706954956055e-06, -2.04332172870636e-06, -1.8961727619171143e-06, -1.7490237951278687e-06, -1.601874828338623e-06, -1.4547258615493774e-06, -1.3075768947601318e-06, -1.1604279279708862e-06, -1.0132789611816406e-06, -8.66129994392395e-07, -7.189810276031494e-07, -5.718320608139038e-07, -4.246830940246582e-07, -2.775341272354126e-07, -1.30385160446167e-07, 1.6763806343078613e-08, 1.6391277313232422e-07, 3.110617399215698e-07, 4.5821070671081543e-07, 6.05359673500061e-07, 7.525086402893066e-07, 8.996576070785522e-07, 1.0468065738677979e-06, 1.1939555406570435e-06, 1.341104507446289e-06, 1.4882534742355347e-06, 1.6354024410247803e-06, 1.7825514078140259e-06, 1.9297003746032715e-06, 2.076849341392517e-06, 2.2239983081817627e-06, 2.3711472749710083e-06, 2.518296241760254e-06, 2.6654452085494995e-06, 2.812594175338745e-06, 2.9597431421279907e-06, 3.1068921089172363e-06, 3.254041075706482e-06, 3.4011900424957275e-06, 3.548339009284973e-06, 3.6954879760742188e-06]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 6.0, 10.0, 8.0, 9.0, 15.0, 23.0, 26.0, 36.0, 44.0, 77.0, 104.0, 176.0, 219.0, 342.0, 509.0, 822.0, 1811.0, 3459.0, 10532.0, 31146.0, 249962.0, 654514.0, 65639.0, 18138.0, 5320.0, 2550.0, 1063.0, 725.0, 405.0, 276.0, 156.0, 105.0, 102.0, 64.0, 51.0, 23.0, 30.0, 17.0, 14.0, 8.0, 7.0, 3.0, 5.0, 2.0, 4.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.519918441772461e-05, -1.4696270227432251e-05, -1.4193356037139893e-05, -1.3690441846847534e-05, -1.3187527656555176e-05, -1.2684613466262817e-05, -1.2181699275970459e-05, -1.16787850856781e-05, -1.1175870895385742e-05, -1.0672956705093384e-05, -1.0170042514801025e-05, -9.667128324508667e-06, -9.164214134216309e-06, -8.66129994392395e-06, -8.158385753631592e-06, -7.655471563339233e-06, -7.152557373046875e-06, -6.649643182754517e-06, -6.146728992462158e-06, -5.6438148021698e-06, -5.140900611877441e-06, -4.637986421585083e-06, -4.135072231292725e-06, -3.632158041000366e-06, -3.129243850708008e-06, -2.6263296604156494e-06, -2.123415470123291e-06, -1.6205012798309326e-06, -1.1175870895385742e-06, -6.146728992462158e-07, -1.1175870895385742e-07, 3.91155481338501e-07, 
8.940696716308594e-07, 1.3969838619232178e-06, 1.8998980522155762e-06, 2.4028122425079346e-06, 2.905726432800293e-06, 3.4086406230926514e-06, 3.91155481338501e-06, 4.414469003677368e-06, 4.9173831939697266e-06, 5.420297384262085e-06, 5.923211574554443e-06, 6.426125764846802e-06, 6.92903995513916e-06, 7.4319541454315186e-06, 7.934868335723877e-06, 8.437782526016235e-06, 8.940696716308594e-06, 9.443610906600952e-06, 9.94652509689331e-06, 1.0449439287185669e-05, 1.0952353477478027e-05, 1.1455267667770386e-05, 1.1958181858062744e-05, 1.2461096048355103e-05, 1.2964010238647461e-05, 1.346692442893982e-05, 1.3969838619232178e-05, 1.4472752809524536e-05, 1.4975666999816895e-05, 1.5478581190109253e-05, 1.598149538040161e-05, 1.648440957069397e-05, 1.6987323760986328e-05]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 5.0, 11.0, 13.0, 28.0, 27.0, 44.0, 73.0, 117.0, 161.0, 180.0, 131.0, 56.0, 60.0, 22.0, 30.0, 14.0, 10.0, 10.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.463859558105469e-06, -8.230097591876984e-06, -7.996335625648499e-06, -7.762573659420013e-06, -7.528811693191528e-06, -7.295049726963043e-06, -7.061287760734558e-06, -6.827525794506073e-06, -6.593763828277588e-06, -6.360001862049103e-06, -6.126239895820618e-06, -5.8924779295921326e-06, -5.6587159633636475e-06, -5.424953997135162e-06, -5.191192030906677e-06, -4.957430064678192e-06, -4.723668098449707e-06, -4.489906132221222e-06, -4.256144165992737e-06, -4.022382199764252e-06, -3.7886202335357666e-06, -3.5548582673072815e-06, -3.3210963010787964e-06, -3.0873343348503113e-06, -2.853572368621826e-06, -2.619810402393341e-06, -2.386048436164856e-06, -2.152286469936371e-06, -1.9185245037078857e-06, -1.6847625374794006e-06, -1.4510005712509155e-06, -1.2172386050224304e-06, -9.834766387939453e-07, -7.497146725654602e-07, -5.159527063369751e-07, -2.8219074010849e-07, -4.842877388000488e-08, 1.8533319234848022e-07, 4.1909515857696533e-07, 6.528571248054504e-07, 8.866190910339355e-07, 1.1203810572624207e-06, 1.3541430234909058e-06, 1.5879049897193909e-06, 1.821666955947876e-06, 2.055428922176361e-06, 2.289190888404846e-06, 2.5229528546333313e-06, 2.7567148208618164e-06, 2.9904767870903015e-06, 3.2242387533187866e-06, 3.4580007195472717e-06, 3.691762685775757e-06, 3.925524652004242e-06, 4.159286618232727e-06, 4.393048584461212e-06, 4.626810550689697e-06, 4.860572516918182e-06, 5.0943344831466675e-06, 5.328096449375153e-06, 5.561858415603638e-06, 5.795620381832123e-06, 6.029382348060608e-06, 6.263144314289093e-06, 6.496906280517578e-06]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 5.0, 5.0, 9.0, 37.0, 69.0, 145.0, 378.0, 190.0, 82.0, 46.0, 24.0, 14.0, 7.0, 3.0, 1.0, 1.0], "bins": [-0.00134546949993819, -0.0013208773452788591, -0.0012962851906195283, -0.0012716931523755193, -0.0012471009977161884, -0.0012225088430568576, -0.0011979166883975267, -0.0011733246501535177, -0.0011487324954941869, -0.001124140340834856, -0.0010995481861755252, -0.0010749561479315162, -0.0010503639932721853, -0.0010257718386128545, 
-0.0010011796839535236, -0.0009765876457095146, -0.0009519954910501838, -0.0009274033363908529, -0.000902811239939183, -0.0008782190852798522, -0.0008536269888281822, -0.0008290348341688514, -0.0008044427377171814, -0.0007798505830578506, -0.0007552584283985198, -0.0007306662737391889, -0.000706074177287519, -0.0006814820226281881, -0.0006568899261765182, -0.0006322977715171874, -0.0006077056750655174, -0.0005831135204061866, -0.0005585214821621776, -0.0005339293275028467, -0.0005093372310511768, -0.0004847451054956764, -0.000460152979940176, -0.00043556082528084517, -0.00041096872882917523, -0.0003863765741698444, -0.00036178441951051354, -0.00033719229395501316, -0.00031260016839951277, -0.0002880080428440124, -0.000263415917288512, -0.00023882377718109637, -0.000214231651625596, -0.0001896395260700956, -0.00016504741506651044, -0.00014045528951101005, -0.00011586316395550966, -9.127103112405166e-05, -6.667890556855127e-05, -4.208677273709327e-05, -1.749464718159288e-05, 7.0974783739075065e-06, 3.1689603929407895e-05, 5.628172948490828e-05, 8.087385504040867e-05, 0.00010546598787186667, 0.00013005812070332468, 0.00015465024625882506, 0.00017924237181432545, 0.00020383449736982584, 0.00022842662292532623]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 4.0, 3.0, 7.0, 7.0, 8.0, 6.0, 9.0, 13.0, 19.0, 13.0, 13.0, 26.0, 22.0, 27.0, 36.0, 42.0, 38.0, 35.0, 40.0, 40.0, 30.0, 37.0, 30.0, 44.0, 43.0, 39.0, 35.0, 35.0, 33.0, 30.0, 26.0, 30.0, 33.0, 28.0, 27.0, 24.0, 13.0, 17.0, 14.0, 12.0, 10.0, 2.0, 5.0, 6.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014394521713256836, -0.00013869162648916245, -0.00013343803584575653, -0.00012818444520235062, -0.0001229308545589447, -0.00011767726391553879, -0.00011242367327213287, -0.00010717008262872696, -0.00010191649198532104, -9.666290134191513e-05, -9.140931069850922e-05, -8.61557200551033e-05, -8.090212941169739e-05, -7.564853876829147e-05, -7.039494812488556e-05, -6.514135748147964e-05, -5.988776683807373e-05, -5.4634176194667816e-05, -4.93805855512619e-05, -4.412699490785599e-05, -3.887340426445007e-05, -3.361981362104416e-05, -2.8366222977638245e-05, -2.311263233423233e-05, -1.7859041690826416e-05, -1.2605451047420502e-05, -7.351860404014587e-06, -2.098269760608673e-06, 3.155320882797241e-06, 8.408911526203156e-06, 1.366250216960907e-05, 1.8916092813014984e-05, 2.41696834564209e-05, 2.9423274099826813e-05, 3.467686474323273e-05, 3.993045538663864e-05, 4.5184046030044556e-05, 5.043763667345047e-05, 5.5691227316856384e-05, 6.09448179602623e-05, 6.619840860366821e-05, 7.145199924707413e-05, 7.670558989048004e-05, 8.195918053388596e-05, 8.721277117729187e-05, 9.246636182069778e-05, 9.77199524641037e-05, 0.00010297354310750961, 0.00010822713375091553, 0.00011348072439432144, 0.00011873431503772736, 0.00012398790568113327, 0.00012924149632453918, 0.0001344950869679451, 0.000139748677611351, 0.00014500226825475693, 0.00015025585889816284, 0.00015550944954156876, 0.00016076304018497467, 0.00016601663082838058, 0.0001712702214717865, 0.0001765238121151924, 0.00018177740275859833, 0.00018703099340200424, 0.00019228458404541016]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 3.0, 6.0, 17.0, 28.0, 50.0, 120.0, 218.0, 439.0, 897.0, 1837.0, 3952.0, 8950.0, 21846.0, 64043.0, 393311.0, 3518458.0, 117931.0, 36627.0, 13708.0, 5642.0, 2625.0, 1259.0, 719.0, 394.0, 259.0, 188.0, 
154.0, 96.0, 102.0, 71.0, 63.0, 55.0, 30.0, 34.0, 34.0, 32.0, 20.0, 21.0, 10.0, 7.0, 13.0, 5.0, 5.0, 5.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.957220077514648e-05, -7.520709186792374e-05, -7.084198296070099e-05, -6.647687405347824e-05, -6.211176514625549e-05, -5.7746656239032745e-05, -5.338154733181e-05, -4.901643842458725e-05, -4.46513295173645e-05, -4.0286220610141754e-05, -3.5921111702919006e-05, -3.155600279569626e-05, -2.719089388847351e-05, -2.2825784981250763e-05, -1.8460676074028015e-05, -1.4095567166805267e-05, -9.73045825958252e-06, -5.365349352359772e-06, -1.000240445137024e-06, 3.364868462085724e-06, 7.729977369308472e-06, 1.209508627653122e-05, 1.6460195183753967e-05, 2.0825304090976715e-05, 2.5190412998199463e-05, 2.955552190542221e-05, 3.392063081264496e-05, 3.8285739719867706e-05, 4.2650848627090454e-05, 4.70159575343132e-05, 5.138106644153595e-05, 5.57461753487587e-05, 6.0111284255981445e-05, 6.447639316320419e-05, 6.884150207042694e-05, 7.320661097764969e-05, 7.757171988487244e-05, 8.193682879209518e-05, 8.630193769931793e-05, 9.066704660654068e-05, 9.503215551376343e-05, 9.939726442098618e-05, 0.00010376237332820892, 0.00010812748223543167, 0.00011249259114265442, 0.00011685770004987717, 0.00012122280895709991, 0.00012558791786432266, 0.0001299530267715454, 0.00013431813567876816, 0.0001386832445859909, 0.00014304835349321365, 0.0001474134624004364, 0.00015177857130765915, 0.0001561436802148819, 0.00016050878912210464, 0.0001648738980293274, 0.00016923900693655014, 0.0001736041158437729, 0.00017796922475099564, 0.00018233433365821838, 0.00018669944256544113, 0.00019106455147266388, 0.00019542966037988663, 0.00019979476928710938]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 4.0, 11.0, 12.0, 15.0, 16.0, 33.0, 50.0, 57.0, 112.0, 114.0, 148.0, 111.0, 90.0, 66.0, 36.0, 44.0, 23.0, 25.0, 9.0, 8.0, 8.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3649463653564453e-05, -1.2916512787342072e-05, -1.218356192111969e-05, -1.1450611054897308e-05, -1.0717660188674927e-05, -9.984709322452545e-06, -9.251758456230164e-06, -8.518807590007782e-06, -7.7858567237854e-06, -7.052905857563019e-06, -6.319954991340637e-06, -5.587004125118256e-06, -4.854053258895874e-06, -4.1211023926734924e-06, -3.388151526451111e-06, -2.6552006602287292e-06, -1.9222497940063477e-06, -1.189298927783966e-06, -4.5634806156158447e-07, 2.766028046607971e-07, 1.0095536708831787e-06, 1.7425045371055603e-06, 2.475455403327942e-06, 3.2084062695503235e-06, 3.941357135772705e-06, 4.674308001995087e-06, 5.407258868217468e-06, 6.14020973443985e-06, 6.8731606006622314e-06, 7.606111466884613e-06, 8.339062333106995e-06, 9.072013199329376e-06, 9.804964065551758e-06, 1.053791493177414e-05, 1.1270865797996521e-05, 1.2003816664218903e-05, 1.2736767530441284e-05, 1.3469718396663666e-05, 1.4202669262886047e-05, 1.4935620129108429e-05, 1.566857099533081e-05, 1.6401521861553192e-05, 1.7134472727775574e-05, 1.7867423593997955e-05, 1.8600374460220337e-05, 1.933332532644272e-05, 2.00662761926651e-05, 2.079922705888748e-05, 2.1532177925109863e-05, 2.2265128791332245e-05, 2.2998079657554626e-05, 2.3731030523777008e-05, 2.446398138999939e-05, 2.519693225622177e-05, 2.5929883122444153e-05, 2.6662833988666534e-05, 2.7395784854888916e-05, 
2.8128735721111298e-05, 2.886168658733368e-05, 2.959463745355606e-05, 3.0327588319778442e-05, 3.1060539186000824e-05, 3.1793490052223206e-05, 3.252644091844559e-05, 3.325939178466797e-05]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 4.0, 4.0, 4.0, 3.0, 9.0, 8.0, 14.0, 29.0, 33.0, 74.0, 139.0, 241.0, 516.0, 1253.0, 3355.0, 10329.0, 36701.0, 198185.0, 3566080.0, 308565.0, 48989.0, 12841.0, 4139.0, 1527.0, 617.0, 288.0, 147.0, 63.0, 37.0, 27.0, 15.0, 13.0, 9.0, 9.0, 5.0, 4.0, 2.0, 5.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0001423358917236328, -0.0001371782273054123, -0.00013202056288719177, -0.00012686289846897125, -0.00012170523405075073, -0.00011654756963253021, -0.00011138990521430969, -0.00010623224079608917, -0.00010107457637786865, -9.591691195964813e-05, -9.075924754142761e-05, -8.560158312320709e-05, -8.044391870498657e-05, -7.528625428676605e-05, -7.012858986854553e-05, -6.497092545032501e-05, -5.981326103210449e-05, -5.465559661388397e-05, -4.949793219566345e-05, -4.434026777744293e-05, -3.918260335922241e-05, -3.402493894100189e-05, -2.8867274522781372e-05, -2.3709610104560852e-05, -1.8551945686340332e-05, -1.3394281268119812e-05, -8.236616849899292e-06, -3.078952431678772e-06, 2.078711986541748e-06, 7.236376404762268e-06, 1.2394040822982788e-05, 1.7551705241203308e-05, 2.2709369659423828e-05, 2.7867034077644348e-05, 3.302469849586487e-05, 3.818236291408539e-05, 4.334002733230591e-05, 4.849769175052643e-05, 5.365535616874695e-05, 5.881302058696747e-05, 6.397068500518799e-05, 6.912834942340851e-05, 7.428601384162903e-05, 7.944367825984955e-05, 8.460134267807007e-05, 8.975900709629059e-05, 9.491667151451111e-05, 0.00010007433593273163, 0.00010523200035095215, 0.00011038966476917267, 0.00011554732918739319, 0.00012070499360561371, 0.00012586265802383423, 0.00013102032244205475, 0.00013617798686027527, 0.0001413356512784958, 0.0001464933156967163, 0.00015165098011493683, 0.00015680864453315735, 0.00016196630895137787, 0.0001671239733695984, 0.0001722816377878189, 0.00017743930220603943, 0.00018259696662425995, 0.00018775463104248047]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 3.0, 6.0, 6.0, 11.0, 11.0, 9.0, 17.0, 19.0, 27.0, 33.0, 46.0, 54.0, 72.0, 91.0, 141.0, 172.0, 267.0, 770.0, 1262.0, 396.0, 184.0, 121.0, 85.0, 64.0, 52.0, 37.0, 28.0, 33.0, 16.0, 16.0, 12.0, 9.0, 6.0, 5.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6835670471191406e-05, -3.5177916288375854e-05, -3.35201621055603e-05, -3.186240792274475e-05, -3.02046537399292e-05, -2.8546899557113647e-05, -2.6889145374298096e-05, -2.5231391191482544e-05, -2.3573637008666992e-05, -2.191588282585144e-05, -2.025812864303589e-05, -1.8600374460220337e-05, -1.6942620277404785e-05, -1.5284866094589233e-05, -1.3627111911773682e-05, -1.196935772895813e-05, -1.0311603546142578e-05, -8.653849363327026e-06, -6.996095180511475e-06, -5.338340997695923e-06, -3.680586814880371e-06, -2.0228326320648193e-06, -3.650784492492676e-07, 1.2926757335662842e-06, 2.950429916381836e-06, 4.608184099197388e-06, 6.2659382820129395e-06, 7.923692464828491e-06, 9.581446647644043e-06, 1.1239200830459595e-05, 1.2896955013275146e-05, 1.4554709196090698e-05, 1.621246337890625e-05, 1.7870217561721802e-05, 
1.9527971744537354e-05, 2.1185725927352905e-05, 2.2843480110168457e-05, 2.450123429298401e-05, 2.615898847579956e-05, 2.7816742658615112e-05, 2.9474496841430664e-05, 3.1132251024246216e-05, 3.279000520706177e-05, 3.444775938987732e-05, 3.610551357269287e-05, 3.776326775550842e-05, 3.9421021938323975e-05, 4.1078776121139526e-05, 4.273653030395508e-05, 4.439428448677063e-05, 4.605203866958618e-05, 4.7709792852401733e-05, 4.9367547035217285e-05, 5.102530121803284e-05, 5.268305540084839e-05, 5.434080958366394e-05, 5.599856376647949e-05, 5.7656317949295044e-05, 5.9314072132110596e-05, 6.097182631492615e-05, 6.26295804977417e-05, 6.428733468055725e-05, 6.59450888633728e-05, 6.760284304618835e-05, 6.92605972290039e-05]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 12.0, 28.0, 46.0, 141.0, 349.0, 257.0, 106.0, 43.0, 11.0, 9.0, 7.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00032045351690612733, -0.0002907697344198823, -0.0002610859810374677, -0.00023140219855122268, -0.00020171841606497765, -0.0001720346335787326, -0.0001423508656444028, -0.000112667097710073, -8.298331522382796e-05, -5.3299540013540536e-05, -2.3615764803253114e-05, 6.068010407034308e-06, 3.575178561732173e-05, 6.543556810356677e-05, 9.511933603789657e-05, 0.00012480310397222638, 0.00015448688645847142, 0.00018417066894471645, 0.00021385443687904626, 0.00024353820481337607, 0.0002732219872996211, 0.00030290576978586614, 0.0003325895522721112, 0.00036227330565452576, 0.0003919570881407708, 0.00042164087062701583, 0.0004513246240094304, 0.00048100840649567544, 0.0005106921889819205, 0.0005403759423643351, 0.0005700597539544106, 0.0005997435073368251, 0.0006294272607192397, 0.0006591110141016543, 0.0006887948256917298, 0.0007184785790741444, 0.0007481623906642199, 0.0007778461440466344, 0.000807529897429049, 0.0008372137090191245, 0.0008668974624015391, 0.0008965812157839537, 0.0009262650273740292, 0.0009559487807564437, 0.0009856325341388583, 0.0010153163457289338, 0.0010450001573190093, 0.001074683852493763, 0.0011043676640838385, 0.001134051475673914, 0.0011637351708486676, 0.0011934189824387431, 0.0012231027940288186, 0.0012527864892035723, 0.0012824703007936478, 0.0013121541123837233, 0.0013418379239737988, 0.0013715217355638742, 0.001401205430738628, 0.0014308892423287034, 0.001460573053918779, 0.0014902567490935326, 0.001519940560683608, 0.0015496243722736835, 0.0015793080674484372]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 6.0, 9.0, 4.0, 13.0, 19.0, 16.0, 21.0, 30.0, 46.0, 53.0, 52.0, 62.0, 62.0, 66.0, 76.0, 72.0, 68.0, 64.0, 51.0, 38.0, 49.0, 33.0, 27.0, 16.0, 16.0, 15.0, 5.0, 6.0, 5.0, 4.0, 5.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002200007438659668, -0.00021190103143453598, -0.00020380131900310516, -0.00019570160657167435, -0.00018760189414024353, -0.00017950218170881271, -0.0001714024692773819, -0.00016330275684595108, -0.00015520304441452026, -0.00014710333198308945, -0.00013900361955165863, -0.00013090390712022781, -0.000122804194688797, -0.00011470448225736618, -0.00010660476982593536, -9.850505739450455e-05, -9.040534496307373e-05, 
-8.230563253164291e-05, -7.42059201002121e-05, -6.610620766878128e-05, -5.8006495237350464e-05, -4.990678280591965e-05, -4.180707037448883e-05, -3.3707357943058014e-05, -2.5607645511627197e-05, -1.750793308019638e-05, -9.408220648765564e-06, -1.3085082173347473e-06, 6.791204214096069e-06, 1.4890916645526886e-05, 2.2990629076957703e-05, 3.109034150838852e-05, 3.9190053939819336e-05, 4.728976637125015e-05, 5.538947880268097e-05, 6.348919123411179e-05, 7.15889036655426e-05, 7.968861609697342e-05, 8.778832852840424e-05, 9.588804095983505e-05, 0.00010398775339126587, 0.00011208746582269669, 0.0001201871782541275, 0.00012828689068555832, 0.00013638660311698914, 0.00014448631554841995, 0.00015258602797985077, 0.00016068574041128159, 0.0001687854528427124, 0.00017688516527414322, 0.00018498487770557404, 0.00019308459013700485, 0.00020118430256843567, 0.00020928401499986649, 0.0002173837274312973, 0.00022548343986272812, 0.00023358315229415894, 0.00024168286472558975, 0.00024978257715702057, 0.0002578822895884514, 0.0002659820020198822, 0.000274081714451313, 0.00028218142688274384, 0.00029028113931417465, 0.00029838085174560547]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 6.0, 7.0, 10.0, 13.0, 30.0, 26.0, 45.0, 75.0, 117.0, 173.0, 312.0, 562.0, 1020.0, 2247.0, 6335.0, 20367.0, 88901.0, 709674.0, 171580.0, 31956.0, 9001.0, 3180.0, 1345.0, 628.0, 345.0, 226.0, 122.0, 78.0, 45.0, 31.0, 30.0, 23.0, 12.0, 11.0, 7.0, 4.0, 6.0, 3.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00022161006927490234, -0.00021502748131752014, -0.00020844489336013794, -0.00020186230540275574, -0.00019527971744537354, -0.00018869712948799133, -0.00018211454153060913, -0.00017553195357322693, -0.00016894936561584473, -0.00016236677765846252, -0.00015578418970108032, -0.00014920160174369812, -0.00014261901378631592, -0.00013603642582893372, -0.00012945383787155151, -0.0001228712499141693, -0.00011628866195678711, -0.00010970607399940491, -0.0001031234860420227, -9.65408980846405e-05, -8.99583101272583e-05, -8.33757221698761e-05, -7.67931342124939e-05, -7.02105462551117e-05, -6.362795829772949e-05, -5.704537034034729e-05, -5.046278238296509e-05, -4.3880194425582886e-05, -3.7297606468200684e-05, -3.071501851081848e-05, -2.413243055343628e-05, -1.7549842596054077e-05, -1.0967254638671875e-05, -4.384666681289673e-06, 2.1979212760925293e-06, 8.780509233474731e-06, 1.5363097190856934e-05, 2.1945685148239136e-05, 2.8528273105621338e-05, 3.511086106300354e-05, 4.169344902038574e-05, 4.8276036977767944e-05, 5.4858624935150146e-05, 6.144121289253235e-05, 6.802380084991455e-05, 7.460638880729675e-05, 8.118897676467896e-05, 8.777156472206116e-05, 9.435415267944336e-05, 0.00010093674063682556, 0.00010751932859420776, 0.00011410191655158997, 0.00012068450450897217, 0.00012726709246635437, 0.00013384968042373657, 0.00014043226838111877, 0.00014701485633850098, 0.00015359744429588318, 0.00016018003225326538, 0.00016676262021064758, 0.00017334520816802979, 0.000179927796125412, 0.0001865103840827942, 0.0001930929720401764, 0.0001996755599975586]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 7.0, 10.0, 15.0, 23.0, 37.0, 26.0, 51.0, 63.0, 99.0, 107.0, 143.0, 103.0, 100.0, 68.0, 52.0, 28.0, 20.0, 23.0, 9.0, 3.0, 4.0, 7.0, 2.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5735626220703125e-05, -1.5035271644592285e-05, -1.4334917068481445e-05, -1.3634562492370605e-05, -1.2934207916259766e-05, -1.2233853340148926e-05, -1.1533498764038086e-05, -1.0833144187927246e-05, -1.0132789611816406e-05, -9.432435035705566e-06, -8.732080459594727e-06, -8.031725883483887e-06, -7.331371307373047e-06, -6.631016731262207e-06, -5.930662155151367e-06, -5.230307579040527e-06, -4.5299530029296875e-06, -3.829598426818848e-06, -3.129243850708008e-06, -2.428889274597168e-06, -1.7285346984863281e-06, -1.0281801223754883e-06, -3.2782554626464844e-07, 3.725290298461914e-07, 1.0728836059570312e-06, 1.773238182067871e-06, 2.473592758178711e-06, 3.1739473342895508e-06, 3.874301910400391e-06, 4.5746564865112305e-06, 5.27501106262207e-06, 5.97536563873291e-06, 6.67572021484375e-06, 7.37607479095459e-06, 8.07642936706543e-06, 8.77678394317627e-06, 9.47713851928711e-06, 1.017749309539795e-05, 1.0877847671508789e-05, 1.1578202247619629e-05, 1.2278556823730469e-05, 1.2978911399841309e-05, 1.3679265975952148e-05, 1.4379620552062988e-05, 1.5079975128173828e-05, 1.5780329704284668e-05, 1.6480684280395508e-05, 1.7181038856506348e-05, 1.7881393432617188e-05, 1.8581748008728027e-05, 1.9282102584838867e-05, 1.9982457160949707e-05, 2.0682811737060547e-05, 2.1383166313171387e-05, 2.2083520889282227e-05, 2.2783875465393066e-05, 2.3484230041503906e-05, 2.4184584617614746e-05, 2.4884939193725586e-05, 2.5585293769836426e-05, 2.6285648345947266e-05, 2.6986002922058105e-05, 2.7686357498168945e-05, 2.8386712074279785e-05, 2.9087066650390625e-05]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 3.0, 7.0, 10.0, 11.0, 20.0, 14.0, 21.0, 28.0, 48.0, 69.0, 78.0, 109.0, 145.0, 231.0, 310.0, 445.0, 642.0, 947.0, 1451.0, 2111.0, 3305.0, 4832.0, 7817.0, 13028.0, 22214.0, 40288.0, 77669.0, 170858.0, 395572.0, 146863.0, 69618.0, 36396.0, 20303.0, 11825.0, 7391.0, 4655.0, 2966.0, 1921.0, 1333.0, 888.0, 652.0, 432.0, 306.0, 207.0, 132.0, 100.0, 78.0, 57.0, 42.0, 29.0, 24.0, 22.0, 15.0, 6.0, 9.0, 7.0, 2.0, 2.0, 3.0, 3.0], "bins": [-6.771087646484375e-05, -6.562191992998123e-05, -6.353296339511871e-05, -6.14440068602562e-05, -5.935505032539368e-05, -5.726609379053116e-05, -5.517713725566864e-05, -5.308818072080612e-05, -5.0999224185943604e-05, -4.8910267651081085e-05, -4.682131111621857e-05, -4.473235458135605e-05, -4.264339804649353e-05, -4.055444151163101e-05, -3.8465484976768494e-05, -3.6376528441905975e-05, -3.428757190704346e-05, -3.219861537218094e-05, -3.010965883731842e-05, -2.8020702302455902e-05, -2.5931745767593384e-05, -2.3842789232730865e-05, -2.1753832697868347e-05, -1.966487616300583e-05, -1.757591962814331e-05, -1.5486963093280792e-05, -1.3398006558418274e-05, -1.1309050023555756e-05, -9.220093488693237e-06, -7.131136953830719e-06, -5.042180418968201e-06, -2.9532238841056824e-06, -8.642673492431641e-07, 1.2246891856193542e-06, 3.3136457204818726e-06, 5.402602255344391e-06, 7.491558790206909e-06, 9.580515325069427e-06, 1.1669471859931946e-05, 1.3758428394794464e-05, 1.5847384929656982e-05, 1.79363414645195e-05, 2.002529799938202e-05, 2.2114254534244537e-05, 2.4203211069107056e-05, 2.6292167603969574e-05, 2.8381124138832092e-05, 3.047008067369461e-05, 3.255903720855713e-05, 3.464799374341965e-05, 3.6736950278282166e-05, 3.8825906813144684e-05, 4.09148633480072e-05, 4.300381988286972e-05, 4.509277641773224e-05, 4.718173295259476e-05, 
4.9270689487457275e-05, 5.1359646022319794e-05, 5.344860255718231e-05, 5.553755909204483e-05, 5.762651562690735e-05, 5.971547216176987e-05, 6.180442869663239e-05, 6.38933852314949e-05, 6.598234176635742e-05]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 4.0, 3.0, 2.0, 3.0, 3.0, 5.0, 8.0, 8.0, 9.0, 14.0, 19.0, 12.0, 16.0, 17.0, 25.0, 37.0, 32.0, 33.0, 36.0, 32.0, 45.0, 30.0, 40.0, 49.0, 50.0, 40.0, 48.0, 47.0, 43.0, 34.0, 39.0, 33.0, 25.0, 29.0, 15.0, 16.0, 17.0, 19.0, 9.0, 8.0, 8.0, 5.0, 5.0, 7.0, 7.0, 3.0, 5.0, 1.0, 6.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.272294998168945e-05, -3.1682662665843964e-05, -3.0642375349998474e-05, -2.9602088034152985e-05, -2.8561800718307495e-05, -2.7521513402462006e-05, -2.6481226086616516e-05, -2.5440938770771027e-05, -2.4400651454925537e-05, -2.3360364139080048e-05, -2.2320076823234558e-05, -2.127978950738907e-05, -2.023950219154358e-05, -1.919921487569809e-05, -1.81589275598526e-05, -1.711864024400711e-05, -1.607835292816162e-05, -1.5038065612316132e-05, -1.3997778296470642e-05, -1.2957490980625153e-05, -1.1917203664779663e-05, -1.0876916348934174e-05, -9.836629033088684e-06, -8.796341717243195e-06, -7.756054401397705e-06, -6.7157670855522156e-06, -5.675479769706726e-06, -4.6351924538612366e-06, -3.594905138015747e-06, -2.5546178221702576e-06, -1.514330506324768e-06, -4.7404319047927856e-07, 5.662441253662109e-07, 1.6065314412117004e-06, 2.64681875705719e-06, 3.6871060729026794e-06, 4.727393388748169e-06, 5.7676807045936584e-06, 6.807968020439148e-06, 7.848255336284637e-06, 8.888542652130127e-06, 9.928829967975616e-06, 1.0969117283821106e-05, 1.2009404599666595e-05, 1.3049691915512085e-05, 1.4089979231357574e-05, 1.5130266547203064e-05, 1.6170553863048553e-05, 1.7210841178894043e-05, 1.8251128494739532e-05, 1.9291415810585022e-05, 2.033170312643051e-05, 2.1371990442276e-05, 2.241227775812149e-05, 2.345256507396698e-05, 2.449285238981247e-05, 2.553313970565796e-05, 2.657342702150345e-05, 2.7613714337348938e-05, 2.8654001653194427e-05, 2.9694288969039917e-05, 3.0734576284885406e-05, 3.1774863600730896e-05, 3.2815150916576385e-05, 3.3855438232421875e-05]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 3.0, 3.0, 4.0, 5.0, 5.0, 10.0, 5.0, 9.0, 16.0, 22.0, 30.0, 35.0, 52.0, 89.0, 121.0, 147.0, 177.0, 343.0, 530.0, 947.0, 1814.0, 4318.0, 12456.0, 49371.0, 356958.0, 530555.0, 65234.0, 15371.0, 5062.0, 1883.0, 1137.0, 676.0, 375.0, 258.0, 161.0, 109.0, 72.0, 57.0, 31.0, 29.0, 23.0, 20.0, 12.0, 9.0, 7.0, 4.0, 4.0, 1.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.2040138244628906e-05, -1.168716698884964e-05, -1.1334195733070374e-05, -1.0981224477291107e-05, -1.062825322151184e-05, -1.0275281965732574e-05, -9.922310709953308e-06, -9.569339454174042e-06, -9.216368198394775e-06, -8.863396942615509e-06, -8.510425686836243e-06, -8.157454431056976e-06, -7.80448317527771e-06, -7.451511919498444e-06, -7.098540663719177e-06, -6.745569407939911e-06, -6.3925981521606445e-06, -6.039626896381378e-06, -5.686655640602112e-06, -5.3336843848228455e-06, -4.980713129043579e-06, -4.627741873264313e-06, -4.274770617485046e-06, -3.92179936170578e-06, -3.5688281059265137e-06, -3.2158568501472473e-06, -2.862885594367981e-06, -2.5099143385887146e-06, -2.1569430828094482e-06, -1.8039718270301819e-06, -1.4510005712509155e-06, -1.0980293154716492e-06, -7.450580596923828e-07, 
-3.9208680391311646e-07, -3.91155481338501e-08, 3.1385570764541626e-07, 6.668269634246826e-07, 1.019798219203949e-06, 1.3727694749832153e-06, 1.7257407307624817e-06, 2.078711986541748e-06, 2.4316832423210144e-06, 2.7846544981002808e-06, 3.137625753879547e-06, 3.4905970096588135e-06, 3.84356826543808e-06, 4.196539521217346e-06, 4.5495107769966125e-06, 4.902482032775879e-06, 5.255453288555145e-06, 5.608424544334412e-06, 5.961395800113678e-06, 6.314367055892944e-06, 6.667338311672211e-06, 7.020309567451477e-06, 7.373280823230743e-06, 7.72625207901001e-06, 8.079223334789276e-06, 8.432194590568542e-06, 8.785165846347809e-06, 9.138137102127075e-06, 9.491108357906342e-06, 9.844079613685608e-06, 1.0197050869464874e-05, 1.055002212524414e-05]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 0.0, 2.0, 3.0, 0.0, 7.0, 0.0, 5.0, 10.0, 0.0, 8.0, 13.0, 0.0, 21.0, 23.0, 0.0, 25.0, 33.0, 0.0, 48.0, 0.0, 62.0, 60.0, 0.0, 79.0, 89.0, 0.0, 77.0, 71.0, 0.0, 79.0, 0.0, 57.0, 49.0, 0.0, 36.0, 30.0, 0.0, 27.0, 18.0, 0.0, 21.0, 18.0, 0.0, 6.0, 0.0, 12.0, 7.0, 0.0, 1.0, 6.0, 0.0, 7.0, 3.0, 0.0, 1.0, 2.0], "bins": [-1.3113021850585938e-06, -1.2731179594993591e-06, -1.2349337339401245e-06, -1.1967495083808899e-06, -1.1585652828216553e-06, -1.1203810572624207e-06, -1.082196831703186e-06, -1.0440126061439514e-06, -1.0058283805847168e-06, -9.676441550254822e-07, -9.294599294662476e-07, -8.912757039070129e-07, -8.530914783477783e-07, -8.149072527885437e-07, -7.767230272293091e-07, -7.385388016700745e-07, -7.003545761108398e-07, -6.621703505516052e-07, -6.239861249923706e-07, -5.85801899433136e-07, -5.476176738739014e-07, -5.094334483146667e-07, -4.7124922275543213e-07, -4.330649971961975e-07, -3.948807716369629e-07, -3.5669654607772827e-07, -3.1851232051849365e-07, -2.8032809495925903e-07, -2.421438694000244e-07, -2.039596438407898e-07, -1.6577541828155518e-07, -1.2759119272232056e-07, -8.940696716308594e-08, -5.122274160385132e-08, -1.30385160446167e-08, 2.514570951461792e-08, 6.332993507385254e-08, 1.0151416063308716e-07, 1.3969838619232178e-07, 1.778826117515564e-07, 2.1606683731079102e-07, 2.5425106287002563e-07, 2.9243528842926025e-07, 3.3061951398849487e-07, 3.688037395477295e-07, 4.069879651069641e-07, 4.4517219066619873e-07, 4.833564162254333e-07, 5.21540641784668e-07, 5.597248673439026e-07, 5.979090929031372e-07, 6.360933184623718e-07, 6.742775440216064e-07, 7.124617695808411e-07, 7.506459951400757e-07, 7.888302206993103e-07, 8.270144462585449e-07, 8.651986718177795e-07, 9.033828973770142e-07, 9.415671229362488e-07, 9.797513484954834e-07, 1.017935574054718e-06, 1.0561197996139526e-06, 1.0943040251731873e-06, 1.1324882507324219e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 8.0, 7.0, 17.0, 11.0, 27.0, 19.0, 27.0, 72.0, 52.0, 93.0, 105.0, 249.0, 256.0, 367.0, 829.0, 982.0, 1557.0, 2743.0, 7599.0, 12771.0, 31142.0, 98950.0, 650378.0, 159516.0, 44760.0, 20561.0, 6298.0, 3524.0, 1965.0, 1438.0, 705.0, 421.0, 372.0, 201.0, 134.0, 93.0, 106.0, 56.0, 30.0, 31.0, 17.0, 11.0, 18.0, 12.0, 3.0, 4.0, 7.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 2.0], "bins": [-6.377696990966797e-06, -6.182119250297546e-06, -5.986541509628296e-06, -5.790963768959045e-06, -5.595386028289795e-06, -5.3998082876205444e-06, -5.204230546951294e-06, -5.0086528062820435e-06, -4.813075065612793e-06, -4.6174973249435425e-06, -4.421919584274292e-06, 
-4.2263418436050415e-06, -4.030764102935791e-06, -3.8351863622665405e-06, -3.63960862159729e-06, -3.4440308809280396e-06, -3.248453140258789e-06, -3.0528753995895386e-06, -2.857297658920288e-06, -2.6617199182510376e-06, -2.466142177581787e-06, -2.2705644369125366e-06, -2.074986696243286e-06, -1.8794089555740356e-06, -1.6838312149047852e-06, -1.4882534742355347e-06, -1.2926757335662842e-06, -1.0970979928970337e-06, -9.015202522277832e-07, -7.059425115585327e-07, -5.103647708892822e-07, -3.1478703022003174e-07, -1.1920928955078125e-07, 7.636845111846924e-08, 2.7194619178771973e-07, 4.675239324569702e-07, 6.631016731262207e-07, 8.586794137954712e-07, 1.0542571544647217e-06, 1.2498348951339722e-06, 1.4454126358032227e-06, 1.6409903764724731e-06, 1.8365681171417236e-06, 2.032145857810974e-06, 2.2277235984802246e-06, 2.423301339149475e-06, 2.6188790798187256e-06, 2.814456820487976e-06, 3.0100345611572266e-06, 3.205612301826477e-06, 3.4011900424957275e-06, 3.596767783164978e-06, 3.7923455238342285e-06, 3.987923264503479e-06, 4.1835010051727295e-06, 4.37907874584198e-06, 4.5746564865112305e-06, 4.770234227180481e-06, 4.9658119678497314e-06, 5.161389708518982e-06, 5.356967449188232e-06, 5.552545189857483e-06, 5.748122930526733e-06, 5.943700671195984e-06, 6.139278411865234e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 2.0, 12.0, 13.0, 8.0, 18.0, 9.0, 49.0, 50.0, 27.0, 85.0, 59.0, 123.0, 66.0, 140.0, 123.0, 43.0, 50.0, 32.0, 26.0, 15.0, 7.0, 9.0, 2.0, 11.0, 5.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.9802322387695312e-06, -2.8852373361587524e-06, -2.7902424335479736e-06, -2.695247530937195e-06, -2.600252628326416e-06, -2.505257725715637e-06, -2.4102628231048584e-06, -2.3152679204940796e-06, -2.2202730178833008e-06, -2.125278115272522e-06, -2.030283212661743e-06, -1.9352883100509644e-06, -1.8402934074401855e-06, -1.7452985048294067e-06, -1.650303602218628e-06, -1.5553086996078491e-06, -1.4603137969970703e-06, -1.3653188943862915e-06, -1.2703239917755127e-06, -1.1753290891647339e-06, -1.080334186553955e-06, -9.853392839431763e-07, -8.903443813323975e-07, -7.953494787216187e-07, -7.003545761108398e-07, -6.05359673500061e-07, -5.103647708892822e-07, -4.153698682785034e-07, -3.203749656677246e-07, -2.253800630569458e-07, -1.30385160446167e-07, -3.5390257835388184e-08, 5.960464477539063e-08, 1.5459954738616943e-07, 2.4959444999694824e-07, 3.4458935260772705e-07, 4.3958425521850586e-07, 5.345791578292847e-07, 6.295740604400635e-07, 7.245689630508423e-07, 8.195638656616211e-07, 9.145587682723999e-07, 1.0095536708831787e-06, 1.1045485734939575e-06, 1.1995434761047363e-06, 1.2945383787155151e-06, 1.389533281326294e-06, 1.4845281839370728e-06, 1.5795230865478516e-06, 1.6745179891586304e-06, 1.7695128917694092e-06, 1.864507794380188e-06, 1.959502696990967e-06, 2.0544975996017456e-06, 2.1494925022125244e-06, 2.2444874048233032e-06, 2.339482307434082e-06, 2.434477210044861e-06, 2.5294721126556396e-06, 2.6244670152664185e-06, 2.7194619178771973e-06, 2.814456820487976e-06, 2.909451723098755e-06, 3.0044466257095337e-06, 3.0994415283203125e-06]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 12.0, 13.0, 19.0, 42.0, 81.0, 189.0, 336.0, 143.0, 74.0, 38.0, 24.0, 13.0, 11.0, 4.0, 3.0, 3.0, 1.0, 1.0, 3.0], "bins": [-0.0008735902956686914, -0.0008565704920329154, -0.0008395506883971393, -0.0008225308847613633, -0.0008055110811255872, -0.0007884912774898112, -0.0007714714738540351, -0.0007544516702182591, -0.000737431866582483, -0.000720412062946707, -0.000703392259310931, -0.0006863724556751549, -0.0006693526520393789, -0.0006523328484036028, -0.0006353130447678268, -0.0006182932411320508, -0.0006012733792886138, -0.0005842535756528378, -0.0005672337720170617, -0.0005502139683812857, -0.0005331941647455096, -0.0005161743611097336, -0.0004991545574739575, -0.0004821347538381815, -0.00046511495020240545, -0.0004480951465666294, -0.00043107534293085337, -0.0004140555392950773, -0.0003970357356593013, -0.00038001593202352524, -0.0003629961283877492, -0.00034597632475197315, -0.000328956579323858, -0.000311936775688082, -0.00029491697205230594, -0.0002778971684165299, -0.00026087736478075385, -0.0002438575611449778, -0.00022683774295728654, -0.0002098179393215105, -0.00019279812113381922, -0.00017577831749804318, -0.00015875851386226714, -0.0001417387102264911, -0.00012471890659071505, -0.0001076990956789814, -9.067928476724774e-05, -7.36594811314717e-05, -5.663967749569565e-05, -3.961987385991961e-05, -2.2600066586164758e-05, -5.580259312409908e-06, 1.1439544323366135e-05, 2.8459347959142178e-05, 4.5479158870875835e-05, 6.249896250665188e-05, 7.951876614242792e-05, 9.653856977820396e-05, 0.00011355837341398001, 0.00013057817704975605, 0.0001475979806855321, 0.00016461778432130814, 0.0001816376025089994, 0.00019865740614477545, 0.0002156772097805515]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 3.0, 4.0, 3.0, 3.0, 12.0, 11.0, 6.0, 12.0, 11.0, 12.0, 13.0, 15.0, 18.0, 24.0, 24.0, 23.0, 27.0, 31.0, 41.0, 33.0, 39.0, 31.0, 51.0, 24.0, 49.0, 32.0, 31.0, 40.0, 38.0, 47.0, 30.0, 31.0, 27.0, 17.0, 27.0, 22.0, 17.0, 21.0, 17.0, 20.0, 8.0, 13.0, 12.0, 6.0, 7.0, 9.0, 5.0, 4.0, 3.0, 3.0, 2.0, 3.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011730194091796875, -0.00011329818516969681, -0.00010929442942142487, -0.00010529067367315292, -0.00010128691792488098, -9.728316217660904e-05, -9.32794064283371e-05, -8.927565068006516e-05, -8.527189493179321e-05, -8.126813918352127e-05, -7.726438343524933e-05, -7.326062768697739e-05, -6.925687193870544e-05, -6.52531161904335e-05, -6.124936044216156e-05, -5.724560469388962e-05, -5.3241848945617676e-05, -4.9238093197345734e-05, -4.523433744907379e-05, -4.123058170080185e-05, -3.722682595252991e-05, -3.3223070204257965e-05, -2.9219314455986023e-05, -2.521555870771408e-05, -2.121180295944214e-05, -1.7208047211170197e-05, -1.3204291462898254e-05, -9.200535714626312e-06, -5.19677996635437e-06, -1.193024218082428e-06, 2.810731530189514e-06, 6.814487278461456e-06, 1.0818243026733398e-05, 1.482199877500534e-05, 1.8825754523277283e-05, 2.2829510271549225e-05, 2.6833266019821167e-05, 3.083702176809311e-05, 3.484077751636505e-05, 3.8844533264636993e-05, 4.2848289012908936e-05, 4.685204476118088e-05, 5.085580050945282e-05, 5.485955625772476e-05, 5.8863312005996704e-05, 6.286706775426865e-05, 6.687082350254059e-05, 7.087457925081253e-05, 7.487833499908447e-05, 7.888209074735641e-05, 8.288584649562836e-05, 8.68896022439003e-05, 9.089335799217224e-05, 9.489711374044418e-05, 9.890086948871613e-05, 0.00010290462523698807, 
0.00010690838098526001, 0.00011091213673353195, 0.0001149158924818039, 0.00011891964823007584, 0.00012292340397834778, 0.00012692715972661972, 0.00013093091547489166, 0.0001349346712231636, 0.00013893842697143555]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 8.0, 8.0, 7.0, 8.0, 18.0, 15.0, 21.0, 21.0, 48.0, 55.0, 82.0, 97.0, 117.0, 209.0, 285.0, 475.0, 798.0, 1402.0, 2500.0, 4320.0, 8246.0, 15682.0, 34121.0, 89278.0, 770508.0, 3052759.0, 127430.0, 44259.0, 18797.0, 9207.0, 4947.0, 2909.0, 1893.0, 1181.0, 821.0, 559.0, 336.0, 259.0, 169.0, 129.0, 90.0, 66.0, 33.0, 28.0, 31.0, 18.0, 12.0, 7.0, 12.0, 6.0, 2.0, 2.0, 1.0, 0.0, 1.0, 3.0], "bins": [-0.00010186433792114258, -9.878911077976227e-05, -9.571388363838196e-05, -9.263865649700165e-05, -8.956342935562134e-05, -8.648820221424103e-05, -8.341297507286072e-05, -8.033774793148041e-05, -7.72625207901001e-05, -7.418729364871979e-05, -7.111206650733948e-05, -6.803683936595917e-05, -6.496161222457886e-05, -6.188638508319855e-05, -5.881115794181824e-05, -5.573593080043793e-05, -5.266070365905762e-05, -4.958547651767731e-05, -4.6510249376297e-05, -4.343502223491669e-05, -4.035979509353638e-05, -3.728456795215607e-05, -3.420934081077576e-05, -3.113411366939545e-05, -2.8058886528015137e-05, -2.4983659386634827e-05, -2.1908432245254517e-05, -1.8833205103874207e-05, -1.5757977962493896e-05, -1.2682750821113586e-05, -9.607523679733276e-06, -6.532296538352966e-06, -3.4570693969726562e-06, -3.818422555923462e-07, 2.693384885787964e-06, 5.768612027168274e-06, 8.843839168548584e-06, 1.1919066309928894e-05, 1.4994293451309204e-05, 1.8069520592689514e-05, 2.1144747734069824e-05, 2.4219974875450134e-05, 2.7295202016830444e-05, 3.0370429158210754e-05, 3.3445656299591064e-05, 3.6520883440971375e-05, 3.9596110582351685e-05, 4.2671337723731995e-05, 4.5746564865112305e-05, 4.8821792006492615e-05, 5.1897019147872925e-05, 5.4972246289253235e-05, 5.8047473430633545e-05, 6.112270057201385e-05, 6.419792771339417e-05, 6.727315485477448e-05, 7.034838199615479e-05, 7.34236091375351e-05, 7.64988362789154e-05, 7.957406342029572e-05, 8.264929056167603e-05, 8.572451770305634e-05, 8.879974484443665e-05, 9.187497198581696e-05, 9.495019912719727e-05]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 3.0, 8.0, 8.0, 12.0, 9.0, 12.0, 21.0, 30.0, 35.0, 40.0, 65.0, 105.0, 113.0, 93.0, 119.0, 89.0, 67.0, 42.0, 38.0, 30.0, 16.0, 14.0, 8.0, 10.0, 2.0, 4.0, 7.0, 3.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9669532775878906e-05, -1.906324177980423e-05, -1.8456950783729553e-05, -1.7850659787654877e-05, -1.72443687915802e-05, -1.6638077795505524e-05, -1.6031786799430847e-05, -1.542549580335617e-05, -1.4819204807281494e-05, -1.4212913811206818e-05, -1.3606622815132141e-05, -1.3000331819057465e-05, -1.2394040822982788e-05, -1.1787749826908112e-05, -1.1181458830833435e-05, -1.0575167834758759e-05, -9.968876838684082e-06, -9.362585842609406e-06, -8.756294846534729e-06, -8.150003850460052e-06, -7.543712854385376e-06, -6.9374218583106995e-06, -6.331130862236023e-06, -5.7248398661613464e-06, -5.11854887008667e-06, -4.512257874011993e-06, -3.905966877937317e-06, -3.2996758818626404e-06, -2.693384885787964e-06, -2.0870938897132874e-06, -1.4808028936386108e-06, -8.745118975639343e-07, 
-2.682209014892578e-07, 3.380700945854187e-07, 9.443610906600952e-07, 1.5506520867347717e-06, 2.1569430828094482e-06, 2.7632340788841248e-06, 3.3695250749588013e-06, 3.975816071033478e-06, 4.582107067108154e-06, 5.188398063182831e-06, 5.794689059257507e-06, 6.400980055332184e-06, 7.00727105140686e-06, 7.613562047481537e-06, 8.219853043556213e-06, 8.82614403963089e-06, 9.432435035705566e-06, 1.0038726031780243e-05, 1.064501702785492e-05, 1.1251308023929596e-05, 1.1857599020004272e-05, 1.2463890016078949e-05, 1.3070181012153625e-05, 1.3676472008228302e-05, 1.4282763004302979e-05, 1.4889054000377655e-05, 1.549534499645233e-05, 1.6101635992527008e-05, 1.6707926988601685e-05, 1.731421798467636e-05, 1.7920508980751038e-05, 1.8526799976825714e-05, 1.913309097290039e-05]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 6.0, 3.0, 8.0, 10.0, 20.0, 24.0, 31.0, 46.0, 80.0, 126.0, 197.0, 295.0, 528.0, 849.0, 1492.0, 2741.0, 5070.0, 9967.0, 21325.0, 48654.0, 128258.0, 600987.0, 2991211.0, 244899.0, 75856.0, 31589.0, 14383.0, 6985.0, 3780.0, 1933.0, 1154.0, 684.0, 402.0, 221.0, 139.0, 117.0, 69.0, 50.0, 32.0, 20.0, 17.0, 7.0, 7.0, 6.0, 5.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-8.159875869750977e-05, -7.914472371339798e-05, -7.66906887292862e-05, -7.423665374517441e-05, -7.178261876106262e-05, -6.932858377695084e-05, -6.687454879283905e-05, -6.442051380872726e-05, -6.196647882461548e-05, -5.951244384050369e-05, -5.705840885639191e-05, -5.460437387228012e-05, -5.2150338888168335e-05, -4.969630390405655e-05, -4.724226891994476e-05, -4.478823393583298e-05, -4.233419895172119e-05, -3.9880163967609406e-05, -3.742612898349762e-05, -3.4972093999385834e-05, -3.251805901527405e-05, -3.0064024031162262e-05, -2.7609989047050476e-05, -2.515595406293869e-05, -2.2701919078826904e-05, -2.024788409471512e-05, -1.7793849110603333e-05, -1.5339814126491547e-05, -1.288577914237976e-05, -1.0431744158267975e-05, -7.977709174156189e-06, -5.523674190044403e-06, -3.069639205932617e-06, -6.156042218208313e-07, 1.8384307622909546e-06, 4.2924657464027405e-06, 6.746500730514526e-06, 9.200535714626312e-06, 1.1654570698738098e-05, 1.4108605682849884e-05, 1.656264066696167e-05, 1.9016675651073456e-05, 2.1470710635185242e-05, 2.3924745619297028e-05, 2.6378780603408813e-05, 2.88328155875206e-05, 3.1286850571632385e-05, 3.374088555574417e-05, 3.619492053985596e-05, 3.864895552396774e-05, 4.110299050807953e-05, 4.3557025492191315e-05, 4.60110604763031e-05, 4.8465095460414886e-05, 5.091913044452667e-05, 5.337316542863846e-05, 5.5827200412750244e-05, 5.828123539686203e-05, 6.0735270380973816e-05, 6.31893053650856e-05, 6.564334034919739e-05, 6.809737533330917e-05, 7.055141031742096e-05, 7.300544530153275e-05, 7.545948028564453e-05]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 1.0, 1.0, 5.0, 10.0, 9.0, 21.0, 26.0, 29.0, 40.0, 53.0, 63.0, 79.0, 102.0, 151.0, 253.0, 654.0, 1275.0, 474.0, 202.0, 121.0, 100.0, 91.0, 60.0, 46.0, 51.0, 42.0, 24.0, 25.0, 20.0, 9.0, 15.0, 10.0, 7.0, 3.0, 4.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-5.257129669189453e-05, -5.112774670124054e-05, -4.968419671058655e-05, -4.8240646719932556e-05, -4.6797096729278564e-05, -4.535354673862457e-05, -4.390999674797058e-05, -4.246644675731659e-05, 
-4.10228967666626e-05, -3.9579346776008606e-05, -3.8135796785354614e-05, -3.669224679470062e-05, -3.524869680404663e-05, -3.380514681339264e-05, -3.236159682273865e-05, -3.0918046832084656e-05, -2.9474496841430664e-05, -2.8030946850776672e-05, -2.658739686012268e-05, -2.514384686946869e-05, -2.3700296878814697e-05, -2.2256746888160706e-05, -2.0813196897506714e-05, -1.9369646906852722e-05, -1.792609691619873e-05, -1.648254692554474e-05, -1.5038996934890747e-05, -1.3595446944236755e-05, -1.2151896953582764e-05, -1.0708346962928772e-05, -9.26479697227478e-06, -7.821246981620789e-06, -6.377696990966797e-06, -4.934147000312805e-06, -3.4905970096588135e-06, -2.0470470190048218e-06, -6.034970283508301e-07, 8.400529623031616e-07, 2.2836029529571533e-06, 3.727152943611145e-06, 5.170702934265137e-06, 6.614252924919128e-06, 8.05780291557312e-06, 9.501352906227112e-06, 1.0944902896881104e-05, 1.2388452887535095e-05, 1.3832002878189087e-05, 1.527555286884308e-05, 1.671910285949707e-05, 1.8162652850151062e-05, 1.9606202840805054e-05, 2.1049752831459045e-05, 2.2493302822113037e-05, 2.393685281276703e-05, 2.538040280342102e-05, 2.6823952794075012e-05, 2.8267502784729004e-05, 2.9711052775382996e-05, 3.115460276603699e-05, 3.259815275669098e-05, 3.404170274734497e-05, 3.548525273799896e-05, 3.6928802728652954e-05, 3.8372352719306946e-05, 3.981590270996094e-05]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 24.0, 84.0, 288.0, 356.0, 129.0, 62.0, 27.0, 23.0, 5.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003272871836088598, -0.00029459752840921283, -0.0002619078441057354, -0.00022921817435417324, -0.00019652850460261106, -0.0001638388348510489, -0.0001311491650994867, -9.845949534792453e-05, -6.576982559636235e-05, -3.3080155844800174e-05, -3.904860932379961e-07, 3.229918365832418e-05, 6.498885340988636e-05, 9.767852316144854e-05, 0.00013036819291301072, 0.0001630578626645729, 0.00019574753241613507, 0.00022843720216769725, 0.00026112687191925943, 0.00029381655622273684, 0.0003265062114223838, 0.00035919586662203074, 0.00039188555092550814, 0.00042457523522898555, 0.0004572648904286325, 0.0004899545456282794, 0.0005226442590355873, 0.0005553339142352343, 0.0005880235694348812, 0.0006207132246345282, 0.0006534028798341751, 0.000686092593241483, 0.0007187823066487908, 0.0007514719618484378, 0.0007841616170480847, 0.0008168513304553926, 0.0008495409856550395, 0.0008822306408546865, 0.0009149203542619944, 0.0009476100094616413, 0.0009802996646612883, 0.0010129893198609352, 0.0010456789750605822, 0.0010783686302602291, 0.001111058285459876, 0.0011437480570748448, 0.0011764377122744918, 0.0012091273674741387, 0.0012418170226737857, 0.0012745066778734326, 0.0013071963330730796, 0.0013398859882727265, 0.0013725757598876953, 0.0014052654150873423, 0.0014379550702869892, 0.0014706447254866362, 0.0015033343806862831, 0.00153602403588593, 0.001568713691085577, 0.001601403346285224, 0.001634093001484871, 0.0016667827730998397, 0.0016994724282994866, 0.0017321620834991336, 0.0017648517386987805]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 4.0, 2.0, 7.0, 12.0, 12.0, 10.0, 17.0, 11.0, 
14.0, 18.0, 19.0, 43.0, 32.0, 38.0, 33.0, 47.0, 55.0, 53.0, 55.0, 55.0, 52.0, 54.0, 55.0, 37.0, 34.0, 38.0, 34.0, 28.0, 24.0, 24.0, 20.0, 10.0, 15.0, 13.0, 10.0, 5.0, 6.0, 6.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00024056434631347656, -0.00023416057229042053, -0.0002277567982673645, -0.00022135302424430847, -0.00021494925022125244, -0.0002085454761981964, -0.00020214170217514038, -0.00019573792815208435, -0.00018933415412902832, -0.0001829303801059723, -0.00017652660608291626, -0.00017012283205986023, -0.0001637190580368042, -0.00015731528401374817, -0.00015091150999069214, -0.0001445077359676361, -0.00013810396194458008, -0.00013170018792152405, -0.00012529641389846802, -0.00011889263987541199, -0.00011248886585235596, -0.00010608509182929993, -9.96813178062439e-05, -9.327754378318787e-05, -8.687376976013184e-05, -8.04699957370758e-05, -7.406622171401978e-05, -6.766244769096375e-05, -6.125867366790771e-05, -5.4854899644851685e-05, -4.8451125621795654e-05, -4.2047351598739624e-05, -3.5643577575683594e-05, -2.9239803552627563e-05, -2.2836029529571533e-05, -1.6432255506515503e-05, -1.0028481483459473e-05, -3.6247074604034424e-06, 2.779066562652588e-06, 9.182840585708618e-06, 1.558661460876465e-05, 2.199038863182068e-05, 2.839416265487671e-05, 3.479793667793274e-05, 4.120171070098877e-05, 4.76054847240448e-05, 5.400925874710083e-05, 6.041303277015686e-05, 6.681680679321289e-05, 7.322058081626892e-05, 7.962435483932495e-05, 8.602812886238098e-05, 9.243190288543701e-05, 9.883567690849304e-05, 0.00010523945093154907, 0.0001116432249546051, 0.00011804699897766113, 0.00012445077300071716, 0.0001308545470237732, 0.00013725832104682922, 0.00014366209506988525, 0.00015006586909294128, 0.00015646964311599731, 0.00016287341713905334, 0.00016927719116210938]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 0.0, 5.0, 2.0, 2.0, 3.0, 5.0, 9.0, 8.0, 6.0, 10.0, 15.0, 13.0, 17.0, 26.0, 21.0, 39.0, 45.0, 81.0, 129.0, 195.0, 368.0, 641.0, 1191.0, 2298.0, 4860.0, 12481.0, 36727.0, 158675.0, 668289.0, 114094.0, 29540.0, 10002.0, 4204.0, 1976.0, 1070.0, 584.0, 318.0, 201.0, 117.0, 69.0, 44.0, 46.0, 28.0, 21.0, 22.0, 12.0, 11.0, 14.0, 7.0, 5.0, 7.0, 4.0, 2.0, 3.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0], "bins": [-0.00016295909881591797, -0.00015792995691299438, -0.0001529008150100708, -0.00014787167310714722, -0.00014284253120422363, -0.00013781338930130005, -0.00013278424739837646, -0.00012775510549545288, -0.0001227259635925293, -0.00011769682168960571, -0.00011266767978668213, -0.00010763853788375854, -0.00010260939598083496, -9.758025407791138e-05, -9.255111217498779e-05, -8.752197027206421e-05, -8.249282836914062e-05, -7.746368646621704e-05, -7.243454456329346e-05, -6.740540266036987e-05, -6.237626075744629e-05, -5.7347118854522705e-05, -5.231797695159912e-05, -4.728883504867554e-05, -4.225969314575195e-05, -3.723055124282837e-05, -3.2201409339904785e-05, -2.71722674369812e-05, -2.2143125534057617e-05, -1.7113983631134033e-05, -1.208484172821045e-05, -7.055699825286865e-06, -2.0265579223632812e-06, 3.0025839805603027e-06, 8.031725883483887e-06, 1.306086778640747e-05, 1.8090009689331055e-05, 2.311915159225464e-05, 2.8148293495178223e-05, 3.317743539810181e-05, 3.820657730102539e-05, 4.3235719203948975e-05, 4.826486110687256e-05, 5.329400300979614e-05, 5.8323144912719727e-05, 6.335228681564331e-05, 6.83814287185669e-05, 7.341057062149048e-05, 7.843971252441406e-05, 8.346885442733765e-05, 8.849799633026123e-05, 9.352713823318481e-05, 
9.85562801361084e-05, 0.00010358542203903198, 0.00010861456394195557, 0.00011364370584487915, 0.00011867284774780273, 0.00012370198965072632, 0.0001287311315536499, 0.00013376027345657349, 0.00013878941535949707, 0.00014381855726242065, 0.00014884769916534424, 0.00015387684106826782, 0.0001589059829711914]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 3.0, 0.0, 3.0, 3.0, 4.0, 6.0, 6.0, 6.0, 15.0, 29.0, 23.0, 48.0, 58.0, 94.0, 129.0, 138.0, 119.0, 109.0, 73.0, 45.0, 35.0, 22.0, 16.0, 9.0, 6.0, 3.0, 2.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3768672943115234e-05, -1.3032928109169006e-05, -1.2297183275222778e-05, -1.156143844127655e-05, -1.0825693607330322e-05, -1.0089948773384094e-05, -9.354203939437866e-06, -8.618459105491638e-06, -7.88271427154541e-06, -7.146969437599182e-06, -6.411224603652954e-06, -5.675479769706726e-06, -4.939734935760498e-06, -4.20399010181427e-06, -3.468245267868042e-06, -2.732500433921814e-06, -1.996755599975586e-06, -1.261010766029358e-06, -5.252659320831299e-07, 2.1047890186309814e-07, 9.462237358093262e-07, 1.6819685697555542e-06, 2.4177134037017822e-06, 3.1534582376480103e-06, 3.889203071594238e-06, 4.624947905540466e-06, 5.360692739486694e-06, 6.096437573432922e-06, 6.83218240737915e-06, 7.567927241325378e-06, 8.303672075271606e-06, 9.039416909217834e-06, 9.775161743164062e-06, 1.051090657711029e-05, 1.1246651411056519e-05, 1.1982396245002747e-05, 1.2718141078948975e-05, 1.3453885912895203e-05, 1.418963074684143e-05, 1.4925375580787659e-05, 1.5661120414733887e-05, 1.6396865248680115e-05, 1.7132610082626343e-05, 1.786835491657257e-05, 1.86040997505188e-05, 1.9339844584465027e-05, 2.0075589418411255e-05, 2.0811334252357483e-05, 2.154707908630371e-05, 2.228282392024994e-05, 2.3018568754196167e-05, 2.3754313588142395e-05, 2.4490058422088623e-05, 2.522580325603485e-05, 2.596154808998108e-05, 2.6697292923927307e-05, 2.7433037757873535e-05, 2.8168782591819763e-05, 2.890452742576599e-05, 2.964027225971222e-05, 3.0376017093658447e-05, 3.1111761927604675e-05, 3.18475067615509e-05, 3.258325159549713e-05, 3.331899642944336e-05]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 5.0, 7.0, 19.0, 20.0, 32.0, 47.0, 81.0, 135.0, 179.0, 306.0, 473.0, 730.0, 1223.0, 1903.0, 3181.0, 5599.0, 9596.0, 17017.0, 31089.0, 57098.0, 120866.0, 364020.0, 234432.0, 94541.0, 46198.0, 25618.0, 14133.0, 8050.0, 4694.0, 2781.0, 1648.0, 1029.0, 638.0, 436.0, 260.0, 157.0, 113.0, 60.0, 56.0, 32.0, 23.0, 14.0, 6.0, 5.0, 3.0, 1.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.61015510559082e-05, -6.40256330370903e-05, -6.19497150182724e-05, -5.98737969994545e-05, -5.77978789806366e-05, -5.5721960961818695e-05, -5.3646042943000793e-05, -5.157012492418289e-05, -4.949420690536499e-05, -4.741828888654709e-05, -4.534237086772919e-05, -4.3266452848911285e-05, -4.1190534830093384e-05, -3.911461681127548e-05, -3.703869879245758e-05, -3.496278077363968e-05, -3.288686275482178e-05, -3.0810944736003876e-05, -2.8735026717185974e-05, -2.6659108698368073e-05, -2.458319067955017e-05, -2.250727266073227e-05, -2.0431354641914368e-05, -1.8355436623096466e-05, -1.6279518604278564e-05, -1.4203600585460663e-05, -1.2127682566642761e-05, -1.005176454782486e-05, -7.975846529006958e-06, -5.899928510189056e-06, 
-3.824010491371155e-06, -1.7480924725532532e-06, 3.2782554626464844e-07, 2.40374356508255e-06, 4.479661583900452e-06, 6.555579602718353e-06, 8.631497621536255e-06, 1.0707415640354156e-05, 1.2783333659172058e-05, 1.485925167798996e-05, 1.693516969680786e-05, 1.9011087715625763e-05, 2.1087005734443665e-05, 2.3162923753261566e-05, 2.5238841772079468e-05, 2.731475979089737e-05, 2.939067780971527e-05, 3.146659582853317e-05, 3.3542513847351074e-05, 3.5618431866168976e-05, 3.769434988498688e-05, 3.977026790380478e-05, 4.184618592262268e-05, 4.392210394144058e-05, 4.5998021960258484e-05, 4.8073939979076385e-05, 5.014985799789429e-05, 5.222577601671219e-05, 5.430169403553009e-05, 5.637761205434799e-05, 5.8453530073165894e-05, 6.0529448091983795e-05, 6.26053661108017e-05, 6.46812841296196e-05, 6.67572021484375e-05]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 5.0, 5.0, 2.0, 7.0, 4.0, 9.0, 8.0, 13.0, 12.0, 16.0, 19.0, 17.0, 28.0, 21.0, 33.0, 42.0, 30.0, 35.0, 41.0, 38.0, 43.0, 50.0, 41.0, 41.0, 46.0, 56.0, 38.0, 47.0, 27.0, 30.0, 25.0, 21.0, 29.0, 22.0, 14.0, 25.0, 18.0, 5.0, 13.0, 11.0, 7.0, 5.0, 3.0, 5.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.075599670410156e-05, -2.9587186872959137e-05, -2.841837704181671e-05, -2.7249567210674286e-05, -2.608075737953186e-05, -2.4911947548389435e-05, -2.374313771724701e-05, -2.2574327886104584e-05, -2.1405518054962158e-05, -2.0236708223819733e-05, -1.9067898392677307e-05, -1.789908856153488e-05, -1.6730278730392456e-05, -1.556146889925003e-05, -1.4392659068107605e-05, -1.322384923696518e-05, -1.2055039405822754e-05, -1.0886229574680328e-05, -9.717419743537903e-06, -8.548609912395477e-06, -7.379800081253052e-06, -6.210990250110626e-06, -5.042180418968201e-06, -3.873370587825775e-06, -2.7045607566833496e-06, -1.535750925540924e-06, -3.6694109439849854e-07, 8.01868736743927e-07, 1.9706785678863525e-06, 3.139488399028778e-06, 4.308298230171204e-06, 5.477108061313629e-06, 6.645917892456055e-06, 7.81472772359848e-06, 8.983537554740906e-06, 1.0152347385883331e-05, 1.1321157217025757e-05, 1.2489967048168182e-05, 1.3658776879310608e-05, 1.4827586710453033e-05, 1.599639654159546e-05, 1.7165206372737885e-05, 1.833401620388031e-05, 1.9502826035022736e-05, 2.067163586616516e-05, 2.1840445697307587e-05, 2.3009255528450012e-05, 2.4178065359592438e-05, 2.5346875190734863e-05, 2.651568502187729e-05, 2.7684494853019714e-05, 2.885330468416214e-05, 3.0022114515304565e-05, 3.119092434644699e-05, 3.2359734177589417e-05, 3.352854400873184e-05, 3.469735383987427e-05, 3.586616367101669e-05, 3.703497350215912e-05, 3.8203783333301544e-05, 3.937259316444397e-05, 4.0541402995586395e-05, 4.171021282672882e-05, 4.2879022657871246e-05, 4.404783248901367e-05]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 6.0, 4.0, 8.0, 13.0, 15.0, 11.0, 37.0, 40.0, 62.0, 92.0, 173.0, 216.0, 309.0, 487.0, 1083.0, 1387.0, 2192.0, 3641.0, 10747.0, 19292.0, 49544.0, 317244.0, 467052.0, 108156.0, 35164.0, 17609.0, 5663.0, 3188.0, 1853.0, 1343.0, 638.0, 439.0, 255.0, 238.0, 115.0, 59.0, 60.0, 42.0, 31.0, 15.0, 14.0, 7.0, 8.0, 1.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0], "bins": [-6.67572021484375e-06, -6.481073796749115e-06, -6.28642737865448e-06, -6.091780960559845e-06, -5.89713454246521e-06, -5.702488124370575e-06, -5.50784170627594e-06, -5.313195288181305e-06, 
-5.11854887008667e-06, -4.923902451992035e-06, -4.7292560338974e-06, -4.534609615802765e-06, -4.33996319770813e-06, -4.145316779613495e-06, -3.95067036151886e-06, -3.756023943424225e-06, -3.56137752532959e-06, -3.366731107234955e-06, -3.17208468914032e-06, -2.977438271045685e-06, -2.78279185295105e-06, -2.588145434856415e-06, -2.3934990167617798e-06, -2.1988525986671448e-06, -2.0042061805725098e-06, -1.8095597624778748e-06, -1.6149133443832397e-06, -1.4202669262886047e-06, -1.2256205081939697e-06, -1.0309740900993347e-06, -8.363276720046997e-07, -6.416812539100647e-07, -4.470348358154297e-07, -2.523884177207947e-07, -5.774199962615967e-08, 1.3690441846847534e-07, 3.3155083656311035e-07, 5.261972546577454e-07, 7.208436727523804e-07, 9.154900908470154e-07, 1.1101365089416504e-06, 1.3047829270362854e-06, 1.4994293451309204e-06, 1.6940757632255554e-06, 1.8887221813201904e-06, 2.0833685994148254e-06, 2.2780150175094604e-06, 2.4726614356040955e-06, 2.6673078536987305e-06, 2.8619542717933655e-06, 3.0566006898880005e-06, 3.2512471079826355e-06, 3.4458935260772705e-06, 3.6405399441719055e-06, 3.8351863622665405e-06, 4.0298327803611755e-06, 4.2244791984558105e-06, 4.4191256165504456e-06, 4.6137720346450806e-06, 4.8084184527397156e-06, 5.003064870834351e-06, 5.197711288928986e-06, 5.392357707023621e-06, 5.587004125118256e-06, 5.781650543212891e-06]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 5.0, 4.0, 4.0, 7.0, 7.0, 7.0, 14.0, 22.0, 30.0, 37.0, 33.0, 47.0, 66.0, 78.0, 91.0, 100.0, 88.0, 67.0, 60.0, 52.0, 39.0, 31.0, 27.0, 18.0, 16.0, 16.0, 5.0, 5.0, 3.0, 4.0, 4.0, 5.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0274892449378967e-06, -1.9688159227371216e-06, -1.9101426005363464e-06, -1.8514692783355713e-06, -1.7927959561347961e-06, -1.734122633934021e-06, -1.6754493117332458e-06, -1.6167759895324707e-06, -1.5581026673316956e-06, -1.4994293451309204e-06, -1.4407560229301453e-06, -1.3820827007293701e-06, -1.323409378528595e-06, -1.2647360563278198e-06, -1.2060627341270447e-06, -1.1473894119262695e-06, -1.0887160897254944e-06, -1.0300427675247192e-06, -9.71369445323944e-07, -9.126961231231689e-07, -8.540228009223938e-07, -7.953494787216187e-07, -7.366761565208435e-07, -6.780028343200684e-07, -6.193295121192932e-07, -5.606561899185181e-07, -5.019828677177429e-07, -4.4330954551696777e-07, -3.8463622331619263e-07, -3.259629011154175e-07, -2.6728957891464233e-07, -2.086162567138672e-07, -1.4994293451309204e-07, -9.12696123123169e-08, -3.259629011154175e-08, 2.60770320892334e-08, 8.475035429000854e-08, 1.434236764907837e-07, 2.0209699869155884e-07, 2.60770320892334e-07, 3.1944364309310913e-07, 3.781169652938843e-07, 4.367902874946594e-07, 4.954636096954346e-07, 5.541369318962097e-07, 6.128102540969849e-07, 6.7148357629776e-07, 7.301568984985352e-07, 7.888302206993103e-07, 8.475035429000854e-07, 9.061768651008606e-07, 9.648501873016357e-07, 1.0235235095024109e-06, 1.082196831703186e-06, 1.1408701539039612e-06, 1.1995434761047363e-06, 1.2582167983055115e-06, 1.3168901205062866e-06, 1.3755634427070618e-06, 1.434236764907837e-06, 1.492910087108612e-06, 1.5515834093093872e-06, 1.6102567315101624e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 8.0, 17.0, 9.0, 18.0, 29.0, 31.0, 34.0, 69.0, 88.0, 
103.0, 142.0, 191.0, 288.0, 333.0, 578.0, 823.0, 1069.0, 2726.0, 3057.0, 4530.0, 7491.0, 12702.0, 23119.0, 48146.0, 122209.0, 441460.0, 228577.0, 73682.0, 32865.0, 17012.0, 12929.0, 4685.0, 2963.0, 1956.0, 1388.0, 943.0, 631.0, 456.0, 329.0, 222.0, 173.0, 126.0, 101.0, 86.0, 50.0, 34.0, 24.0, 17.0, 16.0, 8.0, 4.0, 6.0, 3.0, 4.0, 1.0, 3.0], "bins": [-4.172325134277344e-06, -4.048459231853485e-06, -3.9245933294296265e-06, -3.800727427005768e-06, -3.676861524581909e-06, -3.5529956221580505e-06, -3.429129719734192e-06, -3.3052638173103333e-06, -3.1813979148864746e-06, -3.057532012462616e-06, -2.9336661100387573e-06, -2.8098002076148987e-06, -2.68593430519104e-06, -2.5620684027671814e-06, -2.4382025003433228e-06, -2.314336597919464e-06, -2.1904706954956055e-06, -2.066604793071747e-06, -1.942738890647888e-06, -1.8188729882240295e-06, -1.695007085800171e-06, -1.5711411833763123e-06, -1.4472752809524536e-06, -1.323409378528595e-06, -1.1995434761047363e-06, -1.0756775736808777e-06, -9.51811671257019e-07, -8.279457688331604e-07, -7.040798664093018e-07, -5.802139639854431e-07, -4.5634806156158447e-07, -3.3248215913772583e-07, -2.086162567138672e-07, -8.475035429000854e-08, 3.91155481338501e-08, 1.6298145055770874e-07, 2.868473529815674e-07, 4.10713255405426e-07, 5.345791578292847e-07, 6.584450602531433e-07, 7.82310962677002e-07, 9.061768651008606e-07, 1.0300427675247192e-06, 1.1539086699485779e-06, 1.2777745723724365e-06, 1.4016404747962952e-06, 1.5255063772201538e-06, 1.6493722796440125e-06, 1.773238182067871e-06, 1.8971040844917297e-06, 2.0209699869155884e-06, 2.144835889339447e-06, 2.2687017917633057e-06, 2.3925676941871643e-06, 2.516433596611023e-06, 2.6402994990348816e-06, 2.7641654014587402e-06, 2.888031303882599e-06, 3.0118972063064575e-06, 3.135763108730316e-06, 3.259629011154175e-06, 3.3834949135780334e-06, 3.507360816001892e-06, 3.6312267184257507e-06, 3.7550926208496094e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 0.0, 4.0, 2.0, 2.0, 5.0, 2.0, 5.0, 5.0, 2.0, 9.0, 12.0, 6.0, 11.0, 21.0, 18.0, 15.0, 49.0, 20.0, 31.0, 65.0, 51.0, 62.0, 134.0, 63.0, 111.0, 48.0, 41.0, 64.0, 29.0, 18.0, 23.0, 16.0, 7.0, 14.0, 4.0, 8.0, 7.0, 2.0, 4.0, 6.0, 2.0, 4.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.6226043701171875e-06, -2.5425106287002563e-06, -2.462416887283325e-06, -2.382323145866394e-06, -2.302229404449463e-06, -2.2221356630325317e-06, -2.1420419216156006e-06, -2.0619481801986694e-06, -1.9818544387817383e-06, -1.9017606973648071e-06, -1.821666955947876e-06, -1.7415732145309448e-06, -1.6614794731140137e-06, -1.5813857316970825e-06, -1.5012919902801514e-06, -1.4211982488632202e-06, -1.341104507446289e-06, -1.261010766029358e-06, -1.1809170246124268e-06, -1.1008232831954956e-06, -1.0207295417785645e-06, -9.406358003616333e-07, -8.605420589447021e-07, -7.80448317527771e-07, -7.003545761108398e-07, -6.202608346939087e-07, -5.401670932769775e-07, -4.600733518600464e-07, -3.7997961044311523e-07, -2.998858690261841e-07, -2.1979212760925293e-07, -1.3969838619232178e-07, -5.960464477539063e-08, 2.0489096641540527e-08, 1.0058283805847168e-07, 1.8067657947540283e-07, 2.60770320892334e-07, 3.4086406230926514e-07, 4.209578037261963e-07, 5.010515451431274e-07, 5.811452865600586e-07, 6.612390279769897e-07, 7.413327693939209e-07, 8.21426510810852e-07, 9.015202522277832e-07, 9.816139936447144e-07, 1.0617077350616455e-06, 1.1418014764785767e-06, 1.2218952178955078e-06, 
1.301988959312439e-06, 1.3820827007293701e-06, 1.4621764421463013e-06, 1.5422701835632324e-06, 1.6223639249801636e-06, 1.7024576663970947e-06, 1.7825514078140259e-06, 1.862645149230957e-06, 1.942738890647888e-06, 2.0228326320648193e-06, 2.1029263734817505e-06, 2.1830201148986816e-06, 2.263113856315613e-06, 2.343207597732544e-06, 2.423301339149475e-06, 2.5033950805664062e-06]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 6.0, 8.0, 12.0, 15.0, 24.0, 41.0, 90.0, 171.0, 340.0, 139.0, 66.0, 41.0, 27.0, 14.0, 7.0, 6.0, 5.0, 3.0, 1.0, 1.0, 1.0], "bins": [-0.0008525124285370111, -0.0008359133498743176, -0.0008193142130039632, -0.0008027150761336088, -0.0007861159974709153, -0.0007695169188082218, -0.0007529177819378674, -0.000736318645067513, -0.0007197195664048195, -0.000703120487742126, -0.0006865213508717716, -0.0006699222140014172, -0.0006533231353387237, -0.0006367240566760302, -0.0006201249198056757, -0.0006035257829353213, -0.0005869267042726278, -0.0005703276256099343, -0.0005537284887395799, -0.0005371293518692255, -0.000520530273206532, -0.0005039311945438385, -0.0004873320576734841, -0.00047073294990696013, -0.00045413384214043617, -0.0004375347343739122, -0.00042093562660738826, -0.0004043365188408643, -0.00038773741107434034, -0.0003711383033078164, -0.00035453919554129243, -0.00033794008777476847, -0.0003213409800082445, -0.00030474187224172056, -0.0002881427644751966, -0.00027154365670867264, -0.0002549445489421487, -0.00023834544117562473, -0.00022174633340910077, -0.00020514722564257681, -0.00018854813242796808, -0.00017194902466144413, -0.00015534991689492017, -0.0001387508091283962, -0.00012215170136187226, -0.0001055525935953483, -8.895348582882434e-05, -7.235437806230038e-05, -5.575527029577643e-05, -3.915616252925247e-05, -2.2557054762728512e-05, -5.957946996204555e-06, 1.0641160770319402e-05, 2.724026853684336e-05, 4.383937630336732e-05, 6.0438484069891274e-05, 7.703759183641523e-05, 9.363669960293919e-05, 0.00011023580736946315, 0.0001268349151359871, 0.00014343402290251106, 0.00016003313066903502, 0.00017663223843555897, 0.00019323134620208293, 0.0002098304539686069]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 3.0, 3.0, 6.0, 5.0, 5.0, 7.0, 6.0, 10.0, 16.0, 11.0, 19.0, 29.0, 28.0, 26.0, 24.0, 22.0, 45.0, 34.0, 42.0, 38.0, 64.0, 40.0, 54.0, 46.0, 41.0, 37.0, 31.0, 49.0, 33.0, 38.0, 33.0, 24.0, 18.0, 19.0, 25.0, 13.0, 13.0, 15.0, 13.0, 11.0, 2.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001245737075805664, -0.00011979695409536362, -0.00011502020061016083, -0.00011024344712495804, -0.00010546669363975525, -0.00010068994015455246, -9.591318666934967e-05, -9.113643318414688e-05, -8.635967969894409e-05, -8.15829262137413e-05, -7.680617272853851e-05, -7.202941924333572e-05, -6.725266575813293e-05, -6.247591227293015e-05, -5.7699158787727356e-05, -5.292240530252457e-05, -4.814565181732178e-05, -4.336889833211899e-05, -3.85921448469162e-05, -3.381539136171341e-05, -2.903863787651062e-05, -2.426188439130783e-05, -1.948513090610504e-05, -1.4708377420902252e-05, -9.931623935699463e-06, -5.154870450496674e-06, -3.781169652938843e-07, 4.398636519908905e-06, 9.175390005111694e-06, 
1.3952143490314484e-05, 1.8728896975517273e-05, 2.3505650460720062e-05, 2.828240394592285e-05, 3.305915743112564e-05, 3.783591091632843e-05, 4.261266440153122e-05, 4.738941788673401e-05, 5.21661713719368e-05, 5.694292485713959e-05, 6.171967834234238e-05, 6.649643182754517e-05, 7.127318531274796e-05, 7.604993879795074e-05, 8.082669228315353e-05, 8.560344576835632e-05, 9.038019925355911e-05, 9.51569527387619e-05, 9.993370622396469e-05, 0.00010471045970916748, 0.00010948721319437027, 0.00011426396667957306, 0.00011904072016477585, 0.00012381747364997864, 0.00012859422713518143, 0.00013337098062038422, 0.000138147734105587, 0.0001429244875907898, 0.00014770124107599258, 0.00015247799456119537, 0.00015725474804639816, 0.00016203150153160095, 0.00016680825501680374, 0.00017158500850200653, 0.00017636176198720932, 0.0001811385154724121]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 1.0, 5.0, 7.0, 28.0, 40.0, 56.0, 107.0, 160.0, 285.0, 499.0, 780.0, 1454.0, 2779.0, 5560.0, 12899.0, 34595.0, 128537.0, 3457191.0, 430684.0, 73320.0, 24187.0, 10025.0, 4773.0, 2638.0, 1417.0, 769.0, 448.0, 279.0, 227.0, 127.0, 82.0, 62.0, 59.0, 41.0, 39.0, 32.0, 22.0, 17.0, 13.0, 13.0, 8.0, 4.0, 7.0, 7.0, 3.0, 2.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.612871170043945e-05, -8.217524737119675e-05, -7.822178304195404e-05, -7.426831871271133e-05, -7.031485438346863e-05, -6.636139005422592e-05, -6.240792572498322e-05, -5.845446139574051e-05, -5.45009970664978e-05, -5.0547532737255096e-05, -4.659406840801239e-05, -4.2640604078769684e-05, -3.868713974952698e-05, -3.473367542028427e-05, -3.0780211091041565e-05, -2.682674676179886e-05, -2.2873282432556152e-05, -1.8919818103313446e-05, -1.496635377407074e-05, -1.1012889444828033e-05, -7.059425115585327e-06, -3.105960786342621e-06, 8.475035429000854e-07, 4.800967872142792e-06, 8.754432201385498e-06, 1.2707896530628204e-05, 1.666136085987091e-05, 2.0614825189113617e-05, 2.4568289518356323e-05, 2.852175384759903e-05, 3.2475218176841736e-05, 3.642868250608444e-05, 4.038214683532715e-05, 4.4335611164569855e-05, 4.828907549381256e-05, 5.224253982305527e-05, 5.6196004152297974e-05, 6.014946848154068e-05, 6.410293281078339e-05, 6.805639714002609e-05, 7.20098614692688e-05, 7.59633257985115e-05, 7.991679012775421e-05, 8.387025445699692e-05, 8.782371878623962e-05, 9.177718311548233e-05, 9.573064744472504e-05, 9.968411177396774e-05, 0.00010363757610321045, 0.00010759104043245316, 0.00011154450476169586, 0.00011549796909093857, 0.00011945143342018127, 0.00012340489774942398, 0.0001273583620786667, 0.0001313118264079094, 0.0001352652907371521, 0.0001392187550663948, 0.0001431722193956375, 0.00014712568372488022, 0.00015107914805412292, 0.00015503261238336563, 0.00015898607671260834, 0.00016293954104185104, 0.00016689300537109375]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 6.0, 17.0, 10.0, 16.0, 31.0, 42.0, 49.0, 77.0, 115.0, 111.0, 98.0, 111.0, 74.0, 73.0, 47.0, 39.0, 13.0, 17.0, 13.0, 5.0, 13.0, 4.0, 3.0, 2.0, 6.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4007091522216797e-05, -1.3346783816814423e-05, -1.2686476111412048e-05, -1.2026168406009674e-05, -1.13658607006073e-05, -1.0705552995204926e-05, -1.0045245289802551e-05, 
-9.384937584400177e-06, -8.724629878997803e-06, -8.064322173595428e-06, -7.404014468193054e-06, -6.74370676279068e-06, -6.083399057388306e-06, -5.423091351985931e-06, -4.762783646583557e-06, -4.102475941181183e-06, -3.4421682357788086e-06, -2.7818605303764343e-06, -2.12155282497406e-06, -1.4612451195716858e-06, -8.009374141693115e-07, -1.4062970876693726e-07, 5.19677996635437e-07, 1.1799857020378113e-06, 1.8402934074401855e-06, 2.50060111284256e-06, 3.160908818244934e-06, 3.821216523647308e-06, 4.481524229049683e-06, 5.141831934452057e-06, 5.802139639854431e-06, 6.462447345256805e-06, 7.12275505065918e-06, 7.783062756061554e-06, 8.443370461463928e-06, 9.103678166866302e-06, 9.763985872268677e-06, 1.0424293577671051e-05, 1.1084601283073425e-05, 1.17449089884758e-05, 1.2405216693878174e-05, 1.3065524399280548e-05, 1.3725832104682922e-05, 1.4386139810085297e-05, 1.5046447515487671e-05, 1.5706755220890045e-05, 1.636706292629242e-05, 1.7027370631694794e-05, 1.7687678337097168e-05, 1.8347986042499542e-05, 1.9008293747901917e-05, 1.966860145330429e-05, 2.0328909158706665e-05, 2.098921686410904e-05, 2.1649524569511414e-05, 2.2309832274913788e-05, 2.2970139980316162e-05, 2.3630447685718536e-05, 2.429075539112091e-05, 2.4951063096523285e-05, 2.561137080192566e-05, 2.6271678507328033e-05, 2.6931986212730408e-05, 2.7592293918132782e-05, 2.8252601623535156e-05]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 2.0, 6.0, 2.0, 11.0, 15.0, 10.0, 19.0, 29.0, 50.0, 79.0, 90.0, 150.0, 227.0, 339.0, 470.0, 729.0, 1181.0, 1795.0, 2693.0, 4354.0, 7024.0, 11513.0, 20624.0, 35191.0, 65967.0, 139786.0, 402309.0, 2671618.0, 494146.0, 166636.0, 73792.0, 38452.0, 21535.0, 12810.0, 7424.0, 4676.0, 2946.0, 1864.0, 1284.0, 789.0, 539.0, 359.0, 240.0, 184.0, 122.0, 83.0, 39.0, 26.0, 26.0, 14.0, 11.0, 7.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.137920379638672e-05, -4.975218325853348e-05, -4.812516272068024e-05, -4.6498142182826996e-05, -4.4871121644973755e-05, -4.3244101107120514e-05, -4.161708056926727e-05, -3.999006003141403e-05, -3.836303949356079e-05, -3.673601895570755e-05, -3.510899841785431e-05, -3.348197788000107e-05, -3.185495734214783e-05, -3.0227936804294586e-05, -2.8600916266441345e-05, -2.6973895728588104e-05, -2.5346875190734863e-05, -2.3719854652881622e-05, -2.209283411502838e-05, -2.046581357717514e-05, -1.88387930393219e-05, -1.721177250146866e-05, -1.5584751963615417e-05, -1.3957731425762177e-05, -1.2330710887908936e-05, -1.0703690350055695e-05, -9.076669812202454e-06, -7.449649274349213e-06, -5.822628736495972e-06, -4.195608198642731e-06, -2.5685876607894897e-06, -9.415671229362488e-07, 6.854534149169922e-07, 2.312473952770233e-06, 3.939494490623474e-06, 5.566515028476715e-06, 7.193535566329956e-06, 8.820556104183197e-06, 1.0447576642036438e-05, 1.2074597179889679e-05, 1.370161771774292e-05, 1.532863825559616e-05, 1.6955658793449402e-05, 1.8582679331302643e-05, 2.0209699869155884e-05, 2.1836720407009125e-05, 2.3463740944862366e-05, 2.5090761482715607e-05, 2.6717782020568848e-05, 2.834480255842209e-05, 2.997182309627533e-05, 3.159884363412857e-05, 3.322586417198181e-05, 3.485288470983505e-05, 3.6479905247688293e-05, 3.8106925785541534e-05, 3.9733946323394775e-05, 4.1360966861248016e-05, 4.298798739910126e-05, 4.46150079369545e-05, 4.624202847480774e-05, 4.786904901266098e-05, 4.949606955051422e-05, 5.112309008836746e-05, 5.27501106262207e-05]}, 
"gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 1.0, 0.0, 4.0, 5.0, 5.0, 4.0, 8.0, 6.0, 14.0, 14.0, 19.0, 19.0, 29.0, 29.0, 44.0, 48.0, 55.0, 83.0, 95.0, 105.0, 130.0, 158.0, 263.0, 582.0, 960.0, 458.0, 186.0, 129.0, 112.0, 86.0, 68.0, 64.0, 44.0, 30.0, 34.0, 35.0, 18.0, 22.0, 27.0, 19.0, 11.0, 14.0, 13.0, 6.0, 5.0, 6.0, 10.0, 4.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.082918167114258e-05, -3.967713564634323e-05, -3.8525089621543884e-05, -3.737304359674454e-05, -3.622099757194519e-05, -3.5068951547145844e-05, -3.3916905522346497e-05, -3.276485949754715e-05, -3.16128134727478e-05, -3.0460767447948456e-05, -2.930872142314911e-05, -2.8156675398349762e-05, -2.7004629373550415e-05, -2.5852583348751068e-05, -2.470053732395172e-05, -2.3548491299152374e-05, -2.2396445274353027e-05, -2.124439924955368e-05, -2.0092353224754333e-05, -1.8940307199954987e-05, -1.778826117515564e-05, -1.6636215150356293e-05, -1.5484169125556946e-05, -1.4332123100757599e-05, -1.3180077075958252e-05, -1.2028031051158905e-05, -1.0875985026359558e-05, -9.723939001560211e-06, -8.571892976760864e-06, -7.419846951961517e-06, -6.26780092716217e-06, -5.1157549023628235e-06, -3.9637088775634766e-06, -2.8116628527641296e-06, -1.6596168279647827e-06, -5.075708031654358e-07, 6.444752216339111e-07, 1.796521246433258e-06, 2.948567271232605e-06, 4.100613296031952e-06, 5.252659320831299e-06, 6.404705345630646e-06, 7.556751370429993e-06, 8.70879739522934e-06, 9.860843420028687e-06, 1.1012889444828033e-05, 1.216493546962738e-05, 1.3316981494426727e-05, 1.4469027519226074e-05, 1.562107354402542e-05, 1.6773119568824768e-05, 1.7925165593624115e-05, 1.9077211618423462e-05, 2.022925764322281e-05, 2.1381303668022156e-05, 2.2533349692821503e-05, 2.368539571762085e-05, 2.4837441742420197e-05, 2.5989487767219543e-05, 2.714153379201889e-05, 2.8293579816818237e-05, 2.9445625841617584e-05, 3.059767186641693e-05, 3.174971789121628e-05, 3.2901763916015625e-05]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 3.0, 6.0, 4.0, 9.0, 11.0, 14.0, 19.0, 47.0, 46.0, 69.0, 132.0, 170.0, 141.0, 95.0, 58.0, 57.0, 34.0, 23.0, 21.0, 12.0, 10.0, 8.0, 7.0, 3.0, 4.0, 2.0, 3.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002866583818104118, -0.0002739180054049939, -0.00026117762899957597, -0.0002484372234903276, -0.00023569684708490968, -0.00022295647067949176, -0.00021021609427407384, -0.00019747571786865592, -0.00018473532691132277, -0.00017199495050590485, -0.0001592545595485717, -0.0001465141831431538, -0.00013377380673773587, -0.00012103341578040272, -0.0001082930393749848, -9.555265569360927e-05, -8.281227201223373e-05, -7.00718883308582e-05, -5.7331508287461475e-05, -4.459112824406475e-05, -3.1850744562689215e-05, -1.911036088131368e-05, -6.3699844758957624e-06, 6.370399205479771e-06, 1.9110782886855304e-05, 3.185116656823084e-05, 4.4591546611627564e-05, 5.733192665502429e-05, 7.007231033639982e-05, 8.281269401777536e-05, 9.555307042319328e-05, 0.00010829345410456881, 0.00012103380868211389, 0.0001337741850875318, 0.00014651457604486495, 0.00015925495245028287, 0.00017199534340761602, 0.00018473571981303394, 0.00019747609621845186, 0.00021021647262386978, 0.00022295686358120292, 0.00023569723998662084, 0.000248437630943954, 0.0002611780073493719, 
0.00027391838375478983, 0.00028665876016020775, 0.00029939913656562567, 0.00031213954207487404, 0.00032487991848029196, 0.0003376202948857099, 0.0003503606712911278, 0.0003631010768003762, 0.0003758414532057941, 0.000388581829611212, 0.00040132220601662993, 0.00041406258242204785, 0.00042680295882746577, 0.0004395433352328837, 0.0004522837116383016, 0.00046502408804371953, 0.0004777644935529679, 0.0004905048990622163, 0.0005032452754676342, 0.0005159856518730521, 0.00052872602827847]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 1.0, 2.0, 4.0, 3.0, 6.0, 6.0, 7.0, 6.0, 8.0, 16.0, 16.0, 16.0, 20.0, 21.0, 28.0, 26.0, 23.0, 31.0, 34.0, 27.0, 30.0, 32.0, 44.0, 40.0, 44.0, 37.0, 46.0, 31.0, 37.0, 35.0, 34.0, 29.0, 26.0, 30.0, 23.0, 17.0, 29.0, 23.0, 20.0, 11.0, 12.0, 12.0, 14.0, 14.0, 6.0, 5.0, 6.0, 6.0, 2.0, 3.0, 5.0, 4.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00013560056686401367, -0.0001307651400566101, -0.00012592971324920654, -0.00012109428644180298, -0.00011625885963439941, -0.00011142343282699585, -0.00010658800601959229, -0.00010175257921218872, -9.691715240478516e-05, -9.208172559738159e-05, -8.724629878997803e-05, -8.241087198257446e-05, -7.75754451751709e-05, -7.274001836776733e-05, -6.790459156036377e-05, -6.30691647529602e-05, -5.823373794555664e-05, -5.3398311138153076e-05, -4.856288433074951e-05, -4.372745752334595e-05, -3.889203071594238e-05, -3.405660390853882e-05, -2.9221177101135254e-05, -2.438575029373169e-05, -1.9550323486328125e-05, -1.471489667892456e-05, -9.879469871520996e-06, -5.044043064117432e-06, -2.086162567138672e-07, 4.626810550689697e-06, 9.462237358093262e-06, 1.4297664165496826e-05, 1.913309097290039e-05, 2.3968517780303955e-05, 2.880394458770752e-05, 3.3639371395111084e-05, 3.847479820251465e-05, 4.331022500991821e-05, 4.814565181732178e-05, 5.298107862472534e-05, 5.7816505432128906e-05, 6.265193223953247e-05, 6.748735904693604e-05, 7.23227858543396e-05, 7.715821266174316e-05, 8.199363946914673e-05, 8.682906627655029e-05, 9.166449308395386e-05, 9.649991989135742e-05, 0.00010133534669876099, 0.00010617077350616455, 0.00011100620031356812, 0.00011584162712097168, 0.00012067705392837524, 0.0001255124807357788, 0.00013034790754318237, 0.00013518333435058594, 0.0001400187611579895, 0.00014485418796539307, 0.00014968961477279663, 0.0001545250415802002, 0.00015936046838760376, 0.00016419589519500732, 0.0001690313220024109, 0.00017386674880981445]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 2.0, 4.0, 6.0, 7.0, 4.0, 9.0, 17.0, 23.0, 23.0, 35.0, 57.0, 71.0, 112.0, 207.0, 373.0, 592.0, 1235.0, 2832.0, 6961.0, 19686.0, 77562.0, 651626.0, 226351.0, 40087.0, 11980.0, 4602.0, 1950.0, 924.0, 466.0, 260.0, 167.0, 105.0, 71.0, 34.0, 24.0, 17.0, 21.0, 20.0, 9.0, 10.0, 3.0, 5.0, 6.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.00020384788513183594, -0.00019743293523788452, -0.0001910179853439331, -0.0001846030354499817, -0.00017818808555603027, -0.00017177313566207886, -0.00016535818576812744, -0.00015894323587417603, -0.0001525282859802246, -0.0001461133360862732, -0.00013969838619232178, -0.00013328343629837036, -0.00012686848640441895, -0.00012045353651046753, -0.00011403858661651611, -0.0001076236367225647, -0.00010120868682861328, -9.479373693466187e-05, -8.837878704071045e-05, -8.196383714675903e-05, -7.554888725280762e-05, -6.91339373588562e-05, 
-6.271898746490479e-05, -5.630403757095337e-05, -4.988908767700195e-05, -4.347413778305054e-05, -3.705918788909912e-05, -3.0644237995147705e-05, -2.422928810119629e-05, -1.7814338207244873e-05, -1.1399388313293457e-05, -4.984438419342041e-06, 1.430511474609375e-06, 7.845461368560791e-06, 1.4260411262512207e-05, 2.0675361156463623e-05, 2.709031105041504e-05, 3.3505260944366455e-05, 3.992021083831787e-05, 4.633516073226929e-05, 5.27501106262207e-05, 5.916506052017212e-05, 6.558001041412354e-05, 7.199496030807495e-05, 7.840991020202637e-05, 8.482486009597778e-05, 9.12398099899292e-05, 9.765475988388062e-05, 0.00010406970977783203, 0.00011048465967178345, 0.00011689960956573486, 0.00012331455945968628, 0.0001297295093536377, 0.0001361444592475891, 0.00014255940914154053, 0.00014897435903549194, 0.00015538930892944336, 0.00016180425882339478, 0.0001682192087173462, 0.0001746341586112976, 0.00018104910850524902, 0.00018746405839920044, 0.00019387900829315186, 0.00020029395818710327, 0.0002067089080810547]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 4.0, 4.0, 5.0, 7.0, 11.0, 15.0, 17.0, 29.0, 31.0, 66.0, 79.0, 116.0, 133.0, 125.0, 111.0, 75.0, 50.0, 45.0, 20.0, 19.0, 13.0, 8.0, 10.0, 6.0, 6.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5914440155029297e-05, -1.5157274901866913e-05, -1.4400109648704529e-05, -1.3642944395542145e-05, -1.288577914237976e-05, -1.2128613889217377e-05, -1.1371448636054993e-05, -1.0614283382892609e-05, -9.857118129730225e-06, -9.09995287656784e-06, -8.342787623405457e-06, -7.5856223702430725e-06, -6.8284571170806885e-06, -6.0712918639183044e-06, -5.31412661075592e-06, -4.556961357593536e-06, -3.7997961044311523e-06, -3.0426308512687683e-06, -2.2854655981063843e-06, -1.5283003449440002e-06, -7.711350917816162e-07, -1.3969838619232178e-08, 7.431954145431519e-07, 1.5003606677055359e-06, 2.25752592086792e-06, 3.014691174030304e-06, 3.771856427192688e-06, 4.529021680355072e-06, 5.286186933517456e-06, 6.04335218667984e-06, 6.800517439842224e-06, 7.557682693004608e-06, 8.314847946166992e-06, 9.072013199329376e-06, 9.82917845249176e-06, 1.0586343705654144e-05, 1.1343508958816528e-05, 1.2100674211978912e-05, 1.2857839465141296e-05, 1.361500471830368e-05, 1.4372169971466064e-05, 1.5129335224628448e-05, 1.5886500477790833e-05, 1.6643665730953217e-05, 1.74008309841156e-05, 1.8157996237277985e-05, 1.891516149044037e-05, 1.9672326743602753e-05, 2.0429491996765137e-05, 2.118665724992752e-05, 2.1943822503089905e-05, 2.270098775625229e-05, 2.3458153009414673e-05, 2.4215318262577057e-05, 2.497248351573944e-05, 2.5729648768901825e-05, 2.648681402206421e-05, 2.7243979275226593e-05, 2.8001144528388977e-05, 2.875830978155136e-05, 2.9515475034713745e-05, 3.027264028787613e-05, 3.102980554103851e-05, 3.17869707942009e-05, 3.254413604736328e-05]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 4.0, 7.0, 18.0, 12.0, 16.0, 25.0, 34.0, 54.0, 75.0, 99.0, 148.0, 203.0, 232.0, 358.0, 494.0, 749.0, 999.0, 1409.0, 1968.0, 3116.0, 4264.0, 6376.0, 10089.0, 15037.0, 24528.0, 38798.0, 68187.0, 124260.0, 334529.0, 189785.0, 86537.0, 51010.0, 29433.0, 19044.0, 11668.0, 7994.0, 5214.0, 3553.0, 2491.0, 1643.0, 1212.0, 832.0, 595.0, 387.0, 304.0, 226.0, 155.0, 118.0, 76.0, 60.0, 47.0, 27.0, 23.0, 16.0, 14.0, 8.0, 4.0, 4.0, 0.0, 1.0, 
2.0], "bins": [-5.5670738220214844e-05, -5.391612648963928e-05, -5.216151475906372e-05, -5.040690302848816e-05, -4.86522912979126e-05, -4.6897679567337036e-05, -4.5143067836761475e-05, -4.338845610618591e-05, -4.163384437561035e-05, -3.987923264503479e-05, -3.812462091445923e-05, -3.637000918388367e-05, -3.4615397453308105e-05, -3.2860785722732544e-05, -3.110617399215698e-05, -2.935156226158142e-05, -2.759695053100586e-05, -2.5842338800430298e-05, -2.4087727069854736e-05, -2.2333115339279175e-05, -2.0578503608703613e-05, -1.8823891878128052e-05, -1.706928014755249e-05, -1.531466841697693e-05, -1.3560056686401367e-05, -1.1805444955825806e-05, -1.0050833225250244e-05, -8.296221494674683e-06, -6.541609764099121e-06, -4.7869980335235596e-06, -3.032386302947998e-06, -1.2777745723724365e-06, 4.76837158203125e-07, 2.2314488887786865e-06, 3.986060619354248e-06, 5.7406723499298096e-06, 7.495284080505371e-06, 9.249895811080933e-06, 1.1004507541656494e-05, 1.2759119272232056e-05, 1.4513731002807617e-05, 1.626834273338318e-05, 1.802295446395874e-05, 1.9777566194534302e-05, 2.1532177925109863e-05, 2.3286789655685425e-05, 2.5041401386260986e-05, 2.6796013116836548e-05, 2.855062484741211e-05, 3.030523657798767e-05, 3.205984830856323e-05, 3.3814460039138794e-05, 3.5569071769714355e-05, 3.732368350028992e-05, 3.907829523086548e-05, 4.083290696144104e-05, 4.25875186920166e-05, 4.434213042259216e-05, 4.6096742153167725e-05, 4.7851353883743286e-05, 4.960596561431885e-05, 5.136057734489441e-05, 5.311518907546997e-05, 5.486980080604553e-05, 5.6624412536621094e-05]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 1.0, 5.0, 6.0, 7.0, 7.0, 6.0, 3.0, 9.0, 10.0, 13.0, 13.0, 17.0, 16.0, 20.0, 23.0, 30.0, 28.0, 37.0, 32.0, 39.0, 37.0, 44.0, 47.0, 50.0, 42.0, 48.0, 36.0, 36.0, 36.0, 29.0, 38.0, 28.0, 39.0, 25.0, 20.0, 20.0, 18.0, 16.0, 16.0, 12.0, 9.0, 8.0, 6.0, 8.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0], "bins": [-4.225969314575195e-05, -4.10228967666626e-05, -3.978610038757324e-05, -3.854930400848389e-05, -3.731250762939453e-05, -3.6075711250305176e-05, -3.483891487121582e-05, -3.3602118492126465e-05, -3.236532211303711e-05, -3.1128525733947754e-05, -2.98917293548584e-05, -2.8654932975769043e-05, -2.7418136596679688e-05, -2.6181340217590332e-05, -2.4944543838500977e-05, -2.370774745941162e-05, -2.2470951080322266e-05, -2.123415470123291e-05, -1.9997358322143555e-05, -1.87605619430542e-05, -1.7523765563964844e-05, -1.6286969184875488e-05, -1.5050172805786133e-05, -1.3813376426696777e-05, -1.2576580047607422e-05, -1.1339783668518066e-05, -1.0102987289428711e-05, -8.866190910339355e-06, -7.62939453125e-06, -6.3925981521606445e-06, -5.155801773071289e-06, -3.919005393981934e-06, -2.682209014892578e-06, -1.4454126358032227e-06, -2.086162567138672e-07, 1.0281801223754883e-06, 2.2649765014648438e-06, 3.5017728805541992e-06, 4.738569259643555e-06, 5.97536563873291e-06, 7.212162017822266e-06, 8.448958396911621e-06, 9.685754776000977e-06, 1.0922551155090332e-05, 1.2159347534179688e-05, 1.3396143913269043e-05, 1.4632940292358398e-05, 1.5869736671447754e-05, 1.710653305053711e-05, 1.8343329429626465e-05, 1.958012580871582e-05, 2.0816922187805176e-05, 2.205371856689453e-05, 2.3290514945983887e-05, 2.4527311325073242e-05, 2.5764107704162598e-05, 2.7000904083251953e-05, 2.823770046234131e-05, 2.9474496841430664e-05, 3.071129322052002e-05, 3.1948089599609375e-05, 3.318488597869873e-05, 3.4421682357788086e-05, 
3.565847873687744e-05, 3.68952751159668e-05]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 6.0, 3.0, 3.0, 5.0, 14.0, 14.0, 17.0, 30.0, 32.0, 53.0, 98.0, 112.0, 152.0, 282.0, 388.0, 510.0, 1263.0, 1670.0, 2891.0, 8380.0, 14568.0, 35461.0, 196907.0, 523949.0, 172795.0, 58578.0, 14495.0, 6780.0, 4308.0, 1642.0, 1060.0, 821.0, 375.0, 268.0, 222.0, 112.0, 83.0, 77.0, 41.0, 26.0, 27.0, 6.0, 11.0, 9.0, 8.0, 4.0, 6.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-6.854534149169922e-06, -6.656162440776825e-06, -6.457790732383728e-06, -6.259419023990631e-06, -6.061047315597534e-06, -5.862675607204437e-06, -5.66430389881134e-06, -5.465932190418243e-06, -5.2675604820251465e-06, -5.0691887736320496e-06, -4.870817065238953e-06, -4.672445356845856e-06, -4.474073648452759e-06, -4.275701940059662e-06, -4.077330231666565e-06, -3.878958523273468e-06, -3.680586814880371e-06, -3.482215106487274e-06, -3.2838433980941772e-06, -3.0854716897010803e-06, -2.8870999813079834e-06, -2.6887282729148865e-06, -2.4903565645217896e-06, -2.2919848561286926e-06, -2.0936131477355957e-06, -1.8952414393424988e-06, -1.6968697309494019e-06, -1.498498022556305e-06, -1.300126314163208e-06, -1.101754605770111e-06, -9.033828973770142e-07, -7.050111889839172e-07, -5.066394805908203e-07, -3.082677721977234e-07, -1.0989606380462646e-07, 8.847564458847046e-08, 2.868473529815674e-07, 4.852190613746643e-07, 6.835907697677612e-07, 8.819624781608582e-07, 1.080334186553955e-06, 1.278705894947052e-06, 1.477077603340149e-06, 1.6754493117332458e-06, 1.8738210201263428e-06, 2.0721927285194397e-06, 2.2705644369125366e-06, 2.4689361453056335e-06, 2.6673078536987305e-06, 2.8656795620918274e-06, 3.0640512704849243e-06, 3.2624229788780212e-06, 3.460794687271118e-06, 3.659166395664215e-06, 3.857538104057312e-06, 4.055909812450409e-06, 4.254281520843506e-06, 4.452653229236603e-06, 4.6510249376297e-06, 4.849396646022797e-06, 5.0477683544158936e-06, 5.2461400628089905e-06, 5.444511771202087e-06, 5.642883479595184e-06, 5.841255187988281e-06]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0, 6.0, 0.0, 13.0, 0.0, 22.0, 0.0, 12.0, 18.0, 0.0, 41.0, 0.0, 32.0, 0.0, 42.0, 0.0, 54.0, 0.0, 62.0, 0.0, 74.0, 0.0, 81.0, 0.0, 70.0, 72.0, 0.0, 77.0, 0.0, 76.0, 0.0, 58.0, 0.0, 46.0, 0.0, 45.0, 0.0, 23.0, 0.0, 29.0, 14.0, 0.0, 10.0, 0.0, 13.0, 0.0, 2.0, 0.0, 5.0, 0.0, 5.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.0132789611816406e-06, -9.816139936447144e-07, -9.499490261077881e-07, -9.182840585708618e-07, -8.866190910339355e-07, -8.549541234970093e-07, -8.23289155960083e-07, -7.916241884231567e-07, -7.599592208862305e-07, -7.282942533493042e-07, -6.966292858123779e-07, -6.649643182754517e-07, -6.332993507385254e-07, -6.016343832015991e-07, -5.699694156646729e-07, -5.383044481277466e-07, -5.066394805908203e-07, -4.7497451305389404e-07, -4.4330954551696777e-07, -4.116445779800415e-07, -3.7997961044311523e-07, -3.4831464290618896e-07, -3.166496753692627e-07, -2.849847078323364e-07, -2.5331974029541016e-07, -2.2165477275848389e-07, -1.8998980522155762e-07, -1.5832483768463135e-07, -1.2665987014770508e-07, -9.499490261077881e-08, -6.332993507385254e-08, -3.166496753692627e-08, 0.0, 3.166496753692627e-08, 6.332993507385254e-08, 9.499490261077881e-08, 1.2665987014770508e-07, 1.5832483768463135e-07, 1.8998980522155762e-07, 2.2165477275848389e-07, 2.5331974029541016e-07, 
2.849847078323364e-07, 3.166496753692627e-07, 3.4831464290618896e-07, 3.7997961044311523e-07, 4.116445779800415e-07, 4.4330954551696777e-07, 4.7497451305389404e-07, 5.066394805908203e-07, 5.383044481277466e-07, 5.699694156646729e-07, 6.016343832015991e-07, 6.332993507385254e-07, 6.649643182754517e-07, 6.966292858123779e-07, 7.282942533493042e-07, 7.599592208862305e-07, 7.916241884231567e-07, 8.23289155960083e-07, 8.549541234970093e-07, 8.866190910339355e-07, 9.182840585708618e-07, 9.499490261077881e-07, 9.816139936447144e-07, 1.0132789611816406e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [4.0, 2.0, 4.0, 2.0, 1.0, 5.0, 10.0, 4.0, 16.0, 9.0, 20.0, 34.0, 42.0, 52.0, 43.0, 106.0, 157.0, 252.0, 317.0, 241.0, 611.0, 902.0, 1337.0, 2216.0, 1496.0, 4482.0, 7752.0, 14506.0, 29341.0, 26862.0, 120884.0, 464824.0, 237897.0, 70000.0, 17563.0, 20346.0, 10670.0, 5983.0, 3392.0, 1170.0, 1683.0, 1036.0, 705.0, 467.0, 186.0, 280.0, 195.0, 125.0, 101.0, 42.0, 47.0, 43.0, 28.0, 24.0, 10.0, 14.0, 9.0, 11.0, 5.0, 0.0, 4.0, 0.0, 4.0, 2.0], "bins": [-3.3974647521972656e-06, -3.2903626561164856e-06, -3.1832605600357056e-06, -3.0761584639549255e-06, -2.9690563678741455e-06, -2.8619542717933655e-06, -2.7548521757125854e-06, -2.6477500796318054e-06, -2.5406479835510254e-06, -2.4335458874702454e-06, -2.3264437913894653e-06, -2.2193416953086853e-06, -2.1122395992279053e-06, -2.0051375031471252e-06, -1.8980354070663452e-06, -1.7909333109855652e-06, -1.6838312149047852e-06, -1.5767291188240051e-06, -1.469627022743225e-06, -1.362524926662445e-06, -1.255422830581665e-06, -1.148320734500885e-06, -1.041218638420105e-06, -9.34116542339325e-07, -8.270144462585449e-07, -7.199123501777649e-07, -6.128102540969849e-07, -5.057081580162048e-07, -3.986060619354248e-07, -2.915039658546448e-07, -1.8440186977386475e-07, -7.729977369308472e-08, 2.9802322387695312e-08, 1.3690441846847534e-07, 2.4400651454925537e-07, 3.511086106300354e-07, 4.5821070671081543e-07, 5.653128027915955e-07, 6.724148988723755e-07, 7.795169949531555e-07, 8.866190910339355e-07, 9.937211871147156e-07, 1.1008232831954956e-06, 1.2079253792762756e-06, 1.3150274753570557e-06, 1.4221295714378357e-06, 1.5292316675186157e-06, 1.6363337635993958e-06, 1.7434358596801758e-06, 1.8505379557609558e-06, 1.957640051841736e-06, 2.064742147922516e-06, 2.171844244003296e-06, 2.278946340084076e-06, 2.386048436164856e-06, 2.493150532245636e-06, 2.600252628326416e-06, 2.707354724407196e-06, 2.814456820487976e-06, 2.921558916568756e-06, 3.028661012649536e-06, 3.135763108730316e-06, 3.242865204811096e-06, 3.3499673008918762e-06, 3.4570693969726562e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 7.0, 7.0, 2.0, 10.0, 7.0, 13.0, 30.0, 19.0, 20.0, 54.0, 30.0, 38.0, 107.0, 46.0, 71.0, 78.0, 125.0, 63.0, 53.0, 65.0, 23.0, 19.0, 40.0, 6.0, 17.0, 10.0, 6.0, 5.0, 6.0, 8.0, 4.0, 3.0, 5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4437904357910156e-06, -2.3655593395233154e-06, -2.2873282432556152e-06, -2.209097146987915e-06, -2.130866050720215e-06, -2.0526349544525146e-06, -1.9744038581848145e-06, -1.8961727619171143e-06, -1.817941665649414e-06, -1.7397105693817139e-06, -1.6614794731140137e-06, -1.5832483768463135e-06, -1.5050172805786133e-06, -1.426786184310913e-06, -1.3485550880432129e-06, -1.2703239917755127e-06, -1.1920928955078125e-06, 
-1.1138617992401123e-06, -1.0356307029724121e-06, -9.57399606704712e-07, -8.791685104370117e-07, -8.009374141693115e-07, -7.227063179016113e-07, -6.444752216339111e-07, -5.662441253662109e-07, -4.880130290985107e-07, -4.0978193283081055e-07, -3.3155083656311035e-07, -2.5331974029541016e-07, -1.7508864402770996e-07, -9.685754776000977e-08, -1.862645149230957e-08, 5.960464477539063e-08, 1.3783574104309082e-07, 2.1606683731079102e-07, 2.942979335784912e-07, 3.725290298461914e-07, 4.507601261138916e-07, 5.289912223815918e-07, 6.07222318649292e-07, 6.854534149169922e-07, 7.636845111846924e-07, 8.419156074523926e-07, 9.201467037200928e-07, 9.98377799987793e-07, 1.0766088962554932e-06, 1.1548399925231934e-06, 1.2330710887908936e-06, 1.3113021850585938e-06, 1.389533281326294e-06, 1.4677643775939941e-06, 1.5459954738616943e-06, 1.6242265701293945e-06, 1.7024576663970947e-06, 1.780688762664795e-06, 1.8589198589324951e-06, 1.9371509552001953e-06, 2.0153820514678955e-06, 2.0936131477355957e-06, 2.171844244003296e-06, 2.250075340270996e-06, 2.3283064365386963e-06, 2.4065375328063965e-06, 2.4847686290740967e-06, 2.562999725341797e-06]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 3.0, 0.0, 9.0, 7.0, 9.0, 17.0, 22.0, 46.0, 62.0, 105.0, 238.0, 200.0, 97.0, 76.0, 31.0, 24.0, 21.0, 11.0, 5.0, 9.0, 6.0, 5.0, 5.0, 0.0, 1.0], "bins": [-0.0006909047369845212, -0.0006770605687052011, -0.0006632164004258811, -0.0006493722903542221, -0.0006355281220749021, -0.0006216839537955821, -0.0006078397855162621, -0.000593995675444603, -0.000580151507165283, -0.000566307338885963, -0.000552463170606643, -0.0005386190605349839, -0.0005247748922556639, -0.0005109307239763439, -0.0004970865556970239, -0.0004832424165215343, -0.0004693982773460448, -0.0004555541090667248, -0.00044170996989123523, -0.00042786580161191523, -0.0004140216624364257, -0.0004001774941571057, -0.00038633335498161614, -0.00037248918670229614, -0.00035864501842297614, -0.00034480085014365613, -0.0003309567109681666, -0.0003171125426888466, -0.00030326840351335704, -0.00028942423523403704, -0.0002755800960585475, -0.0002617359277792275, -0.00024789178860373795, -0.00023404763487633318, -0.0002202034811489284, -0.00020635932742152363, -0.00019251517369411886, -0.00017867100541479886, -0.0001648268662393093, -0.0001509826979599893, -0.00013713855878449976, -0.000123294405057095, -0.00010945025132969022, -9.560609760228544e-05, -8.176194387488067e-05, -6.791778287151828e-05, -5.407362914411351e-05, -4.022947541670874e-05, -2.638531441334635e-05, -1.2541159776446875e-05, 1.3029948604525998e-06, 1.5147150406846777e-05, 2.899130413425155e-05, 4.283546149963513e-05, 5.6679615227039903e-05, 7.052376895444468e-05, 8.436792268184945e-05, 9.821207640925422e-05, 0.000112056230136659, 0.00012590039114002138, 0.00013974454486742616, 0.00015358869859483093, 0.0001674328523222357, 0.00018127700604964048, 0.00019512115977704525]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 1.0, 1.0, 2.0, 5.0, 5.0, 3.0, 10.0, 10.0, 7.0, 17.0, 15.0, 19.0, 26.0, 20.0, 25.0, 40.0, 32.0, 31.0, 37.0, 33.0, 40.0, 43.0, 56.0, 49.0, 42.0, 34.0, 45.0, 51.0, 38.0, 30.0, 33.0, 33.0, 31.0, 22.0, 21.0, 20.0, 15.0, 12.0, 10.0, 12.0, 12.0, 7.0, 2.0, 4.0, 0.0, 1.0, 3.0, 2.0, 5.0, 
0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001723170280456543, -0.00016692373901605606, -0.00016153044998645782, -0.0001561371609568596, -0.00015074387192726135, -0.00014535058289766312, -0.00013995729386806488, -0.00013456400483846664, -0.0001291707158088684, -0.00012377742677927017, -0.00011838413774967194, -0.0001129908487200737, -0.00010759755969047546, -0.00010220427066087723, -9.681098163127899e-05, -9.141769260168076e-05, -8.602440357208252e-05, -8.063111454248428e-05, -7.523782551288605e-05, -6.984453648328781e-05, -6.445124745368958e-05, -5.905795842409134e-05, -5.36646693944931e-05, -4.827138036489487e-05, -4.287809133529663e-05, -3.7484802305698395e-05, -3.209151327610016e-05, -2.6698224246501923e-05, -2.1304935216903687e-05, -1.591164618730545e-05, -1.0518357157707214e-05, -5.125068128108978e-06, 2.682209014892578e-07, 5.661509931087494e-06, 1.105479896068573e-05, 1.6448087990283966e-05, 2.1841377019882202e-05, 2.7234666049480438e-05, 3.2627955079078674e-05, 3.802124410867691e-05, 4.3414533138275146e-05, 4.880782216787338e-05, 5.420111119747162e-05, 5.9594400227069855e-05, 6.498768925666809e-05, 7.038097828626633e-05, 7.577426731586456e-05, 8.11675563454628e-05, 8.656084537506104e-05, 9.195413440465927e-05, 9.734742343425751e-05, 0.00010274071246385574, 0.00010813400149345398, 0.00011352729052305222, 0.00011892057955265045, 0.0001243138685822487, 0.00012970715761184692, 0.00013510044664144516, 0.0001404937356710434, 0.00014588702470064163, 0.00015128031373023987, 0.0001566736027598381, 0.00016206689178943634, 0.00016746018081903458, 0.0001728534698486328]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 10.0, 10.0, 19.0, 41.0, 64.0, 86.0, 127.0, 244.0, 364.0, 620.0, 1032.0, 1691.0, 2811.0, 5051.0, 9277.0, 18833.0, 45325.0, 159814.0, 3504397.0, 323779.0, 66485.0, 25835.0, 12079.0, 6324.0, 3633.0, 2164.0, 1318.0, 876.0, 608.0, 379.0, 253.0, 165.0, 116.0, 87.0, 61.0, 64.0, 44.0, 28.0, 32.0, 15.0, 21.0, 19.0, 15.0, 16.0, 12.0, 13.0, 6.0, 4.0, 6.0, 2.0, 8.0, 5.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-6.443262100219727e-05, -6.146077066659927e-05, -5.848892033100128e-05, -5.551706999540329e-05, -5.25452196598053e-05, -4.9573369324207306e-05, -4.6601518988609314e-05, -4.362966865301132e-05, -4.065781831741333e-05, -3.768596798181534e-05, -3.4714117646217346e-05, -3.1742267310619354e-05, -2.8770416975021362e-05, -2.579856663942337e-05, -2.282671630382538e-05, -1.9854865968227386e-05, -1.6883015632629395e-05, -1.3911165297031403e-05, -1.093931496143341e-05, -7.967464625835419e-06, -4.995614290237427e-06, -2.023763954639435e-06, 9.480863809585571e-07, 3.919936716556549e-06, 6.891787052154541e-06, 9.863637387752533e-06, 1.2835487723350525e-05, 1.5807338058948517e-05, 1.877918839454651e-05, 2.17510387301445e-05, 2.4722889065742493e-05, 2.7694739401340485e-05, 3.0666589736938477e-05, 3.363844007253647e-05, 3.661029040813446e-05, 3.958214074373245e-05, 4.2553991079330444e-05, 4.5525841414928436e-05, 4.849769175052643e-05, 5.146954208612442e-05, 5.444139242172241e-05, 5.7413242757320404e-05, 6.0385093092918396e-05, 6.335694342851639e-05, 6.632879376411438e-05, 6.930064409971237e-05, 7.227249443531036e-05, 7.524434477090836e-05, 7.821619510650635e-05, 8.118804544210434e-05, 8.415989577770233e-05, 8.713174611330032e-05, 9.010359644889832e-05, 9.307544678449631e-05, 9.60472971200943e-05, 9.901914745569229e-05, 0.00010199099779129028, 0.00010496284812688828, 0.00010793469846248627, 
0.00011090654879808426, 0.00011387839913368225, 0.00011685024946928024, 0.00011982209980487823, 0.00012279395014047623, 0.00012576580047607422]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 7.0, 9.0, 7.0, 10.0, 9.0, 24.0, 18.0, 35.0, 44.0, 73.0, 117.0, 124.0, 110.0, 97.0, 82.0, 71.0, 38.0, 36.0, 24.0, 19.0, 9.0, 5.0, 5.0, 13.0, 5.0, 5.0, 2.0, 2.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.33514404296875e-05, -1.2695789337158203e-05, -1.2040138244628906e-05, -1.138448715209961e-05, -1.0728836059570312e-05, -1.0073184967041016e-05, -9.417533874511719e-06, -8.761882781982422e-06, -8.106231689453125e-06, -7.450580596923828e-06, -6.794929504394531e-06, -6.139278411865234e-06, -5.4836273193359375e-06, -4.827976226806641e-06, -4.172325134277344e-06, -3.516674041748047e-06, -2.86102294921875e-06, -2.205371856689453e-06, -1.5497207641601562e-06, -8.940696716308594e-07, -2.384185791015625e-07, 4.172325134277344e-07, 1.0728836059570312e-06, 1.7285346984863281e-06, 2.384185791015625e-06, 3.039836883544922e-06, 3.6954879760742188e-06, 4.351139068603516e-06, 5.0067901611328125e-06, 5.662441253662109e-06, 6.318092346191406e-06, 6.973743438720703e-06, 7.62939453125e-06, 8.285045623779297e-06, 8.940696716308594e-06, 9.59634780883789e-06, 1.0251998901367188e-05, 1.0907649993896484e-05, 1.1563301086425781e-05, 1.2218952178955078e-05, 1.2874603271484375e-05, 1.3530254364013672e-05, 1.4185905456542969e-05, 1.4841556549072266e-05, 1.5497207641601562e-05, 1.615285873413086e-05, 1.6808509826660156e-05, 1.7464160919189453e-05, 1.811981201171875e-05, 1.8775463104248047e-05, 1.9431114196777344e-05, 2.008676528930664e-05, 2.0742416381835938e-05, 2.1398067474365234e-05, 2.205371856689453e-05, 2.2709369659423828e-05, 2.3365020751953125e-05, 2.4020671844482422e-05, 2.467632293701172e-05, 2.5331974029541016e-05, 2.5987625122070312e-05, 2.664327621459961e-05, 2.7298927307128906e-05, 2.7954578399658203e-05, 2.86102294921875e-05]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 4.0, 10.0, 9.0, 21.0, 37.0, 41.0, 63.0, 84.0, 129.0, 184.0, 320.0, 458.0, 788.0, 1186.0, 1990.0, 3204.0, 5548.0, 9683.0, 17558.0, 34857.0, 68763.0, 171192.0, 939001.0, 2560463.0, 208381.0, 82300.0, 40746.0, 20155.0, 11414.0, 6148.0, 3594.0, 2222.0, 1273.0, 884.0, 521.0, 342.0, 221.0, 159.0, 99.0, 64.0, 46.0, 35.0, 33.0, 17.0, 10.0, 9.0, 2.0, 6.0, 5.0, 2.0, 3.0], "bins": [-6.54458999633789e-05, -6.363354623317719e-05, -6.182119250297546e-05, -6.000883877277374e-05, -5.819648504257202e-05, -5.63841313123703e-05, -5.457177758216858e-05, -5.275942385196686e-05, -5.094707012176514e-05, -4.9134716391563416e-05, -4.7322362661361694e-05, -4.551000893115997e-05, -4.369765520095825e-05, -4.188530147075653e-05, -4.007294774055481e-05, -3.826059401035309e-05, -3.644824028015137e-05, -3.4635886549949646e-05, -3.2823532819747925e-05, -3.1011179089546204e-05, -2.9198825359344482e-05, -2.738647162914276e-05, -2.557411789894104e-05, -2.376176416873932e-05, -2.1949410438537598e-05, -2.0137056708335876e-05, -1.8324702978134155e-05, -1.6512349247932434e-05, -1.4699995517730713e-05, -1.2887641787528992e-05, -1.107528805732727e-05, -9.26293432712555e-06, -7.450580596923828e-06, -5.638226866722107e-06, -3.825873136520386e-06, 
-2.0135194063186646e-06, -2.0116567611694336e-07, 1.6111880540847778e-06, 3.423541784286499e-06, 5.23589551448822e-06, 7.048249244689941e-06, 8.860602974891663e-06, 1.0672956705093384e-05, 1.2485310435295105e-05, 1.4297664165496826e-05, 1.6110017895698547e-05, 1.792237162590027e-05, 1.973472535610199e-05, 2.154707908630371e-05, 2.3359432816505432e-05, 2.5171786546707153e-05, 2.6984140276908875e-05, 2.8796494007110596e-05, 3.060884773731232e-05, 3.242120146751404e-05, 3.423355519771576e-05, 3.604590892791748e-05, 3.78582626581192e-05, 3.967061638832092e-05, 4.1482970118522644e-05, 4.3295323848724365e-05, 4.5107677578926086e-05, 4.692003130912781e-05, 4.873238503932953e-05, 5.054473876953125e-05]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 4.0, 1.0, 0.0, 6.0, 7.0, 5.0, 10.0, 20.0, 13.0, 21.0, 27.0, 32.0, 44.0, 53.0, 59.0, 83.0, 105.0, 143.0, 277.0, 733.0, 1281.0, 426.0, 151.0, 112.0, 82.0, 82.0, 57.0, 50.0, 51.0, 34.0, 31.0, 16.0, 16.0, 11.0, 11.0, 9.0, 7.0, 2.0, 5.0, 3.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-4.947185516357422e-05, -4.802923649549484e-05, -4.6586617827415466e-05, -4.514399915933609e-05, -4.3701380491256714e-05, -4.225876182317734e-05, -4.081614315509796e-05, -3.9373524487018585e-05, -3.793090581893921e-05, -3.648828715085983e-05, -3.5045668482780457e-05, -3.360304981470108e-05, -3.2160431146621704e-05, -3.071781247854233e-05, -2.927519381046295e-05, -2.7832575142383575e-05, -2.63899564743042e-05, -2.4947337806224823e-05, -2.3504719138145447e-05, -2.206210047006607e-05, -2.0619481801986694e-05, -1.9176863133907318e-05, -1.7734244465827942e-05, -1.6291625797748566e-05, -1.484900712966919e-05, -1.3406388461589813e-05, -1.1963769793510437e-05, -1.052115112543106e-05, -9.078532457351685e-06, -7.635913789272308e-06, -6.193295121192932e-06, -4.750676453113556e-06, -3.3080577850341797e-06, -1.8654391169548035e-06, -4.2282044887542725e-07, 1.019798219203949e-06, 2.462416887283325e-06, 3.905035555362701e-06, 5.347654223442078e-06, 6.790272891521454e-06, 8.23289155960083e-06, 9.675510227680206e-06, 1.1118128895759583e-05, 1.2560747563838959e-05, 1.4003366231918335e-05, 1.544598489999771e-05, 1.6888603568077087e-05, 1.8331222236156464e-05, 1.977384090423584e-05, 2.1216459572315216e-05, 2.2659078240394592e-05, 2.410169690847397e-05, 2.5544315576553345e-05, 2.698693424463272e-05, 2.8429552912712097e-05, 2.9872171580791473e-05, 3.131479024887085e-05, 3.2757408916950226e-05, 3.42000275850296e-05, 3.564264625310898e-05, 3.7085264921188354e-05, 3.852788358926773e-05, 3.997050225734711e-05, 4.141312092542648e-05, 4.285573959350586e-05]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 1.0, 2.0, 6.0, 9.0, 9.0, 22.0, 38.0, 52.0, 83.0, 140.0, 197.0, 141.0, 93.0, 56.0, 44.0, 27.0, 21.0, 27.0, 11.0, 5.0, 6.0, 4.0, 2.0, 5.0, 1.0, 3.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002161163283744827, -0.0002035608922597021, -0.00019100545614492148, -0.00017845002003014088, -0.00016589458391536027, -0.00015333914780057967, -0.0001407837262377143, -0.00012822827557101846, -0.00011567284673219547, -0.00010311741061741486, -9.056197450263426e-05, -7.800654566381127e-05, -6.545110954903066e-05, -5.289567343425006e-05, -4.034023731946945e-05, 
-2.7784801204688847e-05, -1.5229365089908242e-05, -2.673929884622339e-06, 9.881505320663564e-06, 2.2436939616454765e-05, 3.499237573123537e-05, 4.754780820803717e-05, 6.010324432281777e-05, 7.265868043759838e-05, 8.521411655237898e-05, 9.776955266715959e-05, 0.00011032498878194019, 0.00012288041762076318, 0.0001354358537355438, 0.0001479912898503244, 0.000160546725965105, 0.0001731021620798856, 0.00018565761274658144, 0.00019821304886136204, 0.00021076848497614264, 0.00022332392109092325, 0.00023587935720570385, 0.00024843477876856923, 0.00026099022943526506, 0.00027354565099813044, 0.0002861011016648263, 0.00029865652322769165, 0.0003112119738943875, 0.00032376739545725286, 0.0003363228461239487, 0.00034887826768681407, 0.0003614337183535099, 0.0003739891399163753, 0.00038654456147924066, 0.00039909998304210603, 0.00041165543370880187, 0.00042421085527166724, 0.0004367663059383631, 0.00044932172750122845, 0.0004618771781679243, 0.00047443259973078966, 0.0004869880503974855, 0.0004995435010641813, 0.0005120988935232162, 0.0005246543441899121, 0.0005372097948566079, 0.0005497652455233037, 0.0005623206379823387, 0.0005748760886490345, 0.0005874315393157303]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 3.0, 3.0, 5.0, 14.0, 3.0, 9.0, 8.0, 12.0, 12.0, 12.0, 13.0, 15.0, 25.0, 22.0, 31.0, 36.0, 26.0, 31.0, 31.0, 37.0, 23.0, 44.0, 39.0, 52.0, 38.0, 36.0, 51.0, 39.0, 43.0, 39.0, 27.0, 30.0, 23.0, 21.0, 17.0, 19.0, 13.0, 19.0, 12.0, 9.0, 15.0, 11.0, 7.0, 9.0, 5.0, 7.0, 4.0, 8.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.00014591217041015625, -0.00014131423085927963, -0.00013671629130840302, -0.0001321183517575264, -0.00012752041220664978, -0.00012292247265577316, -0.00011832453310489655, -0.00011372659355401993, -0.00010912865400314331, -0.00010453071445226669, -9.993277490139008e-05, -9.533483535051346e-05, -9.073689579963684e-05, -8.613895624876022e-05, -8.15410166978836e-05, -7.694307714700699e-05, -7.234513759613037e-05, -6.774719804525375e-05, -6.314925849437714e-05, -5.855131894350052e-05, -5.39533793926239e-05, -4.9355439841747284e-05, -4.4757500290870667e-05, -4.015956073999405e-05, -3.556162118911743e-05, -3.0963681638240814e-05, -2.6365742087364197e-05, -2.176780253648758e-05, -1.7169862985610962e-05, -1.2571923434734344e-05, -7.973983883857727e-06, -3.3760443329811096e-06, 1.2218952178955078e-06, 5.819834768772125e-06, 1.0417774319648743e-05, 1.501571387052536e-05, 1.9613653421401978e-05, 2.4211592972278595e-05, 2.8809532523155212e-05, 3.340747207403183e-05, 3.800541162490845e-05, 4.2603351175785065e-05, 4.720129072666168e-05, 5.17992302775383e-05, 5.639716982841492e-05, 6.0995109379291534e-05, 6.559304893016815e-05, 7.019098848104477e-05, 7.478892803192139e-05, 7.9386867582798e-05, 8.398480713367462e-05, 8.858274668455124e-05, 9.318068623542786e-05, 9.777862578630447e-05, 0.00010237656533718109, 0.00010697450488805771, 0.00011157244443893433, 0.00011617038398981094, 0.00012076832354068756, 0.00012536626309156418, 0.0001299642026424408, 0.0001345621421933174, 0.00013916008174419403, 0.00014375802129507065, 0.00014835596084594727]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 3.0, 6.0, 6.0, 4.0, 15.0, 12.0, 10.0, 16.0, 22.0, 27.0, 48.0, 90.0, 105.0, 228.0, 332.0, 659.0, 1612.0, 3462.0, 9636.0, 30361.0, 167328.0, 717866.0, 85421.0, 19484.0, 6805.0, 2508.0, 1215.0, 523.0, 
272.0, 152.0, 92.0, 65.0, 39.0, 34.0, 30.0, 9.0, 10.0, 5.0, 8.0, 12.0, 2.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0], "bins": [-0.0001882314682006836, -0.00018258392810821533, -0.00017693638801574707, -0.0001712888479232788, -0.00016564130783081055, -0.00015999376773834229, -0.00015434622764587402, -0.00014869868755340576, -0.0001430511474609375, -0.00013740360736846924, -0.00013175606727600098, -0.00012610852718353271, -0.00012046098709106445, -0.00011481344699859619, -0.00010916590690612793, -0.00010351836681365967, -9.78708267211914e-05, -9.222328662872314e-05, -8.657574653625488e-05, -8.092820644378662e-05, -7.528066635131836e-05, -6.96331262588501e-05, -6.398558616638184e-05, -5.8338046073913574e-05, -5.269050598144531e-05, -4.704296588897705e-05, -4.139542579650879e-05, -3.574788570404053e-05, -3.0100345611572266e-05, -2.4452805519104004e-05, -1.8805265426635742e-05, -1.315772533416748e-05, -7.510185241699219e-06, -1.862645149230957e-06, 3.7848949432373047e-06, 9.432435035705566e-06, 1.5079975128173828e-05, 2.072751522064209e-05, 2.637505531311035e-05, 3.202259540557861e-05, 3.7670135498046875e-05, 4.331767559051514e-05, 4.89652156829834e-05, 5.461275577545166e-05, 6.026029586791992e-05, 6.590783596038818e-05, 7.155537605285645e-05, 7.720291614532471e-05, 8.285045623779297e-05, 8.849799633026123e-05, 9.414553642272949e-05, 9.979307651519775e-05, 0.00010544061660766602, 0.00011108815670013428, 0.00011673569679260254, 0.0001223832368850708, 0.00012803077697753906, 0.00013367831707000732, 0.00013932585716247559, 0.00014497339725494385, 0.0001506209373474121, 0.00015626847743988037, 0.00016191601753234863, 0.0001675635576248169, 0.00017321109771728516]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 6.0, 3.0, 2.0, 4.0, 5.0, 19.0, 18.0, 17.0, 32.0, 56.0, 76.0, 111.0, 137.0, 139.0, 110.0, 82.0, 63.0, 34.0, 22.0, 23.0, 15.0, 10.0, 9.0, 4.0, 6.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1801719665527344e-05, -1.1072494089603424e-05, -1.0343268513679504e-05, -9.614042937755585e-06, -8.884817361831665e-06, -8.155591785907745e-06, -7.426366209983826e-06, -6.697140634059906e-06, -5.967915058135986e-06, -5.238689482212067e-06, -4.509463906288147e-06, -3.7802383303642273e-06, -3.0510127544403076e-06, -2.321787178516388e-06, -1.5925616025924683e-06, -8.633360266685486e-07, -1.341104507446289e-07, 5.951151251792908e-07, 1.3243407011032104e-06, 2.05356627702713e-06, 2.78279185295105e-06, 3.5120174288749695e-06, 4.241243004798889e-06, 4.970468580722809e-06, 5.6996941566467285e-06, 6.428919732570648e-06, 7.158145308494568e-06, 7.887370884418488e-06, 8.616596460342407e-06, 9.345822036266327e-06, 1.0075047612190247e-05, 1.0804273188114166e-05, 1.1533498764038086e-05, 1.2262724339962006e-05, 1.2991949915885925e-05, 1.3721175491809845e-05, 1.4450401067733765e-05, 1.5179626643657684e-05, 1.5908852219581604e-05, 1.6638077795505524e-05, 1.7367303371429443e-05, 1.8096528947353363e-05, 1.8825754523277283e-05, 1.9554980099201202e-05, 2.0284205675125122e-05, 2.1013431251049042e-05, 2.174265682697296e-05, 2.247188240289688e-05, 2.32011079788208e-05, 2.393033355474472e-05, 2.465955913066864e-05, 2.538878470659256e-05, 2.611801028251648e-05, 2.68472358584404e-05, 2.757646143436432e-05, 2.830568701028824e-05, 2.9034912586212158e-05, 2.9764138162136078e-05, 3.0493363738059998e-05, 
3.122258931398392e-05, 3.195181488990784e-05, 3.2681040465831757e-05, 3.3410266041755676e-05, 3.4139491617679596e-05, 3.4868717193603516e-05]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 4.0, 1.0, 10.0, 7.0, 13.0, 14.0, 25.0, 42.0, 61.0, 77.0, 116.0, 150.0, 211.0, 306.0, 409.0, 641.0, 915.0, 1294.0, 2017.0, 2872.0, 4316.0, 6412.0, 9803.0, 15254.0, 24313.0, 42160.0, 69959.0, 134985.0, 342297.0, 175221.0, 85179.0, 48096.0, 29840.0, 17636.0, 11280.0, 7437.0, 4776.0, 3251.0, 2166.0, 1573.0, 1062.0, 698.0, 485.0, 363.0, 250.0, 157.0, 126.0, 91.0, 58.0, 56.0, 19.0, 21.0, 15.0, 8.0, 8.0, 6.0, 1.0, 3.0, 3.0, 0.0, 1.0], "bins": [-4.744529724121094e-05, -4.594679921865463e-05, -4.444830119609833e-05, -4.294980317354202e-05, -4.145130515098572e-05, -3.995280712842941e-05, -3.845430910587311e-05, -3.69558110833168e-05, -3.54573130607605e-05, -3.395881503820419e-05, -3.246031701564789e-05, -3.096181899309158e-05, -2.946332097053528e-05, -2.7964822947978973e-05, -2.646632492542267e-05, -2.4967826902866364e-05, -2.346932888031006e-05, -2.1970830857753754e-05, -2.047233283519745e-05, -1.8973834812641144e-05, -1.747533679008484e-05, -1.5976838767528534e-05, -1.4478340744972229e-05, -1.2979842722415924e-05, -1.1481344699859619e-05, -9.982846677303314e-06, -8.48434865474701e-06, -6.985850632190704e-06, -5.487352609634399e-06, -3.9888545870780945e-06, -2.4903565645217896e-06, -9.918585419654846e-07, 5.066394805908203e-07, 2.0051375031471252e-06, 3.50363552570343e-06, 5.002133548259735e-06, 6.50063157081604e-06, 7.999129593372345e-06, 9.49762761592865e-06, 1.0996125638484955e-05, 1.249462366104126e-05, 1.3993121683597565e-05, 1.549161970615387e-05, 1.6990117728710175e-05, 1.848861575126648e-05, 1.9987113773822784e-05, 2.148561179637909e-05, 2.2984109818935394e-05, 2.44826078414917e-05, 2.5981105864048004e-05, 2.747960388660431e-05, 2.8978101909160614e-05, 3.047659993171692e-05, 3.1975097954273224e-05, 3.347359597682953e-05, 3.4972093999385834e-05, 3.647059202194214e-05, 3.7969090044498444e-05, 3.946758806705475e-05, 4.0966086089611053e-05, 4.246458411216736e-05, 4.396308213472366e-05, 4.546158015727997e-05, 4.696007817983627e-05, 4.845857620239258e-05]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 4.0, 1.0, 2.0, 5.0, 6.0, 5.0, 10.0, 8.0, 13.0, 13.0, 17.0, 21.0, 25.0, 23.0, 28.0, 31.0, 41.0, 42.0, 44.0, 30.0, 43.0, 50.0, 47.0, 52.0, 42.0, 52.0, 42.0, 39.0, 31.0, 41.0, 28.0, 23.0, 31.0, 19.0, 17.0, 18.0, 11.0, 6.0, 9.0, 6.0, 5.0, 5.0, 8.0, 5.0, 7.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.7789344787597656e-05, -3.6627985537052155e-05, -3.546662628650665e-05, -3.430526703596115e-05, -3.314390778541565e-05, -3.198254853487015e-05, -3.0821189284324646e-05, -2.9659830033779144e-05, -2.8498470783233643e-05, -2.733711153268814e-05, -2.617575228214264e-05, -2.5014393031597137e-05, -2.3853033781051636e-05, -2.2691674530506134e-05, -2.1530315279960632e-05, -2.036895602941513e-05, -1.920759677886963e-05, -1.8046237528324127e-05, -1.6884878277778625e-05, -1.5723519027233124e-05, -1.4562159776687622e-05, -1.340080052614212e-05, -1.2239441275596619e-05, -1.1078082025051117e-05, -9.916722774505615e-06, -8.755363523960114e-06, -7.594004273414612e-06, -6.43264502286911e-06, -5.271285772323608e-06, -4.109926521778107e-06, -2.948567271232605e-06, -1.7872080206871033e-06, -6.258487701416016e-07, 5.355104804039001e-07, 1.6968697309494019e-06, 
2.8582289814949036e-06, 4.019588232040405e-06, 5.180947482585907e-06, 6.342306733131409e-06, 7.50366598367691e-06, 8.665025234222412e-06, 9.826384484767914e-06, 1.0987743735313416e-05, 1.2149102985858917e-05, 1.3310462236404419e-05, 1.447182148694992e-05, 1.5633180737495422e-05, 1.6794539988040924e-05, 1.7955899238586426e-05, 1.9117258489131927e-05, 2.027861773967743e-05, 2.143997699022293e-05, 2.2601336240768433e-05, 2.3762695491313934e-05, 2.4924054741859436e-05, 2.6085413992404938e-05, 2.724677324295044e-05, 2.840813249349594e-05, 2.9569491744041443e-05, 3.0730850994586945e-05, 3.1892210245132446e-05, 3.305356949567795e-05, 3.421492874622345e-05, 3.537628799676895e-05, 3.653764724731445e-05]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 9.0, 4.0, 4.0, 16.0, 14.0, 19.0, 28.0, 22.0, 44.0, 71.0, 107.0, 238.0, 256.0, 403.0, 598.0, 920.0, 1485.0, 2342.0, 3845.0, 6633.0, 12591.0, 24525.0, 105974.0, 243963.0, 397206.0, 139357.0, 53770.0, 24410.0, 12504.0, 6728.0, 3858.0, 2328.0, 1997.0, 743.0, 516.0, 317.0, 219.0, 149.0, 99.0, 67.0, 52.0, 29.0, 24.0, 32.0, 16.0, 7.0, 11.0, 5.0, 5.0, 2.0, 1.0, 0.0, 1.0, 4.0], "bins": [-4.291534423828125e-06, -4.166737198829651e-06, -4.041939973831177e-06, -3.917142748832703e-06, -3.7923455238342285e-06, -3.6675482988357544e-06, -3.5427510738372803e-06, -3.417953848838806e-06, -3.293156623840332e-06, -3.168359398841858e-06, -3.043562173843384e-06, -2.9187649488449097e-06, -2.7939677238464355e-06, -2.6691704988479614e-06, -2.5443732738494873e-06, -2.419576048851013e-06, -2.294778823852539e-06, -2.169981598854065e-06, -2.045184373855591e-06, -1.9203871488571167e-06, -1.7955899238586426e-06, -1.6707926988601685e-06, -1.5459954738616943e-06, -1.4211982488632202e-06, -1.296401023864746e-06, -1.171603798866272e-06, -1.0468065738677979e-06, -9.220093488693237e-07, -7.972121238708496e-07, -6.724148988723755e-07, -5.476176738739014e-07, -4.2282044887542725e-07, -2.980232238769531e-07, -1.73225998878479e-07, -4.842877388000488e-08, 7.636845111846924e-08, 2.0116567611694336e-07, 3.259629011154175e-07, 4.507601261138916e-07, 5.755573511123657e-07, 7.003545761108398e-07, 8.25151801109314e-07, 9.499490261077881e-07, 1.0747462511062622e-06, 1.1995434761047363e-06, 1.3243407011032104e-06, 1.4491379261016846e-06, 1.5739351511001587e-06, 1.6987323760986328e-06, 1.823529601097107e-06, 1.948326826095581e-06, 2.073124051094055e-06, 2.1979212760925293e-06, 2.3227185010910034e-06, 2.4475157260894775e-06, 2.5723129510879517e-06, 2.6971101760864258e-06, 2.8219074010849e-06, 2.946704626083374e-06, 3.071501851081848e-06, 3.1962990760803223e-06, 3.3210963010787964e-06, 3.4458935260772705e-06, 3.5706907510757446e-06, 3.6954879760742188e-06]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 8.0, 0.0, 19.0, 17.0, 0.0, 28.0, 23.0, 0.0, 43.0, 50.0, 0.0, 51.0, 95.0, 0.0, 71.0, 0.0, 74.0, 94.0, 0.0, 87.0, 81.0, 0.0, 66.0, 55.0, 0.0, 41.0, 21.0, 0.0, 19.0, 16.0, 0.0, 10.0, 14.0, 0.0, 10.0, 9.0, 0.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3113021850585938e-06, -1.2721866369247437e-06, -1.2330710887908936e-06, -1.1939555406570435e-06, -1.1548399925231934e-06, -1.1157244443893433e-06, -1.0766088962554932e-06, -1.037493348121643e-06, -9.98377799987793e-07, -9.592622518539429e-07, -9.201467037200928e-07, -8.810311555862427e-07, 
-8.419156074523926e-07, -8.028000593185425e-07, -7.636845111846924e-07, -7.245689630508423e-07, -6.854534149169922e-07, -6.463378667831421e-07, -6.07222318649292e-07, -5.681067705154419e-07, -5.289912223815918e-07, -4.898756742477417e-07, -4.507601261138916e-07, -4.116445779800415e-07, -3.725290298461914e-07, -3.334134817123413e-07, -2.942979335784912e-07, -2.551823854446411e-07, -2.1606683731079102e-07, -1.7695128917694092e-07, -1.3783574104309082e-07, -9.872019290924072e-08, -5.960464477539063e-08, -2.0489096641540527e-08, 1.862645149230957e-08, 5.774199962615967e-08, 9.685754776000977e-08, 1.3597309589385986e-07, 1.7508864402770996e-07, 2.1420419216156006e-07, 2.5331974029541016e-07, 2.9243528842926025e-07, 3.3155083656311035e-07, 3.7066638469696045e-07, 4.0978193283081055e-07, 4.4889748096466064e-07, 4.880130290985107e-07, 5.271285772323608e-07, 5.662441253662109e-07, 6.05359673500061e-07, 6.444752216339111e-07, 6.835907697677612e-07, 7.227063179016113e-07, 7.618218660354614e-07, 8.009374141693115e-07, 8.400529623031616e-07, 8.791685104370117e-07, 9.182840585708618e-07, 9.57399606704712e-07, 9.96515154838562e-07, 1.0356307029724121e-06, 1.0747462511062622e-06, 1.1138617992401123e-06, 1.1529773473739624e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 5.0, 4.0, 10.0, 4.0, 21.0, 31.0, 21.0, 71.0, 54.0, 127.0, 228.0, 144.0, 459.0, 324.0, 987.0, 1648.0, 1274.0, 3881.0, 3004.0, 9444.0, 19274.0, 17027.0, 69032.0, 77830.0, 475106.0, 163792.0, 120572.0, 43127.0, 11362.0, 13398.0, 4059.0, 5171.0, 2862.0, 973.0, 1295.0, 412.0, 603.0, 365.0, 130.0, 163.0, 66.0, 83.0, 39.0, 18.0, 30.0, 7.0, 13.0, 8.0, 3.0, 3.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.9206275939941406e-06, -2.825632691383362e-06, -2.730637788772583e-06, -2.635642886161804e-06, -2.5406479835510254e-06, -2.4456530809402466e-06, -2.3506581783294678e-06, -2.255663275718689e-06, -2.16066837310791e-06, -2.0656734704971313e-06, -1.9706785678863525e-06, -1.8756836652755737e-06, -1.780688762664795e-06, -1.6856938600540161e-06, -1.5906989574432373e-06, -1.4957040548324585e-06, -1.4007091522216797e-06, -1.3057142496109009e-06, -1.210719347000122e-06, -1.1157244443893433e-06, -1.0207295417785645e-06, -9.257346391677856e-07, -8.307397365570068e-07, -7.35744833946228e-07, -6.407499313354492e-07, -5.457550287246704e-07, -4.507601261138916e-07, -3.557652235031128e-07, -2.60770320892334e-07, -1.6577541828155518e-07, -7.078051567077637e-08, 2.421438694000244e-08, 1.1920928955078125e-07, 2.1420419216156006e-07, 3.0919909477233887e-07, 4.041939973831177e-07, 4.991888999938965e-07, 5.941838026046753e-07, 6.891787052154541e-07, 7.841736078262329e-07, 8.791685104370117e-07, 9.741634130477905e-07, 1.0691583156585693e-06, 1.1641532182693481e-06, 1.259148120880127e-06, 1.3541430234909058e-06, 1.4491379261016846e-06, 1.5441328287124634e-06, 1.6391277313232422e-06, 1.734122633934021e-06, 1.8291175365447998e-06, 1.9241124391555786e-06, 2.0191073417663574e-06, 2.1141022443771362e-06, 2.209097146987915e-06, 2.304092049598694e-06, 2.3990869522094727e-06, 2.4940818548202515e-06, 2.5890767574310303e-06, 2.684071660041809e-06, 2.779066562652588e-06, 2.8740614652633667e-06, 2.9690563678741455e-06, 3.0640512704849243e-06, 3.159046173095703e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 1.0, 6.0, 1.0, 4.0, 4.0, 3.0, 4.0, 5.0, 
6.0, 9.0, 14.0, 11.0, 21.0, 17.0, 22.0, 22.0, 35.0, 46.0, 59.0, 47.0, 60.0, 67.0, 74.0, 60.0, 67.0, 52.0, 54.0, 40.0, 40.0, 24.0, 15.0, 19.0, 14.0, 16.0, 14.0, 8.0, 12.0, 6.0, 7.0, 2.0, 7.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 2.0], "bins": [-2.1457672119140625e-06, -2.086162567138672e-06, -2.0265579223632812e-06, -1.9669532775878906e-06, -1.9073486328125e-06, -1.8477439880371094e-06, -1.7881393432617188e-06, -1.7285346984863281e-06, -1.6689300537109375e-06, -1.6093254089355469e-06, -1.5497207641601562e-06, -1.4901161193847656e-06, -1.430511474609375e-06, -1.3709068298339844e-06, -1.3113021850585938e-06, -1.2516975402832031e-06, -1.1920928955078125e-06, -1.1324882507324219e-06, -1.0728836059570312e-06, -1.0132789611816406e-06, -9.5367431640625e-07, -8.940696716308594e-07, -8.344650268554688e-07, -7.748603820800781e-07, -7.152557373046875e-07, -6.556510925292969e-07, -5.960464477539062e-07, -5.364418029785156e-07, -4.76837158203125e-07, -4.172325134277344e-07, -3.5762786865234375e-07, -2.980232238769531e-07, -2.384185791015625e-07, -1.7881393432617188e-07, -1.1920928955078125e-07, -5.960464477539063e-08, 0.0, 5.960464477539063e-08, 1.1920928955078125e-07, 1.7881393432617188e-07, 2.384185791015625e-07, 2.980232238769531e-07, 3.5762786865234375e-07, 4.172325134277344e-07, 4.76837158203125e-07, 5.364418029785156e-07, 5.960464477539062e-07, 6.556510925292969e-07, 7.152557373046875e-07, 7.748603820800781e-07, 8.344650268554688e-07, 8.940696716308594e-07, 9.5367431640625e-07, 1.0132789611816406e-06, 1.0728836059570312e-06, 1.1324882507324219e-06, 1.1920928955078125e-06, 1.2516975402832031e-06, 1.3113021850585938e-06, 1.3709068298339844e-06, 1.430511474609375e-06, 1.4901161193847656e-06, 1.5497207641601562e-06, 1.6093254089355469e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 6.0, 15.0, 33.0, 60.0, 155.0, 359.0, 188.0, 82.0, 38.0, 36.0, 18.0, 8.0, 6.0, 4.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0008392405579797924, -0.0008219002047553658, -0.0008045599097386003, -0.0007872195565141737, -0.0007698792032897472, -0.0007525389082729816, -0.0007351985550485551, -0.0007178582018241286, -0.000700517906807363, -0.0006831775535829365, -0.0006658372585661709, -0.0006484969053417444, -0.0006311565521173179, -0.0006138161988928914, -0.0005964759038761258, -0.0005791355506516993, -0.0005617951974272728, -0.0005444548442028463, -0.0005271145491860807, -0.0005097741959616542, -0.0004924338427372277, -0.0004750935186166316, -0.0004577531944960356, -0.00044041284127160907, -0.000423072517151013, -0.00040573219303041697, -0.00038839183980599046, -0.0003710515156853944, -0.00035371119156479836, -0.00033637083834037185, -0.0003190305142197758, -0.00030169019009917974, -0.0002843498077709228, -0.00026700948365032673, -0.0002496691304259002, -0.00023232880630530417, -0.0002149884676327929, -0.0001976481289602816, -0.00018030780483968556, -0.00016296746616717428, -0.000145627127494663, -0.00012828678882215172, -0.00011094645742559806, -9.360612602904439e-05, -7.626578735653311e-05, -5.892544868402183e-05, -4.1585117287468165e-05, -2.42447858909145e-05, -6.90444721840322e-06, 1.0435887816129252e-05, 2.7776222850661725e-05, 4.51165578851942e-05, 6.245689291972667e-05, 7.979723159223795e-05, 9.713756298879161e-05, 
0.00011447789438534528, 0.00013181823305785656, 0.00014915857173036784, 0.00016649891040287912, 0.00018383923452347517, 0.00020117957319598645, 0.00021851991186849773, 0.00023586023598909378, 0.0002532005892135203, 0.00027054091333411634]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [3.0, 4.0, 5.0, 3.0, 7.0, 4.0, 6.0, 8.0, 12.0, 15.0, 17.0, 18.0, 19.0, 19.0, 33.0, 33.0, 35.0, 45.0, 39.0, 45.0, 41.0, 48.0, 46.0, 46.0, 39.0, 45.0, 38.0, 40.0, 48.0, 45.0, 29.0, 30.0, 17.0, 21.0, 30.0, 11.0, 18.0, 12.0, 10.0, 6.0, 4.0, 8.0, 3.0, 5.0, 6.0, 0.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001074671745300293, -0.00010298844426870346, -9.850971400737762e-05, -9.403098374605179e-05, -8.955225348472595e-05, -8.507352322340012e-05, -8.059479296207428e-05, -7.611606270074844e-05, -7.163733243942261e-05, -6.715860217809677e-05, -6.267987191677094e-05, -5.82011416554451e-05, -5.372241139411926e-05, -4.9243681132793427e-05, -4.476495087146759e-05, -4.0286220610141754e-05, -3.580749034881592e-05, -3.132876008749008e-05, -2.6850029826164246e-05, -2.237129956483841e-05, -1.7892569303512573e-05, -1.3413839042186737e-05, -8.935108780860901e-06, -4.456378519535065e-06, 2.2351741790771484e-08, 4.501082003116608e-06, 8.979812264442444e-06, 1.345854252576828e-05, 1.7937272787094116e-05, 2.2416003048419952e-05, 2.689473330974579e-05, 3.1373463571071625e-05, 3.585219383239746e-05, 4.03309240937233e-05, 4.480965435504913e-05, 4.928838461637497e-05, 5.3767114877700806e-05, 5.824584513902664e-05, 6.272457540035248e-05, 6.720330566167831e-05, 7.168203592300415e-05, 7.616076618432999e-05, 8.063949644565582e-05, 8.511822670698166e-05, 8.95969569683075e-05, 9.407568722963333e-05, 9.855441749095917e-05, 0.000103033147752285, 0.00010751187801361084, 0.00011199060827493668, 0.00011646933853626251, 0.00012094806879758835, 0.00012542679905891418, 0.00012990552932024002, 0.00013438425958156586, 0.0001388629898428917, 0.00014334172010421753, 0.00014782045036554337, 0.0001522991806268692, 0.00015677791088819504, 0.00016125664114952087, 0.0001657353714108467, 0.00017021410167217255, 0.00017469283193349838, 0.00017917156219482422]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 6.0, 9.0, 20.0, 34.0, 44.0, 65.0, 93.0, 157.0, 283.0, 453.0, 716.0, 1339.0, 2197.0, 4080.0, 7792.0, 16147.0, 36405.0, 106914.0, 2131257.0, 1716641.0, 94247.0, 36333.0, 17270.0, 8693.0, 5109.0, 2982.0, 1702.0, 1079.0, 672.0, 410.0, 308.0, 214.0, 125.0, 72.0, 73.0, 53.0, 45.0, 38.0, 32.0, 35.0, 27.0, 17.0, 14.0, 17.0, 9.0, 12.0, 11.0, 6.0, 6.0, 5.0, 6.0, 3.0, 8.0, 5.0, 2.0, 5.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.692243576049805e-05, -5.4333359003067017e-05, -5.1744282245635986e-05, -4.9155205488204956e-05, -4.6566128730773926e-05, -4.3977051973342896e-05, -4.1387975215911865e-05, -3.8798898458480835e-05, -3.6209821701049805e-05, -3.3620744943618774e-05, -3.1031668186187744e-05, -2.8442591428756714e-05, -2.5853514671325684e-05, -2.3264437913894653e-05, -2.0675361156463623e-05, -1.8086284399032593e-05, -1.5497207641601562e-05, -1.2908130884170532e-05, -1.0319054126739502e-05, -7.729977369308472e-06, -5.140900611877441e-06, -2.551823854446411e-06, 3.725290298461914e-08, 2.6263296604156494e-06, 5.21540641784668e-06, 7.80448317527771e-06, 1.039355993270874e-05, 1.298263669013977e-05, 1.55717134475708e-05, 1.816079020500183e-05, 2.074986696243286e-05, 2.333894371986389e-05, 
2.5928020477294922e-05, 2.8517097234725952e-05, 3.110617399215698e-05, 3.369525074958801e-05, 3.628432750701904e-05, 3.887340426445007e-05, 4.1462481021881104e-05, 4.4051557779312134e-05, 4.6640634536743164e-05, 4.9229711294174194e-05, 5.1818788051605225e-05, 5.4407864809036255e-05, 5.6996941566467285e-05, 5.9586018323898315e-05, 6.217509508132935e-05, 6.476417183876038e-05, 6.73532485961914e-05, 6.994232535362244e-05, 7.253140211105347e-05, 7.51204788684845e-05, 7.770955562591553e-05, 8.029863238334656e-05, 8.288770914077759e-05, 8.547678589820862e-05, 8.806586265563965e-05, 9.065493941307068e-05, 9.324401617050171e-05, 9.583309292793274e-05, 9.842216968536377e-05, 0.0001010112464427948, 0.00010360032320022583, 0.00010618939995765686, 0.00010877847671508789]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 5.0, 6.0, 5.0, 9.0, 13.0, 17.0, 26.0, 36.0, 44.0, 66.0, 103.0, 97.0, 108.0, 116.0, 79.0, 56.0, 67.0, 42.0, 26.0, 19.0, 21.0, 15.0, 5.0, 9.0, 4.0, 4.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2874603271484375e-05, -1.2256205081939697e-05, -1.163780689239502e-05, -1.1019408702850342e-05, -1.0401010513305664e-05, -9.782612323760986e-06, -9.164214134216309e-06, -8.545815944671631e-06, -7.927417755126953e-06, -7.309019565582275e-06, -6.690621376037598e-06, -6.07222318649292e-06, -5.453824996948242e-06, -4.8354268074035645e-06, -4.217028617858887e-06, -3.598630428314209e-06, -2.9802322387695312e-06, -2.3618340492248535e-06, -1.7434358596801758e-06, -1.125037670135498e-06, -5.066394805908203e-07, 1.1175870895385742e-07, 7.301568984985352e-07, 1.3485550880432129e-06, 1.9669532775878906e-06, 2.5853514671325684e-06, 3.203749656677246e-06, 3.822147846221924e-06, 4.4405460357666016e-06, 5.058944225311279e-06, 5.677342414855957e-06, 6.295740604400635e-06, 6.9141387939453125e-06, 7.53253698348999e-06, 8.150935173034668e-06, 8.769333362579346e-06, 9.387731552124023e-06, 1.0006129741668701e-05, 1.0624527931213379e-05, 1.1242926120758057e-05, 1.1861324310302734e-05, 1.2479722499847412e-05, 1.309812068939209e-05, 1.3716518878936768e-05, 1.4334917068481445e-05, 1.4953315258026123e-05, 1.55717134475708e-05, 1.619011163711548e-05, 1.6808509826660156e-05, 1.7426908016204834e-05, 1.8045306205749512e-05, 1.866370439529419e-05, 1.9282102584838867e-05, 1.9900500774383545e-05, 2.0518898963928223e-05, 2.11372971534729e-05, 2.1755695343017578e-05, 2.2374093532562256e-05, 2.2992491722106934e-05, 2.361088991165161e-05, 2.422928810119629e-05, 2.4847686290740967e-05, 2.5466084480285645e-05, 2.6084482669830322e-05, 2.6702880859375e-05]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 6.0, 7.0, 8.0, 9.0, 14.0, 26.0, 33.0, 49.0, 82.0, 127.0, 151.0, 238.0, 370.0, 584.0, 797.0, 1309.0, 1817.0, 3112.0, 4846.0, 7457.0, 12720.0, 21896.0, 37170.0, 75822.0, 173187.0, 842449.0, 2545382.0, 253280.0, 98144.0, 46734.0, 26717.0, 15244.0, 9177.0, 5339.0, 3521.0, 2216.0, 1400.0, 977.0, 611.0, 403.0, 274.0, 196.0, 142.0, 79.0, 57.0, 38.0, 28.0, 19.0, 10.0, 6.0, 9.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.082918167114258e-05, -3.947596997022629e-05, -3.812275826931e-05, -3.676954656839371e-05, -3.541633486747742e-05, -3.406312316656113e-05, -3.2709911465644836e-05, -3.1356699764728546e-05, -3.0003488063812256e-05, 
-2.8650276362895966e-05, -2.7297064661979675e-05, -2.5943852961063385e-05, -2.4590641260147095e-05, -2.3237429559230804e-05, -2.1884217858314514e-05, -2.0531006157398224e-05, -1.9177794456481934e-05, -1.7824582755565643e-05, -1.6471371054649353e-05, -1.5118159353733063e-05, -1.3764947652816772e-05, -1.2411735951900482e-05, -1.1058524250984192e-05, -9.705312550067902e-06, -8.352100849151611e-06, -6.998889148235321e-06, -5.645677447319031e-06, -4.2924657464027405e-06, -2.93925404548645e-06, -1.58604234457016e-06, -2.3283064365386963e-07, 1.1203810572624207e-06, 2.473592758178711e-06, 3.826804459095001e-06, 5.1800161600112915e-06, 6.533227860927582e-06, 7.886439561843872e-06, 9.239651262760162e-06, 1.0592862963676453e-05, 1.1946074664592743e-05, 1.3299286365509033e-05, 1.4652498066425323e-05, 1.6005709767341614e-05, 1.7358921468257904e-05, 1.8712133169174194e-05, 2.0065344870090485e-05, 2.1418556571006775e-05, 2.2771768271923065e-05, 2.4124979972839355e-05, 2.5478191673755646e-05, 2.6831403374671936e-05, 2.8184615075588226e-05, 2.9537826776504517e-05, 3.089103847742081e-05, 3.22442501783371e-05, 3.359746187925339e-05, 3.495067358016968e-05, 3.630388528108597e-05, 3.765709698200226e-05, 3.901030868291855e-05, 4.036352038383484e-05, 4.171673208475113e-05, 4.306994378566742e-05, 4.442315548658371e-05, 4.57763671875e-05]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 7.0, 8.0, 15.0, 13.0, 15.0, 12.0, 13.0, 25.0, 19.0, 25.0, 48.0, 48.0, 74.0, 57.0, 87.0, 100.0, 139.0, 259.0, 561.0, 1243.0, 479.0, 200.0, 121.0, 75.0, 90.0, 56.0, 49.0, 48.0, 32.0, 27.0, 23.0, 20.0, 17.0, 18.0, 13.0, 9.0, 3.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-3.600120544433594e-05, -3.489293158054352e-05, -3.37846577167511e-05, -3.267638385295868e-05, -3.156810998916626e-05, -3.045983612537384e-05, -2.935156226158142e-05, -2.8243288397789e-05, -2.7135014533996582e-05, -2.6026740670204163e-05, -2.4918466806411743e-05, -2.3810192942619324e-05, -2.2701919078826904e-05, -2.1593645215034485e-05, -2.0485371351242065e-05, -1.9377097487449646e-05, -1.8268823623657227e-05, -1.7160549759864807e-05, -1.6052275896072388e-05, -1.4944002032279968e-05, -1.3835728168487549e-05, -1.272745430469513e-05, -1.161918044090271e-05, -1.051090657711029e-05, -9.402632713317871e-06, -8.294358849525452e-06, -7.186084985733032e-06, -6.077811121940613e-06, -4.969537258148193e-06, -3.861263394355774e-06, -2.7529895305633545e-06, -1.644715666770935e-06, -5.364418029785156e-07, 5.718320608139038e-07, 1.6801059246063232e-06, 2.7883797883987427e-06, 3.896653652191162e-06, 5.0049275159835815e-06, 6.113201379776001e-06, 7.22147524356842e-06, 8.32974910736084e-06, 9.43802297115326e-06, 1.0546296834945679e-05, 1.1654570698738098e-05, 1.2762844562530518e-05, 1.3871118426322937e-05, 1.4979392290115356e-05, 1.6087666153907776e-05, 1.7195940017700195e-05, 1.8304213881492615e-05, 1.9412487745285034e-05, 2.0520761609077454e-05, 2.1629035472869873e-05, 2.2737309336662292e-05, 2.3845583200454712e-05, 2.495385706424713e-05, 2.606213092803955e-05, 2.717040479183197e-05, 2.827867865562439e-05, 2.938695251941681e-05, 3.049522638320923e-05, 3.160350024700165e-05, 3.271177411079407e-05, 3.382004797458649e-05, 3.4928321838378906e-05]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 2.0, 4.0, 
3.0, 9.0, 13.0, 12.0, 21.0, 11.0, 22.0, 40.0, 54.0, 70.0, 81.0, 121.0, 119.0, 97.0, 68.0, 57.0, 33.0, 35.0, 17.0, 25.0, 17.0, 17.0, 9.0, 9.0, 6.0, 3.0, 6.0, 3.0, 5.0, 3.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00020833007874898612, -0.0002008856099564582, -0.0001934411411639303, -0.00018599667237140238, -0.00017855218902695924, -0.00017110772023443133, -0.0001636632514419034, -0.0001562187826493755, -0.00014877429930493236, -0.00014132983051240444, -0.00013388536171987653, -0.00012644089292734861, -0.00011899640958290547, -0.00011155194079037756, -0.00010410747199784964, -9.666300320532173e-05, -8.921853441279382e-05, -8.17740656202659e-05, -7.432958955178037e-05, -6.688512075925246e-05, -5.944064832874574e-05, -5.1996175898239017e-05, -4.45517071057111e-05, -3.710723467520438e-05, -2.966276224469766e-05, -2.221828981419094e-05, -1.4773819202673621e-05, -7.329348591156304e-06, 1.1512383935041726e-07, 7.559596269857138e-06, 1.5004065062385052e-05, 2.2448537492891774e-05, 2.9893009923398495e-05, 3.7337482353905216e-05, 4.478195478441194e-05, 5.222642357693985e-05, 5.967089600744657e-05, 6.71153684379533e-05, 7.455983723048121e-05, 8.200430602300912e-05, 8.944878209149465e-05, 9.689325088402256e-05, 0.00010433772695250809, 0.000111782195745036, 0.00011922666453756392, 0.00012667113333009183, 0.00013411560212261975, 0.0001415600854670629, 0.0001490045542595908, 0.00015644902305211872, 0.00016389349184464663, 0.00017133797518908978, 0.0001787824439816177, 0.0001862269127741456, 0.00019367138156667352, 0.00020111585035920143, 0.00020856031915172935, 0.00021600478794425726, 0.00022344925673678517, 0.0002308937255293131, 0.00023833820887375623, 0.0002457826631143689, 0.00025322713190689683, 0.0002606716298032552, 0.0002681160985957831]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 5.0, 4.0, 9.0, 9.0, 10.0, 17.0, 15.0, 17.0, 15.0, 19.0, 23.0, 25.0, 29.0, 37.0, 26.0, 35.0, 27.0, 37.0, 62.0, 31.0, 45.0, 35.0, 48.0, 41.0, 37.0, 35.0, 41.0, 30.0, 34.0, 26.0, 29.0, 21.0, 16.0, 19.0, 16.0, 16.0, 8.0, 9.0, 9.0, 4.0, 7.0, 2.0, 4.0, 6.0, 4.0, 7.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.00012624263763427734, -0.00012203399091959, -0.00011782534420490265, -0.0001136166974902153, -0.00010940805077552795, -0.0001051994040608406, -0.00010099075734615326, -9.678211063146591e-05, -9.257346391677856e-05, -8.836481720209122e-05, -8.415617048740387e-05, -7.994752377271652e-05, -7.573887705802917e-05, -7.153023034334183e-05, -6.732158362865448e-05, -6.311293691396713e-05, -5.8904290199279785e-05, -5.469564348459244e-05, -5.048699676990509e-05, -4.627835005521774e-05, -4.2069703340530396e-05, -3.786105662584305e-05, -3.36524099111557e-05, -2.9443763196468353e-05, -2.5235116481781006e-05, -2.102646976709366e-05, -1.681782305240631e-05, -1.2609176337718964e-05, -8.400529623031616e-06, -4.191882908344269e-06, 1.6763806343078613e-08, 4.225410521030426e-06, 8.434057235717773e-06, 1.2642703950405121e-05, 1.6851350665092468e-05, 2.1059997379779816e-05, 2.5268644094467163e-05, 2.947729080915451e-05, 3.368593752384186e-05, 3.7894584238529205e-05, 4.210323095321655e-05, 4.63118776679039e-05, 5.052052438259125e-05, 5.4729171097278595e-05, 5.893781781196594e-05, 6.314646452665329e-05, 6.735511124134064e-05, 7.156375795602798e-05, 7.577240467071533e-05, 7.998105138540268e-05, 8.418969810009003e-05, 8.839834481477737e-05, 9.260699152946472e-05, 
9.681563824415207e-05, 0.00010102428495883942, 0.00010523293167352676, 0.00010944157838821411, 0.00011365022510290146, 0.0001178588718175888, 0.00012206751853227615, 0.0001262761652469635, 0.00013048481196165085, 0.0001346934586763382, 0.00013890210539102554, 0.0001431107521057129]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 7.0, 7.0, 7.0, 8.0, 10.0, 14.0, 14.0, 28.0, 41.0, 46.0, 68.0, 93.0, 151.0, 277.0, 431.0, 759.0, 1156.0, 2103.0, 3827.0, 7343.0, 15857.0, 40156.0, 131510.0, 610379.0, 155684.0, 43873.0, 17269.0, 7901.0, 3985.0, 2271.0, 1231.0, 766.0, 483.0, 279.0, 161.0, 105.0, 57.0, 58.0, 43.0, 28.0, 24.0, 12.0, 9.0, 8.0, 6.0, 6.0, 4.0, 5.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00010985136032104492, -0.00010647624731063843, -0.00010310113430023193, -9.972602128982544e-05, -9.635090827941895e-05, -9.297579526901245e-05, -8.960068225860596e-05, -8.622556924819946e-05, -8.285045623779297e-05, -7.947534322738647e-05, -7.610023021697998e-05, -7.272511720657349e-05, -6.935000419616699e-05, -6.59748911857605e-05, -6.2599778175354e-05, -5.922466516494751e-05, -5.5849552154541016e-05, -5.247443914413452e-05, -4.909932613372803e-05, -4.572421312332153e-05, -4.234910011291504e-05, -3.8973987102508545e-05, -3.559887409210205e-05, -3.222376108169556e-05, -2.8848648071289062e-05, -2.547353506088257e-05, -2.2098422050476074e-05, -1.872330904006958e-05, -1.5348196029663086e-05, -1.1973083019256592e-05, -8.597970008850098e-06, -5.2228569984436035e-06, -1.8477439880371094e-06, 1.5273690223693848e-06, 4.902482032775879e-06, 8.277595043182373e-06, 1.1652708053588867e-05, 1.5027821063995361e-05, 1.8402934074401855e-05, 2.177804708480835e-05, 2.5153160095214844e-05, 2.8528273105621338e-05, 3.190338611602783e-05, 3.5278499126434326e-05, 3.865361213684082e-05, 4.2028725147247314e-05, 4.540383815765381e-05, 4.87789511680603e-05, 5.21540641784668e-05, 5.552917718887329e-05, 5.8904290199279785e-05, 6.227940320968628e-05, 6.565451622009277e-05, 6.902962923049927e-05, 7.240474224090576e-05, 7.577985525131226e-05, 7.915496826171875e-05, 8.253008127212524e-05, 8.590519428253174e-05, 8.928030729293823e-05, 9.265542030334473e-05, 9.603053331375122e-05, 9.940564632415771e-05, 0.00010278075933456421, 0.0001061558723449707]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0, 4.0, 8.0, 3.0, 7.0, 10.0, 18.0, 10.0, 14.0, 38.0, 51.0, 60.0, 77.0, 122.0, 91.0, 128.0, 91.0, 63.0, 64.0, 39.0, 34.0, 19.0, 9.0, 8.0, 11.0, 10.0, 4.0, 3.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6808509826660156e-05, -1.6179867088794708e-05, -1.555122435092926e-05, -1.4922581613063812e-05, -1.4293938875198364e-05, -1.3665296137332916e-05, -1.3036653399467468e-05, -1.240801066160202e-05, -1.1779367923736572e-05, -1.1150725185871124e-05, -1.0522082448005676e-05, -9.893439710140228e-06, -9.26479697227478e-06, -8.636154234409332e-06, -8.007511496543884e-06, -7.378868758678436e-06, -6.750226020812988e-06, -6.12158328294754e-06, -5.492940545082092e-06, -4.864297807216644e-06, -4.235655069351196e-06, -3.6070123314857483e-06, -2.9783695936203003e-06, -2.3497268557548523e-06, -1.7210841178894043e-06, -1.0924413800239563e-06, -4.637986421585083e-07, 1.648440957069397e-07, 7.934868335723877e-07, 1.4221295714378357e-06, 2.0507723093032837e-06, 
2.6794150471687317e-06, 3.3080577850341797e-06, 3.936700522899628e-06, 4.565343260765076e-06, 5.193985998630524e-06, 5.822628736495972e-06, 6.45127147436142e-06, 7.079914212226868e-06, 7.708556950092316e-06, 8.337199687957764e-06, 8.965842425823212e-06, 9.59448516368866e-06, 1.0223127901554108e-05, 1.0851770639419556e-05, 1.1480413377285004e-05, 1.2109056115150452e-05, 1.27376988530159e-05, 1.3366341590881348e-05, 1.3994984328746796e-05, 1.4623627066612244e-05, 1.5252269804477692e-05, 1.588091254234314e-05, 1.6509555280208588e-05, 1.7138198018074036e-05, 1.7766840755939484e-05, 1.839548349380493e-05, 1.902412623167038e-05, 1.9652768969535828e-05, 2.0281411707401276e-05, 2.0910054445266724e-05, 2.153869718313217e-05, 2.216733992099762e-05, 2.2795982658863068e-05, 2.3424625396728516e-05]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": [4.0, 3.0, 10.0, 6.0, 8.0, 11.0, 6.0, 11.0, 27.0, 44.0, 58.0, 84.0, 124.0, 141.0, 235.0, 298.0, 463.0, 594.0, 941.0, 1273.0, 1999.0, 2897.0, 4102.0, 6314.0, 8932.0, 14241.0, 20735.0, 34774.0, 55723.0, 111344.0, 305653.0, 242546.0, 95481.0, 49544.0, 31505.0, 18921.0, 13074.0, 8345.0, 5759.0, 3738.0, 2721.0, 1769.0, 1271.0, 842.0, 587.0, 408.0, 288.0, 204.0, 159.0, 102.0, 74.0, 51.0, 35.0, 32.0, 23.0, 10.0, 15.0, 2.0, 5.0, 5.0, 0.0, 4.0, 0.0, 1.0], "bins": [-3.600120544433594e-05, -3.4836120903491974e-05, -3.367103636264801e-05, -3.250595182180405e-05, -3.134086728096008e-05, -3.017578274011612e-05, -2.9010698199272156e-05, -2.7845613658428192e-05, -2.668052911758423e-05, -2.5515444576740265e-05, -2.43503600358963e-05, -2.3185275495052338e-05, -2.2020190954208374e-05, -2.085510641336441e-05, -1.9690021872520447e-05, -1.8524937331676483e-05, -1.735985279083252e-05, -1.6194768249988556e-05, -1.5029683709144592e-05, -1.3864599168300629e-05, -1.2699514627456665e-05, -1.1534430086612701e-05, -1.0369345545768738e-05, -9.204261004924774e-06, -8.03917646408081e-06, -6.874091923236847e-06, -5.709007382392883e-06, -4.54392284154892e-06, -3.378838300704956e-06, -2.2137537598609924e-06, -1.0486692190170288e-06, 1.1641532182693481e-07, 1.2814998626708984e-06, 2.446584403514862e-06, 3.6116689443588257e-06, 4.776753485202789e-06, 5.941838026046753e-06, 7.1069225668907166e-06, 8.27200710773468e-06, 9.437091648578644e-06, 1.0602176189422607e-05, 1.1767260730266571e-05, 1.2932345271110535e-05, 1.4097429811954498e-05, 1.5262514352798462e-05, 1.6427598893642426e-05, 1.759268343448639e-05, 1.8757767975330353e-05, 1.9922852516174316e-05, 2.108793705701828e-05, 2.2253021597862244e-05, 2.3418106138706207e-05, 2.458319067955017e-05, 2.5748275220394135e-05, 2.6913359761238098e-05, 2.8078444302082062e-05, 2.9243528842926025e-05, 3.040861338376999e-05, 3.157369792461395e-05, 3.2738782465457916e-05, 3.390386700630188e-05, 3.5068951547145844e-05, 3.623403608798981e-05, 3.739912062883377e-05, 3.8564205169677734e-05]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 2.0, 6.0, 4.0, 7.0, 8.0, 12.0, 9.0, 13.0, 13.0, 22.0, 16.0, 21.0, 22.0, 24.0, 34.0, 27.0, 37.0, 39.0, 50.0, 38.0, 64.0, 39.0, 54.0, 52.0, 51.0, 54.0, 39.0, 35.0, 31.0, 25.0, 21.0, 20.0, 22.0, 22.0, 9.0, 11.0, 14.0, 11.0, 7.0, 6.0, 3.0, 4.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0], "bins": [-3.510713577270508e-05, -3.4047290682792664e-05, -3.298744559288025e-05, -3.1927600502967834e-05, -3.086775541305542e-05, -2.9807910323143005e-05, -2.874806523323059e-05, 
-2.7688220143318176e-05, -2.6628375053405762e-05, -2.5568529963493347e-05, -2.4508684873580933e-05, -2.3448839783668518e-05, -2.2388994693756104e-05, -2.132914960384369e-05, -2.0269304513931274e-05, -1.920945942401886e-05, -1.8149614334106445e-05, -1.708976924419403e-05, -1.6029924154281616e-05, -1.4970079064369202e-05, -1.3910233974456787e-05, -1.2850388884544373e-05, -1.1790543794631958e-05, -1.0730698704719543e-05, -9.670853614807129e-06, -8.611008524894714e-06, -7.5511634349823e-06, -6.491318345069885e-06, -5.431473255157471e-06, -4.371628165245056e-06, -3.3117830753326416e-06, -2.251937985420227e-06, -1.1920928955078125e-06, -1.3224780559539795e-07, 9.275972843170166e-07, 1.987442374229431e-06, 3.0472874641418457e-06, 4.10713255405426e-06, 5.166977643966675e-06, 6.226822733879089e-06, 7.286667823791504e-06, 8.346512913703918e-06, 9.406358003616333e-06, 1.0466203093528748e-05, 1.1526048183441162e-05, 1.2585893273353577e-05, 1.3645738363265991e-05, 1.4705583453178406e-05, 1.576542854309082e-05, 1.6825273633003235e-05, 1.788511872291565e-05, 1.8944963812828064e-05, 2.000480890274048e-05, 2.1064653992652893e-05, 2.2124499082565308e-05, 2.3184344172477722e-05, 2.4244189262390137e-05, 2.530403435230255e-05, 2.6363879442214966e-05, 2.742372453212738e-05, 2.8483569622039795e-05, 2.954341471195221e-05, 3.0603259801864624e-05, 3.166310489177704e-05, 3.272294998168945e-05]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 0.0, 6.0, 9.0, 13.0, 18.0, 17.0, 34.0, 54.0, 75.0, 112.0, 192.0, 299.0, 575.0, 1108.0, 2129.0, 4442.0, 10563.0, 28327.0, 95258.0, 523417.0, 285543.0, 61522.0, 19757.0, 7867.0, 3412.0, 1687.0, 916.0, 469.0, 295.0, 145.0, 97.0, 65.0, 35.0, 36.0, 16.0, 13.0, 14.0, 7.0, 5.0, 2.0, 1.0, 5.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.662441253662109e-06, -5.4836273193359375e-06, -5.304813385009766e-06, -5.125999450683594e-06, -4.947185516357422e-06, -4.76837158203125e-06, -4.589557647705078e-06, -4.410743713378906e-06, -4.231929779052734e-06, -4.0531158447265625e-06, -3.874301910400391e-06, -3.6954879760742188e-06, -3.516674041748047e-06, -3.337860107421875e-06, -3.159046173095703e-06, -2.9802322387695312e-06, -2.8014183044433594e-06, -2.6226043701171875e-06, -2.4437904357910156e-06, -2.2649765014648438e-06, -2.086162567138672e-06, -1.9073486328125e-06, -1.7285346984863281e-06, -1.5497207641601562e-06, -1.3709068298339844e-06, -1.1920928955078125e-06, -1.0132789611816406e-06, -8.344650268554688e-07, -6.556510925292969e-07, -4.76837158203125e-07, -2.980232238769531e-07, -1.1920928955078125e-07, 5.960464477539063e-08, 2.384185791015625e-07, 4.172325134277344e-07, 5.960464477539062e-07, 7.748603820800781e-07, 9.5367431640625e-07, 1.1324882507324219e-06, 1.3113021850585938e-06, 1.4901161193847656e-06, 1.6689300537109375e-06, 1.8477439880371094e-06, 2.0265579223632812e-06, 2.205371856689453e-06, 2.384185791015625e-06, 2.562999725341797e-06, 2.7418136596679688e-06, 2.9206275939941406e-06, 3.0994415283203125e-06, 3.2782554626464844e-06, 3.4570693969726562e-06, 3.635883331298828e-06, 3.814697265625e-06, 3.993511199951172e-06, 4.172325134277344e-06, 4.351139068603516e-06, 4.5299530029296875e-06, 4.708766937255859e-06, 4.887580871582031e-06, 5.066394805908203e-06, 5.245208740234375e-06, 5.424022674560547e-06, 5.602836608886719e-06, 5.781650543212891e-06]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 0.0, 4.0, 12.0, 10.0, 20.0, 16.0, 0.0, 28.0, 18.0, 31.0, 33.0, 0.0, 45.0, 57.0, 78.0, 96.0, 0.0, 87.0, 102.0, 78.0, 56.0, 50.0, 0.0, 45.0, 25.0, 26.0, 30.0, 0.0, 15.0, 12.0, 6.0, 9.0, 0.0, 7.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5497207641601562e-06, -1.5012919902801514e-06, -1.4528632164001465e-06, -1.4044344425201416e-06, -1.3560056686401367e-06, -1.3075768947601318e-06, -1.259148120880127e-06, -1.210719347000122e-06, -1.1622905731201172e-06, -1.1138617992401123e-06, -1.0654330253601074e-06, -1.0170042514801025e-06, -9.685754776000977e-07, -9.201467037200928e-07, -8.717179298400879e-07, -8.23289155960083e-07, -7.748603820800781e-07, -7.264316082000732e-07, -6.780028343200684e-07, -6.295740604400635e-07, -5.811452865600586e-07, -5.327165126800537e-07, -4.842877388000488e-07, -4.3585896492004395e-07, -3.8743019104003906e-07, -3.390014171600342e-07, -2.905726432800293e-07, -2.421438694000244e-07, -1.9371509552001953e-07, -1.4528632164001465e-07, -9.685754776000977e-08, -4.842877388000488e-08, 0.0, 4.842877388000488e-08, 9.685754776000977e-08, 1.4528632164001465e-07, 1.9371509552001953e-07, 2.421438694000244e-07, 2.905726432800293e-07, 3.390014171600342e-07, 3.8743019104003906e-07, 4.3585896492004395e-07, 4.842877388000488e-07, 5.327165126800537e-07, 5.811452865600586e-07, 6.295740604400635e-07, 6.780028343200684e-07, 7.264316082000732e-07, 7.748603820800781e-07, 8.23289155960083e-07, 8.717179298400879e-07, 9.201467037200928e-07, 9.685754776000977e-07, 1.0170042514801025e-06, 1.0654330253601074e-06, 1.1138617992401123e-06, 1.1622905731201172e-06, 1.210719347000122e-06, 1.259148120880127e-06, 1.3075768947601318e-06, 1.3560056686401367e-06, 1.4044344425201416e-06, 1.4528632164001465e-06, 1.5012919902801514e-06, 1.5497207641601562e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [4.0, 1.0, 3.0, 2.0, 7.0, 3.0, 5.0, 6.0, 3.0, 20.0, 13.0, 30.0, 14.0, 60.0, 43.0, 53.0, 167.0, 129.0, 324.0, 218.0, 675.0, 569.0, 1635.0, 1302.0, 1769.0, 6010.0, 5274.0, 20363.0, 20204.0, 103829.0, 168560.0, 385988.0, 236861.0, 35471.0, 32673.0, 7806.0, 8887.0, 2527.0, 3097.0, 925.0, 687.0, 957.0, 309.0, 429.0, 148.0, 173.0, 62.0, 65.0, 78.0, 28.0, 38.0, 12.0, 23.0, 10.0, 11.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0], "bins": [-2.682209014892578e-06, -2.596527338027954e-06, -2.51084566116333e-06, -2.425163984298706e-06, -2.339482307434082e-06, -2.253800630569458e-06, -2.168118953704834e-06, -2.08243727684021e-06, -1.996755599975586e-06, -1.911073923110962e-06, -1.8253922462463379e-06, -1.7397105693817139e-06, -1.6540288925170898e-06, -1.5683472156524658e-06, -1.4826655387878418e-06, -1.3969838619232178e-06, -1.3113021850585938e-06, -1.2256205081939697e-06, -1.1399388313293457e-06, -1.0542571544647217e-06, -9.685754776000977e-07, -8.828938007354736e-07, -7.972121238708496e-07, -7.115304470062256e-07, -6.258487701416016e-07, -5.401670932769775e-07, -4.544854164123535e-07, -3.688037395477295e-07, -2.8312206268310547e-07, -1.9744038581848145e-07, -1.1175870895385742e-07, -2.60770320892334e-08, 5.960464477539063e-08, 1.4528632164001465e-07, 2.3096799850463867e-07, 3.166496753692627e-07, 4.023313522338867e-07, 4.880130290985107e-07, 5.736947059631348e-07, 6.593763828277588e-07, 7.450580596923828e-07, 8.307397365570068e-07, 9.164214134216309e-07, 1.0021030902862549e-06, 1.087784767150879e-06, 1.173466444015503e-06, 1.259148120880127e-06, 
1.344829797744751e-06, 1.430511474609375e-06, 1.516193151473999e-06, 1.601874828338623e-06, 1.687556505203247e-06, 1.773238182067871e-06, 1.8589198589324951e-06, 1.944601535797119e-06, 2.030283212661743e-06, 2.115964889526367e-06, 2.201646566390991e-06, 2.2873282432556152e-06, 2.3730099201202393e-06, 2.4586915969848633e-06, 2.5443732738494873e-06, 2.6300549507141113e-06, 2.7157366275787354e-06, 2.8014183044433594e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 7.0, 3.0, 3.0, 5.0, 5.0, 11.0, 12.0, 25.0, 22.0, 20.0, 19.0, 31.0, 45.0, 42.0, 120.0, 81.0, 95.0, 83.0, 68.0, 48.0, 47.0, 77.0, 23.0, 24.0, 15.0, 18.0, 10.0, 5.0, 12.0, 8.0, 2.0, 7.0, 2.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-2.562999725341797e-06, -2.495013177394867e-06, -2.427026629447937e-06, -2.359040081501007e-06, -2.291053533554077e-06, -2.2230669856071472e-06, -2.1550804376602173e-06, -2.0870938897132874e-06, -2.0191073417663574e-06, -1.9511207938194275e-06, -1.8831342458724976e-06, -1.8151476979255676e-06, -1.7471611499786377e-06, -1.6791746020317078e-06, -1.6111880540847778e-06, -1.543201506137848e-06, -1.475214958190918e-06, -1.407228410243988e-06, -1.339241862297058e-06, -1.2712553143501282e-06, -1.2032687664031982e-06, -1.1352822184562683e-06, -1.0672956705093384e-06, -9.993091225624084e-07, -9.313225746154785e-07, -8.633360266685486e-07, -7.953494787216187e-07, -7.273629307746887e-07, -6.593763828277588e-07, -5.913898348808289e-07, -5.234032869338989e-07, -4.55416738986969e-07, -3.8743019104003906e-07, -3.1944364309310913e-07, -2.514570951461792e-07, -1.8347054719924927e-07, -1.1548399925231934e-07, -4.7497451305389404e-08, 2.0489096641540527e-08, 8.847564458847046e-08, 1.564621925354004e-07, 2.2444874048233032e-07, 2.9243528842926025e-07, 3.604218363761902e-07, 4.284083843231201e-07, 4.9639493227005e-07, 5.6438148021698e-07, 6.323680281639099e-07, 7.003545761108398e-07, 7.683411240577698e-07, 8.363276720046997e-07, 9.043142199516296e-07, 9.723007678985596e-07, 1.0402873158454895e-06, 1.1082738637924194e-06, 1.1762604117393494e-06, 1.2442469596862793e-06, 1.3122335076332092e-06, 1.3802200555801392e-06, 1.448206603527069e-06, 1.516193151473999e-06, 1.584179699420929e-06, 1.6521662473678589e-06, 1.7201527953147888e-06, 1.7881393432617188e-06]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 5.0, 3.0, 3.0, 5.0, 5.0, 18.0, 14.0, 23.0, 38.0, 45.0, 62.0, 117.0, 258.0, 126.0, 76.0, 58.0, 31.0, 35.0, 20.0, 12.0, 16.0, 9.0, 7.0, 7.0, 6.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001925597171066329, -0.00018602219643071294, -0.00017948469030670822, -0.00017294716963078827, -0.00016640964895486832, -0.00015987212827894837, -0.00015333460760302842, -0.0001467971014790237, -0.00014025958080310374, -0.0001337220601271838, -0.00012718455400317907, -0.00012064703332725912, -0.00011410951265133917, -0.00010757199197541922, -0.00010103447857545689, -9.449696517549455e-05, -8.79594444995746e-05, -8.142192382365465e-05, -7.488441042369232e-05, -6.834689702372998e-05, -6.180937634781003e-05, -5.527185930986889e-05, -4.8734342271927744e-05, -4.21968252339866e-05, -3.565930819604546e-05, -2.9121791158104315e-05, -2.2584274120163172e-05, 
[wandb run summary excerpt: per-parameter gradient histograms for encoder.encoder.layers.13 through .16 of the fine-tuned speech model. For each parameter (layer_norm and final_layer_norm weight/bias, feed_forward.intermediate_dense and feed_forward.output_dense weight/bias, and attention q_proj/k_proj/v_proj/out_proj weight/bias) the run file stores an entry of the form {"_type": "histogram", "values": [bin counts], "bins": [bin edges]}. The raw numeric arrays are omitted here.]
19.0, 24.0, 27.0, 31.0, 43.0, 50.0, 73.0, 107.0, 281.0, 1676.0, 980.0, 229.0, 96.0, 56.0, 52.0, 45.0, 40.0, 26.0, 36.0, 24.0, 26.0, 8.0, 16.0, 10.0, 10.0, 5.0, 3.0, 6.0, 4.0, 2.0, 2.0, 3.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.4928321838378906e-05, -3.3834949135780334e-05, -3.274157643318176e-05, -3.164820373058319e-05, -3.055483102798462e-05, -2.9461458325386047e-05, -2.8368085622787476e-05, -2.7274712920188904e-05, -2.6181340217590332e-05, -2.508796751499176e-05, -2.399459481239319e-05, -2.2901222109794617e-05, -2.1807849407196045e-05, -2.0714476704597473e-05, -1.96211040019989e-05, -1.852773129940033e-05, -1.7434358596801758e-05, -1.6340985894203186e-05, -1.5247613191604614e-05, -1.4154240489006042e-05, -1.306086778640747e-05, -1.1967495083808899e-05, -1.0874122381210327e-05, -9.780749678611755e-06, -8.687376976013184e-06, -7.594004273414612e-06, -6.50063157081604e-06, -5.407258868217468e-06, -4.3138861656188965e-06, -3.2205134630203247e-06, -2.127140760421753e-06, -1.0337680578231812e-06, 5.960464477539063e-08, 1.1529773473739624e-06, 2.246350049972534e-06, 3.339722752571106e-06, 4.433095455169678e-06, 5.5264681577682495e-06, 6.619840860366821e-06, 7.713213562965393e-06, 8.806586265563965e-06, 9.899958968162537e-06, 1.0993331670761108e-05, 1.208670437335968e-05, 1.3180077075958252e-05, 1.4273449778556824e-05, 1.5366822481155396e-05, 1.6460195183753967e-05, 1.755356788635254e-05, 1.864694058895111e-05, 1.9740313291549683e-05, 2.0833685994148254e-05, 2.1927058696746826e-05, 2.3020431399345398e-05, 2.411380410194397e-05, 2.520717680454254e-05, 2.6300549507141113e-05, 2.7393922209739685e-05, 2.8487294912338257e-05, 2.958066761493683e-05, 3.06740403175354e-05, 3.176741302013397e-05, 3.2860785722732544e-05, 3.3954158425331116e-05, 3.504753112792969e-05]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 8.0, 9.0, 9.0, 17.0, 20.0, 37.0, 57.0, 73.0, 105.0, 163.0, 114.0, 93.0, 67.0, 48.0, 31.0, 33.0, 18.0, 19.0, 16.0, 15.0, 7.0, 7.0, 7.0, 4.0, 4.0, 4.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00021992232359480113, -0.0002124725142493844, -0.00020502270490396768, -0.00019757289555855095, -0.00019012308621313423, -0.0001826732768677175, -0.00017522346752230078, -0.00016777365817688406, -0.00016032384883146733, -0.0001528740394860506, -0.00014542423014063388, -0.00013797442079521716, -0.00013052461144980043, -0.0001230748021043837, -0.00011562499275896698, -0.00010817518341355026, -0.00010072537406813353, -9.327556472271681e-05, -8.582575537730008e-05, -7.837594603188336e-05, -7.092613668646663e-05, -6.347632734104991e-05, -5.6026517995633185e-05, -4.857670865021646e-05, -4.1126899304799736e-05, -3.367708995938301e-05, -2.6227280613966286e-05, -1.877747126854956e-05, -1.1327661923132837e-05, -3.877852577716112e-06, 3.5719567677006125e-06, 1.1021766113117337e-05, 1.847159001044929e-05, 2.5921399355866015e-05, 3.337120870128274e-05, 4.0821018046699464e-05, 4.827082739211619e-05, 5.5720636737532914e-05, 6.317044608294964e-05, 7.062025542836636e-05, 7.807006477378309e-05, 8.551987411919981e-05, 9.296968346461654e-05, 0.00010041949281003326, 0.00010786930215544999, 0.00011531911150086671, 0.00012276892084628344, 0.00013021873019170016, 0.00013766853953711689, 0.0001451183488825336, 0.00015256815822795033, 0.00016001796757336706, 0.00016746777691878378, 
0.0001749175862642005, 0.00018236739560961723, 0.00018981720495503396, 0.00019726701430045068, 0.0002047168236458674, 0.00021216663299128413, 0.00021961644233670086, 0.00022706625168211758, 0.0002345160610275343, 0.00024196587037295103, 0.000249415694270283, 0.0002568654890637845]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 2.0, 0.0, 4.0, 6.0, 8.0, 8.0, 13.0, 11.0, 13.0, 13.0, 13.0, 25.0, 27.0, 26.0, 24.0, 34.0, 33.0, 26.0, 43.0, 37.0, 46.0, 41.0, 41.0, 39.0, 38.0, 45.0, 39.0, 35.0, 44.0, 26.0, 28.0, 28.0, 24.0, 24.0, 22.0, 22.0, 23.0, 14.0, 14.0, 12.0, 10.0, 10.0, 3.0, 2.0, 3.0, 1.0, 7.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010186433792114258, -9.807292371988297e-05, -9.428150951862335e-05, -9.049009531736374e-05, -8.669868111610413e-05, -8.290726691484451e-05, -7.91158527135849e-05, -7.532443851232529e-05, -7.153302431106567e-05, -6.774161010980606e-05, -6.395019590854645e-05, -6.0158781707286835e-05, -5.636736750602722e-05, -5.257595330476761e-05, -4.8784539103507996e-05, -4.499312490224838e-05, -4.120171070098877e-05, -3.7410296499729156e-05, -3.3618882298469543e-05, -2.982746809720993e-05, -2.6036053895950317e-05, -2.2244639694690704e-05, -1.845322549343109e-05, -1.4661811292171478e-05, -1.0870397090911865e-05, -7.078982889652252e-06, -3.287568688392639e-06, 5.038455128669739e-07, 4.295259714126587e-06, 8.0866739153862e-06, 1.1878088116645813e-05, 1.5669502317905426e-05, 1.946091651916504e-05, 2.3252330720424652e-05, 2.7043744921684265e-05, 3.083515912294388e-05, 3.462657332420349e-05, 3.8417987525463104e-05, 4.220940172672272e-05, 4.600081592798233e-05, 4.979223012924194e-05, 5.3583644330501556e-05, 5.737505853176117e-05, 6.116647273302078e-05, 6.49578869342804e-05, 6.874930113554001e-05, 7.254071533679962e-05, 7.633212953805923e-05, 8.012354373931885e-05, 8.391495794057846e-05, 8.770637214183807e-05, 9.149778634309769e-05, 9.52892005443573e-05, 9.908061474561691e-05, 0.00010287202894687653, 0.00010666344314813614, 0.00011045485734939575, 0.00011424627155065536, 0.00011803768575191498, 0.00012182909995317459, 0.0001256205141544342, 0.00012941192835569382, 0.00013320334255695343, 0.00013699475675821304, 0.00014078617095947266]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 10.0, 7.0, 12.0, 9.0, 9.0, 14.0, 22.0, 22.0, 30.0, 41.0, 67.0, 104.0, 168.0, 327.0, 611.0, 1302.0, 2914.0, 7236.0, 21165.0, 86960.0, 686834.0, 187307.0, 35185.0, 10591.0, 3959.0, 1769.0, 817.0, 414.0, 239.0, 123.0, 104.0, 52.0, 28.0, 26.0, 12.0, 14.0, 9.0, 9.0, 12.0, 7.0, 8.0, 3.0, 1.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001552104949951172, -0.0001498013734817505, -0.0001443922519683838, -0.0001389831304550171, -0.0001335740089416504, -0.0001281648874282837, -0.000122755765914917, -0.00011734664440155029, -0.0001119375228881836, -0.0001065284013748169, -0.0001011192798614502, -9.57101583480835e-05, -9.03010368347168e-05, -8.48919153213501e-05, -7.94827938079834e-05, -7.40736722946167e-05, -6.866455078125e-05, -6.32554292678833e-05, -5.78463077545166e-05, -5.24371862411499e-05, -4.70280647277832e-05, -4.1618943214416504e-05, -3.6209821701049805e-05, -3.0800700187683105e-05, -2.5391578674316406e-05, -1.9982457160949707e-05, -1.4573335647583008e-05, -9.164214134216309e-06, -3.7550926208496094e-06, 1.6540288925170898e-06, 7.063150405883789e-06, 1.2472271919250488e-05, 
1.7881393432617188e-05, 2.3290514945983887e-05, 2.8699636459350586e-05, 3.4108757972717285e-05, 3.9517879486083984e-05, 4.4927000999450684e-05, 5.033612251281738e-05, 5.574524402618408e-05, 6.115436553955078e-05, 6.656348705291748e-05, 7.197260856628418e-05, 7.738173007965088e-05, 8.279085159301758e-05, 8.819997310638428e-05, 9.360909461975098e-05, 9.901821613311768e-05, 0.00010442733764648438, 0.00010983645915985107, 0.00011524558067321777, 0.00012065470218658447, 0.00012606382369995117, 0.00013147294521331787, 0.00013688206672668457, 0.00014229118824005127, 0.00014770030975341797, 0.00015310943126678467, 0.00015851855278015137, 0.00016392767429351807, 0.00016933679580688477, 0.00017474591732025146, 0.00018015503883361816, 0.00018556416034698486, 0.00019097328186035156]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 4.0, 2.0, 5.0, 11.0, 17.0, 16.0, 31.0, 55.0, 64.0, 93.0, 143.0, 123.0, 120.0, 110.0, 68.0, 36.0, 33.0, 19.0, 13.0, 12.0, 12.0, 6.0, 6.0, 8.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.52587890625e-05, -1.4477409422397614e-05, -1.3696029782295227e-05, -1.291465014219284e-05, -1.2133270502090454e-05, -1.1351890861988068e-05, -1.0570511221885681e-05, -9.789131581783295e-06, -9.007751941680908e-06, -8.226372301578522e-06, -7.444992661476135e-06, -6.663613021373749e-06, -5.882233381271362e-06, -5.100853741168976e-06, -4.319474101066589e-06, -3.538094460964203e-06, -2.7567148208618164e-06, -1.97533518075943e-06, -1.1939555406570435e-06, -4.12575900554657e-07, 3.688037395477295e-07, 1.150183379650116e-06, 1.9315630197525024e-06, 2.712942659854889e-06, 3.4943222999572754e-06, 4.275701940059662e-06, 5.057081580162048e-06, 5.838461220264435e-06, 6.619840860366821e-06, 7.401220500469208e-06, 8.182600140571594e-06, 8.96397978067398e-06, 9.745359420776367e-06, 1.0526739060878754e-05, 1.130811870098114e-05, 1.2089498341083527e-05, 1.2870877981185913e-05, 1.36522576212883e-05, 1.4433637261390686e-05, 1.5215016901493073e-05, 1.599639654159546e-05, 1.6777776181697845e-05, 1.7559155821800232e-05, 1.834053546190262e-05, 1.9121915102005005e-05, 1.990329474210739e-05, 2.0684674382209778e-05, 2.1466054022312164e-05, 2.224743366241455e-05, 2.3028813302516937e-05, 2.3810192942619324e-05, 2.459157258272171e-05, 2.5372952222824097e-05, 2.6154331862926483e-05, 2.693571150302887e-05, 2.7717091143131256e-05, 2.8498470783233643e-05, 2.927985042333603e-05, 3.0061230063438416e-05, 3.08426097035408e-05, 3.162398934364319e-05, 3.2405368983745575e-05, 3.318674862384796e-05, 3.396812826395035e-05, 3.4749507904052734e-05]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 8.0, 17.0, 14.0, 19.0, 33.0, 50.0, 55.0, 81.0, 104.0, 179.0, 294.0, 445.0, 646.0, 938.0, 1429.0, 2002.0, 3243.0, 4941.0, 7395.0, 11381.0, 17534.0, 28340.0, 47338.0, 86039.0, 200530.0, 358081.0, 119322.0, 60860.0, 35477.0, 21564.0, 13915.0, 8855.0, 5956.0, 3748.0, 2608.0, 1751.0, 1033.0, 773.0, 479.0, 329.0, 255.0, 160.0, 129.0, 72.0, 47.0, 25.0, 25.0, 8.0, 13.0, 9.0, 5.0, 1.0, 3.0, 7.0, 1.0, 1.0], "bins": [-3.9577484130859375e-05, -3.838632255792618e-05, -3.719516098499298e-05, -3.6003999412059784e-05, -3.481283783912659e-05, -3.362167626619339e-05, -3.243051469326019e-05, -3.1239353120326996e-05, -3.00481915473938e-05, 
-2.8857029974460602e-05, -2.7665868401527405e-05, -2.6474706828594208e-05, -2.528354525566101e-05, -2.4092383682727814e-05, -2.2901222109794617e-05, -2.171006053686142e-05, -2.0518898963928223e-05, -1.9327737390995026e-05, -1.813657581806183e-05, -1.694541424512863e-05, -1.5754252672195435e-05, -1.4563091099262238e-05, -1.337192952632904e-05, -1.2180767953395844e-05, -1.0989606380462646e-05, -9.79844480752945e-06, -8.607283234596252e-06, -7.416121661663055e-06, -6.224960088729858e-06, -5.033798515796661e-06, -3.842636942863464e-06, -2.6514753699302673e-06, -1.4603137969970703e-06, -2.691522240638733e-07, 9.220093488693237e-07, 2.1131709218025208e-06, 3.3043324947357178e-06, 4.495494067668915e-06, 5.686655640602112e-06, 6.877817213535309e-06, 8.068978786468506e-06, 9.260140359401703e-06, 1.04513019323349e-05, 1.1642463505268097e-05, 1.2833625078201294e-05, 1.4024786651134491e-05, 1.5215948224067688e-05, 1.6407109797000885e-05, 1.7598271369934082e-05, 1.878943294286728e-05, 1.9980594515800476e-05, 2.1171756088733673e-05, 2.236291766166687e-05, 2.3554079234600067e-05, 2.4745240807533264e-05, 2.593640238046646e-05, 2.7127563953399658e-05, 2.8318725526332855e-05, 2.9509887099266052e-05, 3.070104867219925e-05, 3.1892210245132446e-05, 3.308337181806564e-05, 3.427453339099884e-05, 3.546569496393204e-05, 3.6656856536865234e-05]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 7.0, 10.0, 7.0, 11.0, 10.0, 8.0, 7.0, 20.0, 8.0, 24.0, 18.0, 18.0, 29.0, 29.0, 29.0, 42.0, 29.0, 32.0, 48.0, 46.0, 50.0, 53.0, 47.0, 54.0, 48.0, 42.0, 48.0, 23.0, 29.0, 26.0, 20.0, 22.0, 14.0, 16.0, 17.0, 15.0, 12.0, 10.0, 10.0, 4.0, 2.0, 7.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-3.319978713989258e-05, -3.219861537218094e-05, -3.11974436044693e-05, -3.019627183675766e-05, -2.919510006904602e-05, -2.819392830133438e-05, -2.7192756533622742e-05, -2.6191584765911102e-05, -2.5190412998199463e-05, -2.4189241230487823e-05, -2.3188069462776184e-05, -2.2186897695064545e-05, -2.1185725927352905e-05, -2.0184554159641266e-05, -1.9183382391929626e-05, -1.8182210624217987e-05, -1.7181038856506348e-05, -1.6179867088794708e-05, -1.5178695321083069e-05, -1.417752355337143e-05, -1.317635178565979e-05, -1.217518001794815e-05, -1.1174008250236511e-05, -1.0172836482524872e-05, -9.171664714813232e-06, -8.170492947101593e-06, -7.169321179389954e-06, -6.168149411678314e-06, -5.166977643966675e-06, -4.165805876255035e-06, -3.164634108543396e-06, -2.1634623408317566e-06, -1.1622905731201172e-06, -1.6111880540847778e-07, 8.400529623031616e-07, 1.841224730014801e-06, 2.8423964977264404e-06, 3.84356826543808e-06, 4.844740033149719e-06, 5.845911800861359e-06, 6.847083568572998e-06, 7.848255336284637e-06, 8.849427103996277e-06, 9.850598871707916e-06, 1.0851770639419556e-05, 1.1852942407131195e-05, 1.2854114174842834e-05, 1.3855285942554474e-05, 1.4856457710266113e-05, 1.5857629477977753e-05, 1.6858801245689392e-05, 1.785997301340103e-05, 1.886114478111267e-05, 1.986231654882431e-05, 2.086348831653595e-05, 2.186466008424759e-05, 2.286583185195923e-05, 2.3867003619670868e-05, 2.4868175387382507e-05, 2.5869347155094147e-05, 2.6870518922805786e-05, 2.7871690690517426e-05, 2.8872862458229065e-05, 2.9874034225940704e-05, 3.0875205993652344e-05]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 2.0, 8.0, 5.0, 13.0, 10.0, 18.0, 31.0, 43.0, 60.0, 
70.0, 107.0, 156.0, 251.0, 220.0, 491.0, 708.0, 1183.0, 1177.0, 2680.0, 4970.0, 9470.0, 11602.0, 36678.0, 102394.0, 476251.0, 276276.0, 53929.0, 36140.0, 15625.0, 7756.0, 3079.0, 2718.0, 1591.0, 943.0, 422.0, 472.0, 312.0, 222.0, 106.0, 128.0, 80.0, 49.0, 34.0, 19.0, 17.0, 12.0, 12.0, 3.0, 6.0, 5.0, 4.0, 2.0, 1.0, 0.0, 1.0, 2.0], "bins": [-5.424022674560547e-06, -5.259178578853607e-06, -5.0943344831466675e-06, -4.929490387439728e-06, -4.764646291732788e-06, -4.599802196025848e-06, -4.434958100318909e-06, -4.270114004611969e-06, -4.105269908905029e-06, -3.94042581319809e-06, -3.77558171749115e-06, -3.61073762178421e-06, -3.4458935260772705e-06, -3.281049430370331e-06, -3.116205334663391e-06, -2.9513612389564514e-06, -2.7865171432495117e-06, -2.621673047542572e-06, -2.4568289518356323e-06, -2.2919848561286926e-06, -2.127140760421753e-06, -1.9622966647148132e-06, -1.7974525690078735e-06, -1.6326084733009338e-06, -1.4677643775939941e-06, -1.3029202818870544e-06, -1.1380761861801147e-06, -9.73232090473175e-07, -8.083879947662354e-07, -6.435438990592957e-07, -4.78699803352356e-07, -3.1385570764541626e-07, -1.4901161193847656e-07, 1.5832483768463135e-08, 1.8067657947540283e-07, 3.4552067518234253e-07, 5.103647708892822e-07, 6.752088665962219e-07, 8.400529623031616e-07, 1.0048970580101013e-06, 1.169741153717041e-06, 1.3345852494239807e-06, 1.4994293451309204e-06, 1.6642734408378601e-06, 1.8291175365447998e-06, 1.9939616322517395e-06, 2.158805727958679e-06, 2.323649823665619e-06, 2.4884939193725586e-06, 2.6533380150794983e-06, 2.818182110786438e-06, 2.9830262064933777e-06, 3.1478703022003174e-06, 3.312714397907257e-06, 3.4775584936141968e-06, 3.6424025893211365e-06, 3.807246685028076e-06, 3.972090780735016e-06, 4.1369348764419556e-06, 4.301778972148895e-06, 4.466623067855835e-06, 4.631467163562775e-06, 4.796311259269714e-06, 4.961155354976654e-06, 5.125999450683594e-06]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 8.0, 9.0, 0.0, 11.0, 14.0, 0.0, 18.0, 35.0, 0.0, 33.0, 39.0, 0.0, 63.0, 65.0, 0.0, 90.0, 94.0, 82.0, 0.0, 89.0, 81.0, 0.0, 57.0, 60.0, 0.0, 37.0, 31.0, 0.0, 24.0, 14.0, 0.0, 24.0, 13.0, 6.0, 0.0, 5.0, 5.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-1.430511474609375e-06, -1.389533281326294e-06, -1.3485550880432129e-06, -1.3075768947601318e-06, -1.2665987014770508e-06, -1.2256205081939697e-06, -1.1846423149108887e-06, -1.1436641216278076e-06, -1.1026859283447266e-06, -1.0617077350616455e-06, -1.0207295417785645e-06, -9.797513484954834e-07, -9.387731552124023e-07, -8.977949619293213e-07, -8.568167686462402e-07, -8.158385753631592e-07, -7.748603820800781e-07, -7.338821887969971e-07, -6.92903995513916e-07, -6.51925802230835e-07, -6.109476089477539e-07, -5.699694156646729e-07, -5.289912223815918e-07, -4.880130290985107e-07, -4.470348358154297e-07, -4.0605664253234863e-07, -3.650784492492676e-07, -3.241002559661865e-07, -2.8312206268310547e-07, -2.421438694000244e-07, -2.0116567611694336e-07, -1.601874828338623e-07, -1.1920928955078125e-07, -7.82310962677002e-08, -3.725290298461914e-08, 3.725290298461914e-09, 4.470348358154297e-08, 8.568167686462402e-08, 1.2665987014770508e-07, 1.6763806343078613e-07, 2.086162567138672e-07, 2.4959444999694824e-07, 2.905726432800293e-07, 3.3155083656311035e-07, 3.725290298461914e-07, 4.1350722312927246e-07, 4.544854164123535e-07, 4.954636096954346e-07, 5.364418029785156e-07, 5.774199962615967e-07, 
6.183981895446777e-07, 6.593763828277588e-07, 7.003545761108398e-07, 7.413327693939209e-07, 7.82310962677002e-07, 8.23289155960083e-07, 8.642673492431641e-07, 9.052455425262451e-07, 9.462237358093262e-07, 9.872019290924072e-07, 1.0281801223754883e-06, 1.0691583156585693e-06, 1.1101365089416504e-06, 1.1511147022247314e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 4.0, 2.0, 4.0, 2.0, 7.0, 11.0, 26.0, 14.0, 20.0, 79.0, 40.0, 100.0, 85.0, 223.0, 135.0, 413.0, 279.0, 776.0, 548.0, 668.0, 1961.0, 1381.0, 4178.0, 3037.0, 9840.0, 7896.0, 27215.0, 25381.0, 117019.0, 163378.0, 318859.0, 238963.0, 41757.0, 41559.0, 11192.0, 13313.0, 4133.0, 5441.0, 1832.0, 2426.0, 897.0, 721.0, 997.0, 334.0, 465.0, 201.0, 255.0, 92.0, 151.0, 60.0, 87.0, 19.0, 23.0, 30.0, 8.0, 15.0, 7.0, 8.0, 1.0, 3.0, 0.0, 2.0], "bins": [-2.8014183044433594e-06, -2.71480530500412e-06, -2.6281923055648804e-06, -2.541579306125641e-06, -2.4549663066864014e-06, -2.368353307247162e-06, -2.2817403078079224e-06, -2.195127308368683e-06, -2.1085143089294434e-06, -2.021901309490204e-06, -1.9352883100509644e-06, -1.8486753106117249e-06, -1.7620623111724854e-06, -1.6754493117332458e-06, -1.5888363122940063e-06, -1.5022233128547668e-06, -1.4156103134155273e-06, -1.3289973139762878e-06, -1.2423843145370483e-06, -1.1557713150978088e-06, -1.0691583156585693e-06, -9.825453162193298e-07, -8.959323167800903e-07, -8.093193173408508e-07, -7.227063179016113e-07, -6.360933184623718e-07, -5.494803190231323e-07, -4.628673195838928e-07, -3.762543201446533e-07, -2.896413207054138e-07, -2.0302832126617432e-07, -1.1641532182693481e-07, -2.9802322387695312e-08, 5.681067705154419e-08, 1.434236764907837e-07, 2.300366759300232e-07, 3.166496753692627e-07, 4.032626748085022e-07, 4.898756742477417e-07, 5.764886736869812e-07, 6.631016731262207e-07, 7.497146725654602e-07, 8.363276720046997e-07, 9.229406714439392e-07, 1.0095536708831787e-06, 1.0961666703224182e-06, 1.1827796697616577e-06, 1.2693926692008972e-06, 1.3560056686401367e-06, 1.4426186680793762e-06, 1.5292316675186157e-06, 1.6158446669578552e-06, 1.7024576663970947e-06, 1.7890706658363342e-06, 1.8756836652755737e-06, 1.9622966647148132e-06, 2.0489096641540527e-06, 2.1355226635932922e-06, 2.2221356630325317e-06, 2.3087486624717712e-06, 2.3953616619110107e-06, 2.4819746613502502e-06, 2.5685876607894897e-06, 2.6552006602287292e-06, 2.7418136596679688e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 3.0, 2.0, 2.0, 6.0, 2.0, 2.0, 7.0, 4.0, 6.0, 4.0, 7.0, 20.0, 15.0, 12.0, 15.0, 24.0, 54.0, 30.0, 38.0, 47.0, 45.0, 68.0, 132.0, 56.0, 62.0, 46.0, 45.0, 71.0, 26.0, 26.0, 17.0, 9.0, 28.0, 10.0, 11.0, 8.0, 2.0, 4.0, 17.0, 2.0, 4.0, 1.0, 1.0, 4.0, 3.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0], "bins": [-2.3245811462402344e-06, -2.253800630569458e-06, -2.1830201148986816e-06, -2.1122395992279053e-06, -2.041459083557129e-06, -1.9706785678863525e-06, -1.8998980522155762e-06, -1.8291175365447998e-06, -1.7583370208740234e-06, -1.687556505203247e-06, -1.6167759895324707e-06, -1.5459954738616943e-06, -1.475214958190918e-06, -1.4044344425201416e-06, -1.3336539268493652e-06, -1.2628734111785889e-06, -1.1920928955078125e-06, -1.1213123798370361e-06, -1.0505318641662598e-06, -9.797513484954834e-07, -9.08970832824707e-07, -8.381903171539307e-07, -7.674098014831543e-07, -6.966292858123779e-07, -6.258487701416016e-07, -5.550682544708252e-07, 
-4.842877388000488e-07, -4.1350722312927246e-07, -3.427267074584961e-07, -2.7194619178771973e-07, -2.0116567611694336e-07, -1.30385160446167e-07, -5.960464477539063e-08, 1.1175870895385742e-08, 8.195638656616211e-08, 1.5273690223693848e-07, 2.2351741790771484e-07, 2.942979335784912e-07, 3.650784492492676e-07, 4.3585896492004395e-07, 5.066394805908203e-07, 5.774199962615967e-07, 6.48200511932373e-07, 7.189810276031494e-07, 7.897615432739258e-07, 8.605420589447021e-07, 9.313225746154785e-07, 1.0021030902862549e-06, 1.0728836059570312e-06, 1.1436641216278076e-06, 1.214444637298584e-06, 1.2852251529693604e-06, 1.3560056686401367e-06, 1.426786184310913e-06, 1.4975666999816895e-06, 1.5683472156524658e-06, 1.6391277313232422e-06, 1.7099082469940186e-06, 1.780688762664795e-06, 1.8514692783355713e-06, 1.9222497940063477e-06, 1.993030309677124e-06, 2.0638108253479004e-06, 2.1345913410186768e-06, 2.205371856689453e-06]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 1.0, 3.0, 3.0, 2.0, 4.0, 7.0, 8.0, 9.0, 21.0, 29.0, 30.0, 48.0, 82.0, 146.0, 189.0, 107.0, 74.0, 36.0, 33.0, 24.0, 24.0, 30.0, 11.0, 15.0, 7.0, 12.0, 10.0, 8.0, 10.0, 5.0, 6.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00012616651656571776, -0.00012093228724552318, -0.00011569806520128623, -0.00011046383588109165, -0.0001052296138368547, -9.999538451666012e-05, -9.476115519646555e-05, -8.95269331522286e-05, -8.429270383203402e-05, -7.905847451183945e-05, -7.382425246760249e-05, -6.859002314740792e-05, -6.335579382721335e-05, -5.812157178297639e-05, -5.288734246278182e-05, -4.765311678056605e-05, -4.2418891098350286e-05, -3.718466541613452e-05, -3.1950439733918756e-05, -2.6716210413724184e-05, -2.148198473150842e-05, -1.6247759049292654e-05, -1.1013529729098082e-05, -5.7793040468823165e-06, -5.450783646665514e-07, 4.6891482270439155e-06, 9.923374818754382e-06, 1.5157602319959551e-05, 2.0391828002175316e-05, 2.562605368439108e-05, 3.0860283004585654e-05, 3.609450868680142e-05, 4.1328719817101955e-05, 4.656294549931772e-05, 5.1797171181533486e-05, 5.703140050172806e-05, 6.226562254596502e-05, 6.749985186615959e-05, 7.273408118635416e-05, 7.796830323059112e-05, 8.320253255078569e-05, 8.843676187098026e-05, 9.367098391521722e-05, 9.890521323541179e-05, 0.00010413944255560637, 0.00010937366459984332, 0.0001146078939200379, 0.00011984212324023247, 0.00012507634528446943, 0.00013031056732870638, 0.00013554480392485857, 0.00014077902596909553, 0.00014601324801333249, 0.00015124748460948467, 0.00015648170665372163, 0.0001617159286979586, 0.00016695016529411077, 0.00017218438733834773, 0.00017741862393449992, 0.00018265284597873688, 0.00018788706802297384, 0.0001931212900672108, 0.00019835552666336298, 0.00020358974870759994, 0.0002088239707518369]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 3.0, 8.0, 4.0, 3.0, 7.0, 7.0, 9.0, 15.0, 10.0, 10.0, 11.0, 11.0, 17.0, 18.0, 19.0, 23.0, 27.0, 30.0, 33.0, 28.0, 32.0, 39.0, 49.0, 39.0, 47.0, 47.0, 36.0, 30.0, 41.0, 31.0, 38.0, 34.0, 32.0, 29.0, 28.0, 18.0, 28.0, 17.0, 16.0, 16.0, 13.0, 7.0, 13.0, 3.0, 5.0, 8.0, 3.0, 2.0, 5.0, 3.0, 8.0, 0.0, 1.0, 0.0, 2.0, 1.0], "bins": [-9.620189666748047e-05, -9.331479668617249e-05, -9.04276967048645e-05, -8.754059672355652e-05, -8.465349674224854e-05, -8.176639676094055e-05, -7.887929677963257e-05, 
-7.599219679832458e-05, -7.31050968170166e-05, -7.021799683570862e-05, -6.733089685440063e-05, -6.444379687309265e-05, -6.155669689178467e-05, -5.8669596910476685e-05, -5.57824969291687e-05, -5.289539694786072e-05, -5.0008296966552734e-05, -4.712119698524475e-05, -4.423409700393677e-05, -4.1346997022628784e-05, -3.84598970413208e-05, -3.557279706001282e-05, -3.2685697078704834e-05, -2.979859709739685e-05, -2.6911497116088867e-05, -2.4024397134780884e-05, -2.11372971534729e-05, -1.8250197172164917e-05, -1.5363097190856934e-05, -1.247599720954895e-05, -9.588897228240967e-06, -6.701797246932983e-06, -3.814697265625e-06, -9.275972843170166e-07, 1.959502696990967e-06, 4.84660267829895e-06, 7.733702659606934e-06, 1.0620802640914917e-05, 1.35079026222229e-05, 1.6395002603530884e-05, 1.9282102584838867e-05, 2.216920256614685e-05, 2.5056302547454834e-05, 2.7943402528762817e-05, 3.08305025100708e-05, 3.3717602491378784e-05, 3.660470247268677e-05, 3.949180245399475e-05, 4.2378902435302734e-05, 4.526600241661072e-05, 4.81531023979187e-05, 5.1040202379226685e-05, 5.392730236053467e-05, 5.681440234184265e-05, 5.9701502323150635e-05, 6.258860230445862e-05, 6.54757022857666e-05, 6.836280226707458e-05, 7.124990224838257e-05, 7.413700222969055e-05, 7.702410221099854e-05, 7.991120219230652e-05, 8.27983021736145e-05, 8.568540215492249e-05, 8.857250213623047e-05]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 3.0, 2.0, 2.0, 6.0, 16.0, 24.0, 27.0, 38.0, 54.0, 89.0, 136.0, 203.0, 306.0, 519.0, 723.0, 1211.0, 1926.0, 3053.0, 4964.0, 8100.0, 14325.0, 29887.0, 72523.0, 3698165.0, 258589.0, 50415.0, 22557.0, 10891.0, 6020.0, 3236.0, 2048.0, 1288.0, 777.0, 654.0, 418.0, 262.0, 217.0, 161.0, 120.0, 79.0, 53.0, 54.0, 43.0, 32.0, 16.0, 9.0, 11.0, 8.0, 7.0, 11.0, 7.0, 2.0, 3.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-8.374452590942383e-05, -8.050259202718735e-05, -7.726065814495087e-05, -7.401872426271439e-05, -7.07767903804779e-05, -6.753485649824142e-05, -6.429292261600494e-05, -6.105098873376846e-05, -5.780905485153198e-05, -5.45671209692955e-05, -5.132518708705902e-05, -4.808325320482254e-05, -4.484131932258606e-05, -4.159938544034958e-05, -3.83574515581131e-05, -3.511551767587662e-05, -3.187358379364014e-05, -2.8631649911403656e-05, -2.5389716029167175e-05, -2.2147782146930695e-05, -1.8905848264694214e-05, -1.5663914382457733e-05, -1.2421980500221252e-05, -9.180046617984772e-06, -5.938112735748291e-06, -2.6961788535118103e-06, 5.457550287246704e-07, 3.787688910961151e-06, 7.029622793197632e-06, 1.0271556675434113e-05, 1.3513490557670593e-05, 1.6755424439907074e-05, 1.9997358322143555e-05, 2.3239292204380035e-05, 2.6481226086616516e-05, 2.9723159968852997e-05, 3.296509385108948e-05, 3.620702773332596e-05, 3.944896161556244e-05, 4.269089549779892e-05, 4.59328293800354e-05, 4.917476326227188e-05, 5.241669714450836e-05, 5.565863102674484e-05, 5.890056490898132e-05, 6.21424987912178e-05, 6.538443267345428e-05, 6.862636655569077e-05, 7.186830043792725e-05, 7.511023432016373e-05, 7.835216820240021e-05, 8.159410208463669e-05, 8.483603596687317e-05, 8.807796984910965e-05, 9.131990373134613e-05, 9.456183761358261e-05, 9.780377149581909e-05, 0.00010104570537805557, 0.00010428763926029205, 0.00010752957314252853, 0.00011077150702476501, 0.0001140134409070015, 0.00011725537478923798, 0.00012049730867147446, 0.00012373924255371094]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 
1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 6.0, 9.0, 10.0, 11.0, 15.0, 16.0, 20.0, 13.0, 35.0, 44.0, 48.0, 68.0, 82.0, 90.0, 94.0, 74.0, 80.0, 71.0, 50.0, 43.0, 29.0, 30.0, 15.0, 11.0, 9.0, 9.0, 2.0, 4.0, 3.0, 5.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3828277587890625e-05, -1.3285316526889801e-05, -1.2742355465888977e-05, -1.2199394404888153e-05, -1.1656433343887329e-05, -1.1113472282886505e-05, -1.0570511221885681e-05, -1.0027550160884857e-05, -9.484589099884033e-06, -8.94162803888321e-06, -8.398666977882385e-06, -7.855705916881561e-06, -7.312744855880737e-06, -6.769783794879913e-06, -6.226822733879089e-06, -5.683861672878265e-06, -5.140900611877441e-06, -4.5979395508766174e-06, -4.0549784898757935e-06, -3.5120174288749695e-06, -2.9690563678741455e-06, -2.4260953068733215e-06, -1.8831342458724976e-06, -1.3401731848716736e-06, -7.972121238708496e-07, -2.5425106287002563e-07, 2.8870999813079834e-07, 8.316710591316223e-07, 1.3746321201324463e-06, 1.9175931811332703e-06, 2.4605542421340942e-06, 3.0035153031349182e-06, 3.546476364135742e-06, 4.089437425136566e-06, 4.63239848613739e-06, 5.175359547138214e-06, 5.718320608139038e-06, 6.261281669139862e-06, 6.804242730140686e-06, 7.34720379114151e-06, 7.890164852142334e-06, 8.433125913143158e-06, 8.976086974143982e-06, 9.519048035144806e-06, 1.006200909614563e-05, 1.0604970157146454e-05, 1.1147931218147278e-05, 1.1690892279148102e-05, 1.2233853340148926e-05, 1.277681440114975e-05, 1.3319775462150574e-05, 1.3862736523151398e-05, 1.4405697584152222e-05, 1.4948658645153046e-05, 1.549161970615387e-05, 1.6034580767154694e-05, 1.6577541828155518e-05, 1.712050288915634e-05, 1.7663463950157166e-05, 1.820642501115799e-05, 1.8749386072158813e-05, 1.9292347133159637e-05, 1.983530819416046e-05, 2.0378269255161285e-05, 2.092123031616211e-05]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 0.0, 2.0, 4.0, 6.0, 9.0, 11.0, 30.0, 22.0, 22.0, 47.0, 81.0, 87.0, 166.0, 238.0, 411.0, 607.0, 969.0, 1587.0, 2845.0, 5015.0, 9393.0, 19108.0, 43542.0, 124202.0, 3609708.0, 259033.0, 63221.0, 25931.0, 12340.0, 6682.0, 3509.0, 2070.0, 1269.0, 734.0, 525.0, 299.0, 171.0, 136.0, 82.0, 48.0, 41.0, 28.0, 20.0, 10.0, 8.0, 11.0, 3.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.635354995727539e-05, -7.401779294013977e-05, -7.168203592300415e-05, -6.934627890586853e-05, -6.701052188873291e-05, -6.467476487159729e-05, -6.233900785446167e-05, -6.000325083732605e-05, -5.766749382019043e-05, -5.533173680305481e-05, -5.299597978591919e-05, -5.066022276878357e-05, -4.832446575164795e-05, -4.598870873451233e-05, -4.365295171737671e-05, -4.131719470024109e-05, -3.898143768310547e-05, -3.664568066596985e-05, -3.430992364883423e-05, -3.197416663169861e-05, -2.9638409614562988e-05, -2.7302652597427368e-05, -2.4966895580291748e-05, -2.2631138563156128e-05, -2.0295381546020508e-05, -1.7959624528884888e-05, -1.5623867511749268e-05, -1.3288110494613647e-05, -1.0952353477478027e-05, -8.616596460342407e-06, -6.280839443206787e-06, -3.945082426071167e-06, -1.6093254089355469e-06, 7.264316082000732e-07, 3.0621886253356934e-06, 5.3979456424713135e-06, 7.733702659606934e-06, 1.0069459676742554e-05, 1.2405216693878174e-05, 1.4740973711013794e-05, 1.7076730728149414e-05, 1.9412487745285034e-05, 2.1748244762420654e-05, 2.4084001779556274e-05, 2.6419758796691895e-05, 2.8755515813827515e-05, 
3.1091272830963135e-05, 3.3427029848098755e-05, 3.5762786865234375e-05, 3.8098543882369995e-05, 4.0434300899505615e-05, 4.2770057916641235e-05, 4.5105814933776855e-05, 4.7441571950912476e-05, 4.9777328968048096e-05, 5.2113085985183716e-05, 5.4448843002319336e-05, 5.6784600019454956e-05, 5.9120357036590576e-05, 6.14561140537262e-05, 6.379187107086182e-05, 6.612762808799744e-05, 6.846338510513306e-05, 7.079914212226868e-05, 7.31348991394043e-05]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 2.0, 6.0, 6.0, 10.0, 21.0, 15.0, 23.0, 26.0, 38.0, 50.0, 76.0, 260.0, 2118.0, 872.0, 167.0, 65.0, 47.0, 45.0, 40.0, 33.0, 29.0, 14.0, 16.0, 21.0, 14.0, 9.0, 8.0, 9.0, 7.0, 5.0, 5.0, 3.0, 7.0, 3.0, 1.0, 1.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5391578674316406e-05, -2.4178065359592438e-05, -2.296455204486847e-05, -2.17510387301445e-05, -2.0537525415420532e-05, -1.9324012100696564e-05, -1.8110498785972595e-05, -1.6896985471248627e-05, -1.5683472156524658e-05, -1.446995884180069e-05, -1.3256445527076721e-05, -1.2042932212352753e-05, -1.0829418897628784e-05, -9.615905582904816e-06, -8.402392268180847e-06, -7.188878953456879e-06, -5.97536563873291e-06, -4.761852324008942e-06, -3.548339009284973e-06, -2.3348256945610046e-06, -1.1213123798370361e-06, 9.220093488693237e-08, 1.3057142496109009e-06, 2.5192275643348694e-06, 3.732740879058838e-06, 4.946254193782806e-06, 6.159767508506775e-06, 7.373280823230743e-06, 8.586794137954712e-06, 9.80030745267868e-06, 1.1013820767402649e-05, 1.2227334082126617e-05, 1.3440847396850586e-05, 1.4654360711574554e-05, 1.5867874026298523e-05, 1.708138734102249e-05, 1.829490065574646e-05, 1.950841397047043e-05, 2.0721927285194397e-05, 2.1935440599918365e-05, 2.3148953914642334e-05, 2.4362467229366302e-05, 2.557598054409027e-05, 2.678949385881424e-05, 2.8003007173538208e-05, 2.9216520488262177e-05, 3.0430033802986145e-05, 3.1643547117710114e-05, 3.285706043243408e-05, 3.407057374715805e-05, 3.528408706188202e-05, 3.649760037660599e-05, 3.7711113691329956e-05, 3.8924627006053925e-05, 4.013814032077789e-05, 4.135165363550186e-05, 4.256516695022583e-05, 4.37786802649498e-05, 4.499219357967377e-05, 4.6205706894397736e-05, 4.7419220209121704e-05, 4.863273352384567e-05, 4.984624683856964e-05, 5.105976015329361e-05, 5.227327346801758e-05]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 6.0, 12.0, 17.0, 21.0, 36.0, 63.0, 102.0, 190.0, 124.0, 101.0, 81.0, 45.0, 40.0, 32.0, 32.0, 21.0, 18.0, 10.0, 5.0, 10.0, 7.0, 6.0, 4.0, 4.0, 9.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002415499766357243, -0.00023281027097254992, -0.00022407056530937552, -0.0002153308450942859, -0.00020659113943111151, -0.00019785143376793712, -0.0001891117135528475, -0.00018037200788967311, -0.00017163230222649872, -0.00016289259656332433, -0.00015415289090014994, -0.00014541317068506032, -0.00013667346502188593, -0.00012793375935871154, -0.00011919404641957954, -0.00011045433348044753, -0.00010171462781727314, -9.297492215409875e-05, -8.423520921496674e-05, -7.549549627583474e-05, -6.675579061266035e-05, -5.801608131150715e-05, -4.927637201035395e-05, -4.053665907122195e-05, -3.179695340804756e-05, -2.305724410689436e-05, 
-1.4317534805741161e-05, -5.577825504587963e-06, 3.1618837965652347e-06, 1.1901593097718433e-05, 2.064130239887163e-05, 2.9381015338003635e-05, 3.812069189734757e-05, 4.686040119850077e-05, 5.5600110499653965e-05, 6.433982343878597e-05, 7.307952910196036e-05, 8.181923476513475e-05, 9.055894770426676e-05, 9.929866064339876e-05, 0.00010803836630657315, 0.00011677807196974754, 0.00012551777763292193, 0.00013425749784801155, 0.00014299720351118594, 0.00015173690917436033, 0.00016047662938944995, 0.00016921633505262434, 0.00017795604071579874, 0.00018669574637897313, 0.00019543545204214752, 0.00020417517225723714, 0.00021291487792041153, 0.00022165458358358592, 0.00023039430379867554, 0.00023913400946184993, 0.0002478737151250243, 0.0002566134207881987, 0.0002653531264513731, 0.0002740928321145475, 0.0002828325377777219, 0.00029157227254472673, 0.0003003119782079011, 0.0003090516838710755, 0.0003177913895342499]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 6.0, 4.0, 9.0, 11.0, 8.0, 1.0, 12.0, 9.0, 16.0, 17.0, 17.0, 17.0, 25.0, 29.0, 32.0, 31.0, 34.0, 29.0, 30.0, 35.0, 43.0, 37.0, 42.0, 40.0, 38.0, 34.0, 42.0, 37.0, 37.0, 33.0, 21.0, 27.0, 23.0, 30.0, 29.0, 20.0, 11.0, 20.0, 8.0, 11.0, 9.0, 11.0, 7.0, 7.0, 3.0, 7.0, 4.0, 6.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00012010335922241211, -0.00011602137237787247, -0.00011193938553333282, -0.00010785739868879318, -0.00010377541184425354, -9.96934249997139e-05, -9.561143815517426e-05, -9.152945131063461e-05, -8.744746446609497e-05, -8.336547762155533e-05, -7.928349077701569e-05, -7.520150393247604e-05, -7.11195170879364e-05, -6.703753024339676e-05, -6.295554339885712e-05, -5.8873556554317474e-05, -5.479156970977783e-05, -5.070958286523819e-05, -4.662759602069855e-05, -4.2545609176158905e-05, -3.846362233161926e-05, -3.438163548707962e-05, -3.0299648642539978e-05, -2.6217661798000336e-05, -2.2135674953460693e-05, -1.805368810892105e-05, -1.3971701264381409e-05, -9.889714419841766e-06, -5.807727575302124e-06, -1.7257407307624817e-06, 2.3562461137771606e-06, 6.438232958316803e-06, 1.0520219802856445e-05, 1.4602206647396088e-05, 1.868419349193573e-05, 2.2766180336475372e-05, 2.6848167181015015e-05, 3.093015402555466e-05, 3.50121408700943e-05, 3.909412771463394e-05, 4.3176114559173584e-05, 4.7258101403713226e-05, 5.134008824825287e-05, 5.542207509279251e-05, 5.950406193733215e-05, 6.35860487818718e-05, 6.766803562641144e-05, 7.175002247095108e-05, 7.583200931549072e-05, 7.991399616003036e-05, 8.399598300457001e-05, 8.807796984910965e-05, 9.215995669364929e-05, 9.624194353818893e-05, 0.00010032393038272858, 0.00010440591722726822, 0.00010848790407180786, 0.0001125698909163475, 0.00011665187776088715, 0.00012073386460542679, 0.00012481585144996643, 0.00012889783829450607, 0.00013297982513904572, 0.00013706181198358536, 0.000141143798828125]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 5.0, 4.0, 4.0, 4.0, 8.0, 9.0, 10.0, 10.0, 17.0, 24.0, 20.0, 36.0, 46.0, 81.0, 147.0, 270.0, 510.0, 1092.0, 2400.0, 5455.0, 14590.0, 46087.0, 274879.0, 601559.0, 68835.0, 19574.0, 7089.0, 3013.0, 1337.0, 623.0, 308.0, 184.0, 114.0, 54.0, 36.0, 17.0, 23.0, 18.0, 10.0, 17.0, 5.0, 6.0, 4.0, 8.0, 4.0, 0.0, 4.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0001360177993774414, -0.0001315046101808548, -0.0001269914209842682, -0.00012247823178768158, 
-0.00011796504259109497, -0.00011345185339450836, -0.00010893866419792175, -0.00010442547500133514, -9.991228580474854e-05, -9.539909660816193e-05, -9.088590741157532e-05, -8.637271821498871e-05, -8.18595290184021e-05, -7.734633982181549e-05, -7.283315062522888e-05, -6.831996142864227e-05, -6.380677223205566e-05, -5.9293583035469055e-05, -5.4780393838882446e-05, -5.026720464229584e-05, -4.575401544570923e-05, -4.124082624912262e-05, -3.672763705253601e-05, -3.22144478559494e-05, -2.7701258659362793e-05, -2.3188069462776184e-05, -1.8674880266189575e-05, -1.4161691069602966e-05, -9.648501873016357e-06, -5.1353126764297485e-06, -6.221234798431396e-07, 3.891065716743469e-06, 8.404254913330078e-06, 1.2917444109916687e-05, 1.7430633306503296e-05, 2.1943822503089905e-05, 2.6457011699676514e-05, 3.097020089626312e-05, 3.548339009284973e-05, 3.999657928943634e-05, 4.450976848602295e-05, 4.902295768260956e-05, 5.353614687919617e-05, 5.8049336075782776e-05, 6.256252527236938e-05, 6.7075714468956e-05, 7.15889036655426e-05, 7.610209286212921e-05, 8.061528205871582e-05, 8.512847125530243e-05, 8.964166045188904e-05, 9.415484964847565e-05, 9.866803884506226e-05, 0.00010318122804164886, 0.00010769441723823547, 0.00011220760643482208, 0.00011672079563140869, 0.0001212339848279953, 0.0001257471740245819, 0.00013026036322116852, 0.00013477355241775513, 0.00013928674161434174, 0.00014379993081092834, 0.00014831312000751495, 0.00015282630920410156]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 0.0, 4.0, 1.0, 2.0, 9.0, 6.0, 19.0, 17.0, 23.0, 24.0, 49.0, 67.0, 106.0, 108.0, 144.0, 102.0, 109.0, 68.0, 40.0, 39.0, 21.0, 16.0, 12.0, 5.0, 7.0, 3.0, 5.0, 1.0, 4.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3887882232666016e-05, -1.3134442269802094e-05, -1.2381002306938171e-05, -1.162756234407425e-05, -1.0874122381210327e-05, -1.0120682418346405e-05, -9.367242455482483e-06, -8.61380249261856e-06, -7.860362529754639e-06, -7.1069225668907166e-06, -6.3534826040267944e-06, -5.600042641162872e-06, -4.84660267829895e-06, -4.093162715435028e-06, -3.339722752571106e-06, -2.586282789707184e-06, -1.8328428268432617e-06, -1.0794028639793396e-06, -3.259629011154175e-07, 4.2747706174850464e-07, 1.1809170246124268e-06, 1.934356987476349e-06, 2.687796950340271e-06, 3.441236913204193e-06, 4.194676876068115e-06, 4.948116838932037e-06, 5.7015568017959595e-06, 6.454996764659882e-06, 7.208436727523804e-06, 7.961876690387726e-06, 8.715316653251648e-06, 9.46875661611557e-06, 1.0222196578979492e-05, 1.0975636541843414e-05, 1.1729076504707336e-05, 1.2482516467571259e-05, 1.323595643043518e-05, 1.3989396393299103e-05, 1.4742836356163025e-05, 1.5496276319026947e-05, 1.624971628189087e-05, 1.700315624475479e-05, 1.7756596207618713e-05, 1.8510036170482635e-05, 1.9263476133346558e-05, 2.001691609621048e-05, 2.0770356059074402e-05, 2.1523796021938324e-05, 2.2277235984802246e-05, 2.3030675947666168e-05, 2.378411591053009e-05, 2.4537555873394012e-05, 2.5290995836257935e-05, 2.6044435799121857e-05, 2.679787576198578e-05, 2.75513157248497e-05, 2.8304755687713623e-05, 2.9058195650577545e-05, 2.9811635613441467e-05, 3.056507557630539e-05, 3.131851553916931e-05, 3.2071955502033234e-05, 3.2825395464897156e-05, 3.357883542776108e-05, 3.4332275390625e-05]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 
4.0, 5.0, 7.0, 4.0, 17.0, 16.0, 24.0, 45.0, 30.0, 66.0, 110.0, 153.0, 211.0, 292.0, 418.0, 569.0, 871.0, 1235.0, 1756.0, 2427.0, 3737.0, 5476.0, 7541.0, 11767.0, 17538.0, 26251.0, 37691.0, 65783.0, 119860.0, 291836.0, 203143.0, 94555.0, 53923.0, 31865.0, 22540.0, 15040.0, 10113.0, 6433.0, 4742.0, 3320.0, 2051.0, 1594.0, 1064.0, 738.0, 494.0, 350.0, 270.0, 175.0, 119.0, 90.0, 60.0, 50.0, 35.0, 15.0, 19.0, 9.0, 8.0, 6.0, 5.0, 3.0, 2.0, 1.0], "bins": [-2.9802322387695312e-05, -2.886541187763214e-05, -2.792850136756897e-05, -2.69915908575058e-05, -2.6054680347442627e-05, -2.5117769837379456e-05, -2.4180859327316284e-05, -2.3243948817253113e-05, -2.230703830718994e-05, -2.137012779712677e-05, -2.04332172870636e-05, -1.9496306777000427e-05, -1.8559396266937256e-05, -1.7622485756874084e-05, -1.6685575246810913e-05, -1.5748664736747742e-05, -1.481175422668457e-05, -1.3874843716621399e-05, -1.2937933206558228e-05, -1.2001022696495056e-05, -1.1064112186431885e-05, -1.0127201676368713e-05, -9.190291166305542e-06, -8.25338065624237e-06, -7.316470146179199e-06, -6.379559636116028e-06, -5.4426491260528564e-06, -4.505738615989685e-06, -3.5688281059265137e-06, -2.6319175958633423e-06, -1.695007085800171e-06, -7.580965757369995e-07, 1.7881393432617188e-07, 1.1157244443893433e-06, 2.0526349544525146e-06, 2.989545464515686e-06, 3.926455974578857e-06, 4.863366484642029e-06, 5.8002769947052e-06, 6.737187504768372e-06, 7.674098014831543e-06, 8.611008524894714e-06, 9.547919034957886e-06, 1.0484829545021057e-05, 1.1421740055084229e-05, 1.23586505651474e-05, 1.3295561075210571e-05, 1.4232471585273743e-05, 1.5169382095336914e-05, 1.6106292605400085e-05, 1.7043203115463257e-05, 1.7980113625526428e-05, 1.89170241355896e-05, 1.985393464565277e-05, 2.0790845155715942e-05, 2.1727755665779114e-05, 2.2664666175842285e-05, 2.3601576685905457e-05, 2.4538487195968628e-05, 2.54753977060318e-05, 2.641230821609497e-05, 2.7349218726158142e-05, 2.8286129236221313e-05, 2.9223039746284485e-05, 3.0159950256347656e-05]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 5.0, 9.0, 10.0, 8.0, 14.0, 14.0, 12.0, 18.0, 22.0, 12.0, 16.0, 31.0, 24.0, 33.0, 32.0, 35.0, 50.0, 40.0, 36.0, 35.0, 39.0, 45.0, 38.0, 33.0, 48.0, 38.0, 37.0, 28.0, 35.0, 19.0, 23.0, 22.0, 18.0, 20.0, 19.0, 10.0, 11.0, 13.0, 12.0, 10.0, 11.0, 5.0, 2.0, 3.0, 2.0, 2.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-2.855062484741211e-05, -2.766679972410202e-05, -2.678297460079193e-05, -2.5899149477481842e-05, -2.5015324354171753e-05, -2.4131499230861664e-05, -2.3247674107551575e-05, -2.2363848984241486e-05, -2.1480023860931396e-05, -2.0596198737621307e-05, -1.9712373614311218e-05, -1.882854849100113e-05, -1.794472336769104e-05, -1.706089824438095e-05, -1.6177073121070862e-05, -1.5293247997760773e-05, -1.4409422874450684e-05, -1.3525597751140594e-05, -1.2641772627830505e-05, -1.1757947504520416e-05, -1.0874122381210327e-05, -9.990297257900238e-06, -9.106472134590149e-06, -8.22264701128006e-06, -7.338821887969971e-06, -6.454996764659882e-06, -5.5711716413497925e-06, -4.687346518039703e-06, -3.8035213947296143e-06, -2.919696271419525e-06, -2.035871148109436e-06, -1.152046024799347e-06, -2.682209014892578e-07, 6.156042218208313e-07, 1.4994293451309204e-06, 2.3832544684410095e-06, 3.2670795917510986e-06, 4.150904715061188e-06, 5.034729838371277e-06, 5.918554961681366e-06, 6.802380084991455e-06, 7.686205208301544e-06, 8.570030331611633e-06, 9.453855454921722e-06, 
1.0337680578231812e-05, 1.12215057015419e-05, 1.210533082485199e-05, 1.2989155948162079e-05, 1.3872981071472168e-05, 1.4756806194782257e-05, 1.5640631318092346e-05, 1.6524456441402435e-05, 1.7408281564712524e-05, 1.8292106688022614e-05, 1.9175931811332703e-05, 2.0059756934642792e-05, 2.094358205795288e-05, 2.182740718126297e-05, 2.271123230457306e-05, 2.3595057427883148e-05, 2.4478882551193237e-05, 2.5362707674503326e-05, 2.6246532797813416e-05, 2.7130357921123505e-05, 2.8014183044433594e-05]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 5.0, 6.0, 8.0, 8.0, 9.0, 10.0, 17.0, 28.0, 24.0, 46.0, 70.0, 75.0, 244.0, 254.0, 427.0, 637.0, 1114.0, 2074.0, 3677.0, 6974.0, 13834.0, 29260.0, 64215.0, 295868.0, 357860.0, 149274.0, 63889.0, 28891.0, 13942.0, 7028.0, 3682.0, 2064.0, 1183.0, 908.0, 321.0, 191.0, 152.0, 90.0, 59.0, 46.0, 21.0, 16.0, 15.0, 9.0, 14.0, 8.0, 4.0, 2.0, 4.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0], "bins": [-4.172325134277344e-06, -4.04752790927887e-06, -3.9227306842803955e-06, -3.7979334592819214e-06, -3.6731362342834473e-06, -3.548339009284973e-06, -3.423541784286499e-06, -3.298744559288025e-06, -3.1739473342895508e-06, -3.0491501092910767e-06, -2.9243528842926025e-06, -2.7995556592941284e-06, -2.6747584342956543e-06, -2.54996120929718e-06, -2.425163984298706e-06, -2.300366759300232e-06, -2.175569534301758e-06, -2.0507723093032837e-06, -1.9259750843048096e-06, -1.8011778593063354e-06, -1.6763806343078613e-06, -1.5515834093093872e-06, -1.426786184310913e-06, -1.301988959312439e-06, -1.1771917343139648e-06, -1.0523945093154907e-06, -9.275972843170166e-07, -8.028000593185425e-07, -6.780028343200684e-07, -5.532056093215942e-07, -4.284083843231201e-07, -3.03611159324646e-07, -1.7881393432617188e-07, -5.4016709327697754e-08, 7.078051567077637e-08, 1.955777406692505e-07, 3.203749656677246e-07, 4.4517219066619873e-07, 5.699694156646729e-07, 6.94766640663147e-07, 8.195638656616211e-07, 9.443610906600952e-07, 1.0691583156585693e-06, 1.1939555406570435e-06, 1.3187527656555176e-06, 1.4435499906539917e-06, 1.5683472156524658e-06, 1.69314444065094e-06, 1.817941665649414e-06, 1.942738890647888e-06, 2.0675361156463623e-06, 2.1923333406448364e-06, 2.3171305656433105e-06, 2.4419277906417847e-06, 2.566725015640259e-06, 2.691522240638733e-06, 2.816319465637207e-06, 2.941116690635681e-06, 3.0659139156341553e-06, 3.1907111406326294e-06, 3.3155083656311035e-06, 3.4403055906295776e-06, 3.5651028156280518e-06, 3.689900040626526e-06, 3.814697265625e-06]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 4.0, 0.0, 5.0, 2.0, 6.0, 19.0, 12.0, 15.0, 12.0, 21.0, 27.0, 24.0, 32.0, 38.0, 37.0, 38.0, 41.0, 41.0, 83.0, 54.0, 46.0, 53.0, 47.0, 36.0, 38.0, 48.0, 31.0, 41.0, 35.0, 22.0, 26.0, 27.0, 10.0, 7.0, 7.0, 7.0, 4.0, 5.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7285346984863281e-06, -1.6642734408378601e-06, -1.600012183189392e-06, -1.535750925540924e-06, -1.471489667892456e-06, -1.407228410243988e-06, -1.34296715259552e-06, -1.278705894947052e-06, -1.214444637298584e-06, -1.150183379650116e-06, -1.085922122001648e-06, -1.02166086435318e-06, -9.57399606704712e-07, -8.931383490562439e-07, -8.288770914077759e-07, -7.646158337593079e-07, -7.003545761108398e-07, -6.360933184623718e-07, -5.718320608139038e-07, -5.075708031654358e-07, -4.4330954551696777e-07, 
-3.7904828786849976e-07, -3.1478703022003174e-07, -2.505257725715637e-07, -1.862645149230957e-07, -1.2200325727462769e-07, -5.774199962615967e-08, 6.51925802230835e-09, 7.078051567077637e-08, 1.3504177331924438e-07, 1.993030309677124e-07, 2.635642886161804e-07, 3.2782554626464844e-07, 3.9208680391311646e-07, 4.5634806156158447e-07, 5.206093192100525e-07, 5.848705768585205e-07, 6.491318345069885e-07, 7.133930921554565e-07, 7.776543498039246e-07, 8.419156074523926e-07, 9.061768651008606e-07, 9.704381227493286e-07, 1.0346993803977966e-06, 1.0989606380462646e-06, 1.1632218956947327e-06, 1.2274831533432007e-06, 1.2917444109916687e-06, 1.3560056686401367e-06, 1.4202669262886047e-06, 1.4845281839370728e-06, 1.5487894415855408e-06, 1.6130506992340088e-06, 1.6773119568824768e-06, 1.7415732145309448e-06, 1.8058344721794128e-06, 1.8700957298278809e-06, 1.934356987476349e-06, 1.998618245124817e-06, 2.062879502773285e-06, 2.127140760421753e-06, 2.191402018070221e-06, 2.255663275718689e-06, 2.319924533367157e-06, 2.384185791015625e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 3.0, 0.0, 5.0, 7.0, 12.0, 10.0, 15.0, 15.0, 23.0, 37.0, 67.0, 143.0, 171.0, 282.0, 516.0, 985.0, 1987.0, 4192.0, 18334.0, 37673.0, 114963.0, 519485.0, 244665.0, 63743.0, 23293.0, 11921.0, 2844.0, 1398.0, 746.0, 394.0, 218.0, 126.0, 112.0, 43.0, 26.0, 28.0, 11.0, 14.0, 7.0, 19.0, 6.0, 3.0, 4.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.0531158447265625e-06, -3.925524652004242e-06, -3.7979334592819214e-06, -3.670342266559601e-06, -3.5427510738372803e-06, -3.4151598811149597e-06, -3.287568688392639e-06, -3.1599774956703186e-06, -3.032386302947998e-06, -2.9047951102256775e-06, -2.777203917503357e-06, -2.6496127247810364e-06, -2.522021532058716e-06, -2.3944303393363953e-06, -2.2668391466140747e-06, -2.139247953891754e-06, -2.0116567611694336e-06, -1.884065568447113e-06, -1.7564743757247925e-06, -1.628883183002472e-06, -1.5012919902801514e-06, -1.3737007975578308e-06, -1.2461096048355103e-06, -1.1185184121131897e-06, -9.909272193908691e-07, -8.633360266685486e-07, -7.35744833946228e-07, -6.081536412239075e-07, -4.805624485015869e-07, -3.5297125577926636e-07, -2.253800630569458e-07, -9.778887033462524e-08, 2.9802322387695312e-08, 1.5739351511001587e-07, 2.849847078323364e-07, 4.12575900554657e-07, 5.401670932769775e-07, 6.677582859992981e-07, 7.953494787216187e-07, 9.229406714439392e-07, 1.0505318641662598e-06, 1.1781230568885803e-06, 1.3057142496109009e-06, 1.4333054423332214e-06, 1.560896635055542e-06, 1.6884878277778625e-06, 1.816079020500183e-06, 1.9436702132225037e-06, 2.0712614059448242e-06, 2.1988525986671448e-06, 2.3264437913894653e-06, 2.454034984111786e-06, 2.5816261768341064e-06, 2.709217369556427e-06, 2.8368085622787476e-06, 2.964399755001068e-06, 3.0919909477233887e-06, 3.2195821404457092e-06, 3.3471733331680298e-06, 3.4747645258903503e-06, 3.602355718612671e-06, 3.7299469113349915e-06, 3.857538104057312e-06, 3.9851292967796326e-06, 4.112720489501953e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 1.0, 3.0, 2.0, 4.0, 3.0, 9.0, 10.0, 14.0, 31.0, 17.0, 59.0, 26.0, 82.0, 26.0, 99.0, 65.0, 150.0, 61.0, 99.0, 47.0, 69.0, 19.0, 45.0, 15.0, 17.0, 7.0, 14.0, 4.0, 8.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": 
[-3.814697265625e-06, -3.7243589758872986e-06, -3.634020686149597e-06, -3.5436823964118958e-06, -3.4533441066741943e-06, -3.363005816936493e-06, -3.2726675271987915e-06, -3.18232923746109e-06, -3.0919909477233887e-06, -3.0016526579856873e-06, -2.911314368247986e-06, -2.8209760785102844e-06, -2.730637788772583e-06, -2.6402994990348816e-06, -2.54996120929718e-06, -2.4596229195594788e-06, -2.3692846298217773e-06, -2.278946340084076e-06, -2.1886080503463745e-06, -2.098269760608673e-06, -2.0079314708709717e-06, -1.9175931811332703e-06, -1.8272548913955688e-06, -1.7369166016578674e-06, -1.646578311920166e-06, -1.5562400221824646e-06, -1.4659017324447632e-06, -1.3755634427070618e-06, -1.2852251529693604e-06, -1.194886863231659e-06, -1.1045485734939575e-06, -1.014210283756256e-06, -9.238719940185547e-07, -8.335337042808533e-07, -7.431954145431519e-07, -6.528571248054504e-07, -5.62518835067749e-07, -4.721805453300476e-07, -3.818422555923462e-07, -2.915039658546448e-07, -2.0116567611694336e-07, -1.1082738637924194e-07, -2.0489096641540527e-08, 6.984919309616089e-08, 1.601874828338623e-07, 2.505257725715637e-07, 3.4086406230926514e-07, 4.3120235204696655e-07, 5.21540641784668e-07, 6.118789315223694e-07, 7.022172212600708e-07, 7.925555109977722e-07, 8.828938007354736e-07, 9.73232090473175e-07, 1.0635703802108765e-06, 1.1539086699485779e-06, 1.2442469596862793e-06, 1.3345852494239807e-06, 1.4249235391616821e-06, 1.5152618288993835e-06, 1.605600118637085e-06, 1.6959384083747864e-06, 1.7862766981124878e-06, 1.8766149878501892e-06, 1.9669532775878906e-06]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 3.0, 2.0, 2.0, 6.0, 8.0, 6.0, 13.0, 17.0, 42.0, 79.0, 164.0, 224.0, 115.0, 73.0, 55.0, 33.0, 35.0, 29.0, 24.0, 9.0, 16.0, 14.0, 9.0, 4.0, 5.0, 3.0, 4.0, 2.0, 6.0, 2.0, 0.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.116457658819854e-05, -8.470849570585415e-05, -7.825241482350975e-05, -7.179633394116536e-05, -6.534025305882096e-05, -5.888417217647657e-05, -5.242809129413217e-05, -4.597201041178778e-05, -3.951592952944338e-05, -3.305984864709899e-05, -2.6603767764754593e-05, -2.01476868824102e-05, -1.3691606000065804e-05, -7.235525117721409e-06, -7.794442353770137e-07, 5.676636646967381e-06, 1.2132717529311776e-05, 1.858879841165617e-05, 2.5044879294000566e-05, 3.150096017634496e-05, 3.7957041058689356e-05, 4.441312194103375e-05, 5.0869202823378146e-05, 5.732528370572254e-05, 6.378136458806694e-05, 7.023744547041133e-05, 7.669352635275573e-05, 8.314960723510012e-05, 8.960568811744452e-05, 9.606176899978891e-05, 0.0001025178498821333, 0.0001089739307644777, 0.0001154300116468221, 0.00012188609252916649, 0.00012834217341151088, 0.0001347982615698129, 0.00014125433517619967, 0.00014771040878258646, 0.00015416649694088846, 0.00016062258509919047, 0.00016707865870557725, 0.00017353473231196404, 0.00017999082047026604, 0.00018644690862856805, 0.00019290298223495483, 0.00019935905584134161, 0.00020581514399964362, 0.00021227123215794563, 0.00021872730576433241, 0.0002251833793707192, 0.0002316394675290212, 0.0002380955556873232, 0.00024455162929371, 0.0002510077029000968, 0.00025746377650648355, 0.0002639198792167008, 0.0002703759528230876, 0.00027683202642947435, 0.0002832881291396916, 0.00028974420274607837, 0.00029620027635246515, 0.00030265634995885193, 0.0003091124235652387, 0.00031556852627545595, 0.00032202459988184273]}, 
"gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 7.0, 3.0, 5.0, 4.0, 10.0, 10.0, 10.0, 11.0, 17.0, 9.0, 24.0, 36.0, 32.0, 34.0, 41.0, 34.0, 39.0, 40.0, 50.0, 49.0, 45.0, 55.0, 50.0, 48.0, 32.0, 41.0, 34.0, 26.0, 40.0, 25.0, 27.0, 22.0, 14.0, 9.0, 16.0, 6.0, 15.0, 9.0, 7.0, 6.0, 4.0, 4.0, 3.0, 8.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.846687316894531e-05, -9.508151561021805e-05, -9.169615805149078e-05, -8.831080049276352e-05, -8.492544293403625e-05, -8.154008537530899e-05, -7.815472781658173e-05, -7.476937025785446e-05, -7.13840126991272e-05, -6.799865514039993e-05, -6.461329758167267e-05, -6.12279400229454e-05, -5.784258246421814e-05, -5.4457224905490875e-05, -5.107186734676361e-05, -4.7686509788036346e-05, -4.430115222930908e-05, -4.091579467058182e-05, -3.753043711185455e-05, -3.414507955312729e-05, -3.0759721994400024e-05, -2.737436443567276e-05, -2.3989006876945496e-05, -2.060364931821823e-05, -1.7218291759490967e-05, -1.3832934200763702e-05, -1.0447576642036438e-05, -7.062219083309174e-06, -3.676861524581909e-06, -2.915039658546448e-07, 3.0938535928726196e-06, 6.479211151599884e-06, 9.864568710327148e-06, 1.3249926269054413e-05, 1.6635283827781677e-05, 2.002064138650894e-05, 2.3405998945236206e-05, 2.679135650396347e-05, 3.0176714062690735e-05, 3.3562071621418e-05, 3.6947429180145264e-05, 4.033278673887253e-05, 4.371814429759979e-05, 4.710350185632706e-05, 5.048885941505432e-05, 5.3874216973781586e-05, 5.725957453250885e-05, 6.0644932091236115e-05, 6.403028964996338e-05, 6.741564720869064e-05, 7.080100476741791e-05, 7.418636232614517e-05, 7.757171988487244e-05, 8.09570774435997e-05, 8.434243500232697e-05, 8.772779256105423e-05, 9.11131501197815e-05, 9.449850767850876e-05, 9.788386523723602e-05, 0.00010126922279596329, 0.00010465458035469055, 0.00010803993791341782, 0.00011142529547214508, 0.00011481065303087234, 0.00011819601058959961]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 3.0, 6.0, 9.0, 13.0, 25.0, 63.0, 77.0, 122.0, 186.0, 319.0, 514.0, 825.0, 1540.0, 2796.0, 5558.0, 14029.0, 44694.0, 3834363.0, 237493.0, 29775.0, 10606.0, 4913.0, 2429.0, 1409.0, 875.0, 552.0, 328.0, 200.0, 126.0, 122.0, 81.0, 41.0, 24.0, 38.0, 22.0, 18.0, 18.0, 11.0, 9.0, 8.0, 7.0, 7.0, 3.0, 11.0, 8.0, 7.0, 1.0, 5.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.784366607666016e-05, -7.410906255245209e-05, -7.037445902824402e-05, -6.663985550403595e-05, -6.290525197982788e-05, -5.917064845561981e-05, -5.543604493141174e-05, -5.1701441407203674e-05, -4.7966837882995605e-05, -4.423223435878754e-05, -4.049763083457947e-05, -3.67630273103714e-05, -3.302842378616333e-05, -2.929382026195526e-05, -2.5559216737747192e-05, -2.1824613213539124e-05, -1.8090009689331055e-05, -1.4355406165122986e-05, -1.0620802640914917e-05, -6.886199116706848e-06, -3.1515955924987793e-06, 5.830079317092896e-07, 4.317611455917358e-06, 8.052214980125427e-06, 1.1786818504333496e-05, 1.5521422028541565e-05, 1.9256025552749634e-05, 2.2990629076957703e-05, 2.672523260116577e-05, 3.045983612537384e-05, 3.419443964958191e-05, 3.792904317378998e-05, 4.166364669799805e-05, 4.5398250222206116e-05, 4.9132853746414185e-05, 5.2867457270622253e-05, 5.660206079483032e-05, 6.033666431903839e-05, 6.407126784324646e-05, 6.780587136745453e-05, 7.15404748916626e-05, 7.527507841587067e-05, 7.900968194007874e-05, 8.27442854642868e-05, 
8.647888898849487e-05, 9.021349251270294e-05, 9.394809603691101e-05, 9.768269956111908e-05, 0.00010141730308532715, 0.00010515190660953522, 0.00010888651013374329, 0.00011262111365795135, 0.00011635571718215942, 0.00012009032070636749, 0.00012382492423057556, 0.00012755952775478363, 0.0001312941312789917, 0.00013502873480319977, 0.00013876333832740784, 0.0001424979418516159, 0.00014623254537582397, 0.00014996714890003204, 0.0001537017524242401, 0.00015743635594844818, 0.00016117095947265625]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 5.0, 5.0, 2.0, 15.0, 10.0, 15.0, 16.0, 29.0, 64.0, 75.0, 100.0, 133.0, 90.0, 113.0, 103.0, 74.0, 35.0, 41.0, 28.0, 11.0, 7.0, 8.0, 8.0, 5.0, 8.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4722347259521484e-05, -1.400243490934372e-05, -1.3282522559165955e-05, -1.256261020898819e-05, -1.1842697858810425e-05, -1.112278550863266e-05, -1.0402873158454895e-05, -9.68296080827713e-06, -8.963048458099365e-06, -8.2431361079216e-06, -7.5232237577438354e-06, -6.8033114075660706e-06, -6.083399057388306e-06, -5.363486707210541e-06, -4.643574357032776e-06, -3.923662006855011e-06, -3.203749656677246e-06, -2.483837306499481e-06, -1.7639249563217163e-06, -1.0440126061439514e-06, -3.241002559661865e-07, 3.9581209421157837e-07, 1.1157244443893433e-06, 1.8356367945671082e-06, 2.555549144744873e-06, 3.275461494922638e-06, 3.995373845100403e-06, 4.715286195278168e-06, 5.435198545455933e-06, 6.1551108956336975e-06, 6.875023245811462e-06, 7.594935595989227e-06, 8.314847946166992e-06, 9.034760296344757e-06, 9.754672646522522e-06, 1.0474584996700287e-05, 1.1194497346878052e-05, 1.1914409697055817e-05, 1.2634322047233582e-05, 1.3354234397411346e-05, 1.4074146747589111e-05, 1.4794059097766876e-05, 1.551397144794464e-05, 1.6233883798122406e-05, 1.695379614830017e-05, 1.7673708498477936e-05, 1.83936208486557e-05, 1.9113533198833466e-05, 1.983344554901123e-05, 2.0553357899188995e-05, 2.127327024936676e-05, 2.1993182599544525e-05, 2.271309494972229e-05, 2.3433007299900055e-05, 2.415291965007782e-05, 2.4872832000255585e-05, 2.559274435043335e-05, 2.6312656700611115e-05, 2.703256905078888e-05, 2.7752481400966644e-05, 2.847239375114441e-05, 2.9192306101322174e-05, 2.991221845149994e-05, 3.0632130801677704e-05, 3.135204315185547e-05]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 6.0, 10.0, 8.0, 14.0, 18.0, 31.0, 50.0, 66.0, 79.0, 162.0, 212.0, 344.0, 525.0, 848.0, 1451.0, 2507.0, 3948.0, 7377.0, 14006.0, 28249.0, 77208.0, 2706817.0, 1211510.0, 78179.0, 28934.0, 14019.0, 7116.0, 4353.0, 2409.0, 1402.0, 864.0, 552.0, 367.0, 203.0, 154.0, 95.0, 58.0, 37.0, 33.0, 17.0, 20.0, 6.0, 8.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.1140785217285156e-05, -4.949048161506653e-05, -4.78401780128479e-05, -4.618987441062927e-05, -4.4539570808410645e-05, -4.2889267206192017e-05, -4.123896360397339e-05, -3.958866000175476e-05, -3.793835639953613e-05, -3.6288052797317505e-05, -3.463774919509888e-05, -3.298744559288025e-05, -3.133714199066162e-05, -2.9686838388442993e-05, -2.8036534786224365e-05, -2.6386231184005737e-05, -2.473592758178711e-05, -2.308562397956848e-05, -2.1435320377349854e-05, -1.9785016775131226e-05, -1.8134713172912598e-05, 
-1.648440957069397e-05, -1.4834105968475342e-05, -1.3183802366256714e-05, -1.1533498764038086e-05, -9.883195161819458e-06, -8.23289155960083e-06, -6.582587957382202e-06, -4.932284355163574e-06, -3.2819807529449463e-06, -1.6316771507263184e-06, 1.862645149230957e-08, 1.6689300537109375e-06, 3.3192336559295654e-06, 4.969537258148193e-06, 6.619840860366821e-06, 8.27014446258545e-06, 9.920448064804077e-06, 1.1570751667022705e-05, 1.3221055269241333e-05, 1.4871358871459961e-05, 1.652166247367859e-05, 1.8171966075897217e-05, 1.9822269678115845e-05, 2.1472573280334473e-05, 2.31228768825531e-05, 2.477318048477173e-05, 2.6423484086990356e-05, 2.8073787689208984e-05, 2.9724091291427612e-05, 3.137439489364624e-05, 3.302469849586487e-05, 3.4675002098083496e-05, 3.6325305700302124e-05, 3.797560930252075e-05, 3.962591290473938e-05, 4.127621650695801e-05, 4.2926520109176636e-05, 4.4576823711395264e-05, 4.622712731361389e-05, 4.787743091583252e-05, 4.952773451805115e-05, 5.1178038120269775e-05, 5.28283417224884e-05, 5.447864532470703e-05]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 8.0, 5.0, 8.0, 5.0, 11.0, 20.0, 19.0, 27.0, 47.0, 67.0, 183.0, 634.0, 2274.0, 362.0, 104.0, 46.0, 47.0, 40.0, 19.0, 14.0, 15.0, 20.0, 18.0, 17.0, 12.0, 4.0, 7.0, 8.0, 9.0, 3.0, 9.0, 2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4080276489257812e-05, -2.3000873625278473e-05, -2.1921470761299133e-05, -2.0842067897319794e-05, -1.9762665033340454e-05, -1.8683262169361115e-05, -1.7603859305381775e-05, -1.6524456441402435e-05, -1.5445053577423096e-05, -1.4365650713443756e-05, -1.3286247849464417e-05, -1.2206844985485077e-05, -1.1127442121505737e-05, -1.0048039257526398e-05, -8.968636393547058e-06, -7.889233529567719e-06, -6.809830665588379e-06, -5.730427801609039e-06, -4.6510249376297e-06, -3.57162207365036e-06, -2.4922192096710205e-06, -1.412816345691681e-06, -3.334134817123413e-07, 7.459893822669983e-07, 1.8253922462463379e-06, 2.9047951102256775e-06, 3.984197974205017e-06, 5.063600838184357e-06, 6.143003702163696e-06, 7.222406566143036e-06, 8.301809430122375e-06, 9.381212294101715e-06, 1.0460615158081055e-05, 1.1540018022060394e-05, 1.2619420886039734e-05, 1.3698823750019073e-05, 1.4778226613998413e-05, 1.5857629477977753e-05, 1.6937032341957092e-05, 1.8016435205936432e-05, 1.909583806991577e-05, 2.017524093389511e-05, 2.125464379787445e-05, 2.233404666185379e-05, 2.341344952583313e-05, 2.449285238981247e-05, 2.557225525379181e-05, 2.665165811777115e-05, 2.7731060981750488e-05, 2.8810463845729828e-05, 2.9889866709709167e-05, 3.096926957368851e-05, 3.204867243766785e-05, 3.3128075301647186e-05, 3.4207478165626526e-05, 3.5286881029605865e-05, 3.6366283893585205e-05, 3.7445686757564545e-05, 3.8525089621543884e-05, 3.9604492485523224e-05, 4.0683895349502563e-05, 4.17632982134819e-05, 4.284270107746124e-05, 4.392210394144058e-05, 4.500150680541992e-05]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 4.0, 11.0, 9.0, 15.0, 18.0, 37.0, 53.0, 76.0, 154.0, 158.0, 105.0, 81.0, 58.0, 59.0, 33.0, 22.0, 26.0, 21.0, 18.0, 9.0, 10.0, 4.0, 6.0, 5.0, 3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00013057143951300532, 
-0.0001240523997694254, -0.00011753334547393024, -0.0001110142984543927, -0.00010449525143485516, -9.797620441531762e-05, -9.145715739578009e-05, -8.493811037624255e-05, -7.841906335670501e-05, -7.190001633716747e-05, -6.538096931762993e-05, -5.8861922298092395e-05, -5.234287527855486e-05, -4.582382825901732e-05, -3.930478123947978e-05, -3.278573421994224e-05, -2.6266687200404704e-05, -1.9747640180867165e-05, -1.3228593161329627e-05, -6.709546141792089e-06, -1.9049912225455046e-07, 6.328547897282988e-06, 1.2847594916820526e-05, 1.9366641936358064e-05, 2.5885688955895603e-05, 3.240473597543314e-05, 3.892378299497068e-05, 4.544283001450822e-05, 5.1961877034045756e-05, 5.8480924053583294e-05, 6.499997107312083e-05, 7.151901809265837e-05, 7.803805056028068e-05, 8.455709757981822e-05, 9.107614459935576e-05, 9.75951916188933e-05, 0.00010411423863843083, 0.00011063328565796837, 0.00011715233267750591, 0.00012367137242108583, 0.000130190426716581, 0.00013670948101207614, 0.00014322852075565606, 0.000149747560499236, 0.00015626661479473114, 0.0001627856690902263, 0.00016930470883380622, 0.00017582374857738614, 0.0001823428028728813, 0.00018886185716837645, 0.00019538089691195637, 0.0002018999366555363, 0.00020841899095103145, 0.0002149380452465266, 0.00022145708499010652, 0.00022797612473368645, 0.0002344951790291816, 0.00024101423332467675, 0.00024753325851634145, 0.0002540523128118366, 0.00026057136710733175, 0.0002670904214028269, 0.00027360947569832206, 0.00028012850088998675, 0.0002866475551854819]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 3.0, 11.0, 4.0, 6.0, 15.0, 8.0, 16.0, 13.0, 19.0, 25.0, 17.0, 21.0, 22.0, 34.0, 38.0, 36.0, 35.0, 31.0, 35.0, 36.0, 40.0, 22.0, 48.0, 35.0, 38.0, 30.0, 33.0, 34.0, 45.0, 33.0, 29.0, 27.0, 32.0, 18.0, 25.0, 15.0, 13.0, 14.0, 13.0, 12.0, 5.0, 6.0, 3.0, 6.0, 2.0, 0.0, 5.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.34600830078125e-05, -9.044818580150604e-05, -8.743628859519958e-05, -8.442439138889313e-05, -8.141249418258667e-05, -7.840059697628021e-05, -7.538869976997375e-05, -7.23768025636673e-05, -6.936490535736084e-05, -6.635300815105438e-05, -6.334111094474792e-05, -6.032921373844147e-05, -5.731731653213501e-05, -5.430541932582855e-05, -5.1293522119522095e-05, -4.828162491321564e-05, -4.526972770690918e-05, -4.225783050060272e-05, -3.9245933294296265e-05, -3.623403608798981e-05, -3.322213888168335e-05, -3.0210241675376892e-05, -2.7198344469070435e-05, -2.4186447262763977e-05, -2.117455005645752e-05, -1.8162652850151062e-05, -1.5150755643844604e-05, -1.2138858437538147e-05, -9.12696123123169e-06, -6.115064024925232e-06, -3.1031668186187744e-06, -9.12696123123169e-08, 2.9206275939941406e-06, 5.932524800300598e-06, 8.944422006607056e-06, 1.1956319212913513e-05, 1.496821641921997e-05, 1.7980113625526428e-05, 2.0992010831832886e-05, 2.4003908038139343e-05, 2.70158052444458e-05, 3.0027702450752258e-05, 3.3039599657058716e-05, 3.605149686336517e-05, 3.906339406967163e-05, 4.207529127597809e-05, 4.5087188482284546e-05, 4.8099085688591003e-05, 5.111098289489746e-05, 5.412288010120392e-05, 5.7134777307510376e-05, 6.0146674513816833e-05, 6.315857172012329e-05, 6.617046892642975e-05, 6.91823661327362e-05, 7.219426333904266e-05, 7.520616054534912e-05, 7.821805775165558e-05, 8.122995495796204e-05, 8.42418521642685e-05, 8.725374937057495e-05, 9.026564657688141e-05, 9.327754378318787e-05, 9.628944098949432e-05, 9.930133819580078e-05]}, 
"gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 4.0, 4.0, 6.0, 8.0, 8.0, 15.0, 10.0, 16.0, 14.0, 16.0, 34.0, 57.0, 96.0, 168.0, 390.0, 795.0, 1764.0, 4153.0, 11403.0, 39417.0, 251289.0, 639695.0, 71455.0, 17206.0, 6036.0, 2379.0, 1013.0, 497.0, 237.0, 114.0, 60.0, 40.0, 33.0, 23.0, 24.0, 19.0, 11.0, 7.0, 8.0, 7.0, 4.0, 3.0, 6.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.000179290771484375, -0.0001741945743560791, -0.0001690983772277832, -0.0001640021800994873, -0.0001589059829711914, -0.0001538097858428955, -0.0001487135887145996, -0.0001436173915863037, -0.0001385211944580078, -0.00013342499732971191, -0.00012832880020141602, -0.00012323260307312012, -0.00011813640594482422, -0.00011304020881652832, -0.00010794401168823242, -0.00010284781455993652, -9.775161743164062e-05, -9.265542030334473e-05, -8.755922317504883e-05, -8.246302604675293e-05, -7.736682891845703e-05, -7.227063179016113e-05, -6.717443466186523e-05, -6.207823753356934e-05, -5.698204040527344e-05, -5.188584327697754e-05, -4.678964614868164e-05, -4.169344902038574e-05, -3.6597251892089844e-05, -3.1501054763793945e-05, -2.6404857635498047e-05, -2.130866050720215e-05, -1.621246337890625e-05, -1.1116266250610352e-05, -6.020069122314453e-06, -9.238719940185547e-07, 4.172325134277344e-06, 9.268522262573242e-06, 1.436471939086914e-05, 1.946091651916504e-05, 2.4557113647460938e-05, 2.9653310775756836e-05, 3.4749507904052734e-05, 3.984570503234863e-05, 4.494190216064453e-05, 5.003809928894043e-05, 5.513429641723633e-05, 6.0230493545532227e-05, 6.532669067382812e-05, 7.042288780212402e-05, 7.551908493041992e-05, 8.061528205871582e-05, 8.571147918701172e-05, 9.080767631530762e-05, 9.590387344360352e-05, 0.00010100007057189941, 0.00010609626770019531, 0.00011119246482849121, 0.00011628866195678711, 0.00012138485908508301, 0.0001264810562133789, 0.0001315772533416748, 0.0001366734504699707, 0.0001417696475982666, 0.0001468658447265625]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 3.0, 1.0, 4.0, 7.0, 16.0, 18.0, 22.0, 35.0, 57.0, 84.0, 126.0, 119.0, 121.0, 111.0, 98.0, 62.0, 38.0, 24.0, 21.0, 8.0, 11.0, 7.0, 5.0, 2.0, 5.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.531839370727539e-05, -1.451931893825531e-05, -1.372024416923523e-05, -1.2921169400215149e-05, -1.2122094631195068e-05, -1.1323019862174988e-05, -1.0523945093154907e-05, -9.724870324134827e-06, -8.925795555114746e-06, -8.126720786094666e-06, -7.327646017074585e-06, -6.528571248054504e-06, -5.729496479034424e-06, -4.930421710014343e-06, -4.131346940994263e-06, -3.332272171974182e-06, -2.5331974029541016e-06, -1.734122633934021e-06, -9.350478649139404e-07, -1.3597309589385986e-07, 6.631016731262207e-07, 1.4621764421463013e-06, 2.261251211166382e-06, 3.0603259801864624e-06, 3.859400749206543e-06, 4.6584755182266235e-06, 5.457550287246704e-06, 6.256625056266785e-06, 7.055699825286865e-06, 7.854774594306946e-06, 8.653849363327026e-06, 9.452924132347107e-06, 1.0251998901367188e-05, 1.1051073670387268e-05, 1.1850148439407349e-05, 1.264922320842743e-05, 1.344829797744751e-05, 1.424737274646759e-05, 1.5046447515487671e-05, 1.584552228450775e-05, 1.6644597053527832e-05, 1.7443671822547913e-05, 1.8242746591567993e-05, 1.9041821360588074e-05, 
1.9840896129608154e-05, 2.0639970898628235e-05, 2.1439045667648315e-05, 2.2238120436668396e-05, 2.3037195205688477e-05, 2.3836269974708557e-05, 2.4635344743728638e-05, 2.5434419512748718e-05, 2.62334942817688e-05, 2.703256905078888e-05, 2.783164381980896e-05, 2.863071858882904e-05, 2.942979335784912e-05, 3.02288681268692e-05, 3.102794289588928e-05, 3.182701766490936e-05, 3.262609243392944e-05, 3.3425167202949524e-05, 3.4224241971969604e-05, 3.5023316740989685e-05, 3.5822391510009766e-05]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 6.0, 4.0, 10.0, 15.0, 16.0, 18.0, 26.0, 54.0, 69.0, 96.0, 136.0, 231.0, 319.0, 489.0, 743.0, 1121.0, 1764.0, 2908.0, 4271.0, 7077.0, 11403.0, 17691.0, 32126.0, 54960.0, 117338.0, 364905.0, 222337.0, 93271.0, 47428.0, 25400.0, 15915.0, 9797.0, 5823.0, 3789.0, 2373.0, 1671.0, 1009.0, 664.0, 420.0, 267.0, 206.0, 135.0, 82.0, 58.0, 41.0, 25.0, 22.0, 13.0, 7.0, 4.0, 6.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.6716461181640625e-05, -3.548618406057358e-05, -3.425590693950653e-05, -3.3025629818439484e-05, -3.1795352697372437e-05, -3.056507557630539e-05, -2.9334798455238342e-05, -2.8104521334171295e-05, -2.6874244213104248e-05, -2.56439670920372e-05, -2.4413689970970154e-05, -2.3183412849903107e-05, -2.195313572883606e-05, -2.0722858607769012e-05, -1.9492581486701965e-05, -1.8262304365634918e-05, -1.703202724456787e-05, -1.5801750123500824e-05, -1.4571473002433777e-05, -1.334119588136673e-05, -1.2110918760299683e-05, -1.0880641639232635e-05, -9.650364518165588e-06, -8.420087397098541e-06, -7.189810276031494e-06, -5.959533154964447e-06, -4.7292560338974e-06, -3.4989789128303528e-06, -2.2687017917633057e-06, -1.0384246706962585e-06, 1.9185245037078857e-07, 1.4221295714378357e-06, 2.652406692504883e-06, 3.88268381357193e-06, 5.112960934638977e-06, 6.343238055706024e-06, 7.573515176773071e-06, 8.803792297840118e-06, 1.0034069418907166e-05, 1.1264346539974213e-05, 1.249462366104126e-05, 1.3724900782108307e-05, 1.4955177903175354e-05, 1.61854550242424e-05, 1.7415732145309448e-05, 1.8646009266376495e-05, 1.9876286387443542e-05, 2.110656350851059e-05, 2.2336840629577637e-05, 2.3567117750644684e-05, 2.479739487171173e-05, 2.6027671992778778e-05, 2.7257949113845825e-05, 2.8488226234912872e-05, 2.971850335597992e-05, 3.0948780477046967e-05, 3.2179057598114014e-05, 3.340933471918106e-05, 3.463961184024811e-05, 3.5869888961315155e-05, 3.71001660823822e-05, 3.833044320344925e-05, 3.9560720324516296e-05, 4.0790997445583344e-05, 4.202127456665039e-05]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 5.0, 0.0, 5.0, 6.0, 10.0, 5.0, 13.0, 4.0, 10.0, 10.0, 19.0, 14.0, 20.0, 29.0, 33.0, 26.0, 37.0, 28.0, 32.0, 33.0, 54.0, 35.0, 35.0, 54.0, 47.0, 46.0, 46.0, 38.0, 46.0, 39.0, 30.0, 26.0, 23.0, 18.0, 13.0, 22.0, 15.0, 18.0, 10.0, 13.0, 3.0, 12.0, 7.0, 3.0, 2.0, 5.0, 3.0, 2.0, 3.0, 1.0, 2.0], "bins": [-3.6835670471191406e-05, -3.5814009606838226e-05, -3.4792348742485046e-05, -3.3770687878131866e-05, -3.2749027013778687e-05, -3.1727366149425507e-05, -3.070570528507233e-05, -2.9684044420719147e-05, -2.8662383556365967e-05, -2.7640722692012787e-05, -2.6619061827659607e-05, -2.5597400963306427e-05, -2.4575740098953247e-05, -2.3554079234600067e-05, -2.2532418370246887e-05, -2.1510757505893707e-05, -2.0489096641540527e-05, -1.9467435777187347e-05, -1.8445774912834167e-05, -1.7424114048480988e-05, 
-1.6402453184127808e-05, -1.5380792319774628e-05, -1.4359131455421448e-05, -1.3337470591068268e-05, -1.2315809726715088e-05, -1.1294148862361908e-05, -1.0272487998008728e-05, -9.250827133655548e-06, -8.229166269302368e-06, -7.207505404949188e-06, -6.185844540596008e-06, -5.164183676242828e-06, -4.1425228118896484e-06, -3.1208619475364685e-06, -2.0992010831832886e-06, -1.0775402188301086e-06, -5.587935447692871e-08, 9.657815098762512e-07, 1.987442374229431e-06, 3.009103238582611e-06, 4.030764102935791e-06, 5.052424967288971e-06, 6.074085831642151e-06, 7.095746695995331e-06, 8.11740756034851e-06, 9.13906842470169e-06, 1.016072928905487e-05, 1.118239015340805e-05, 1.220405101776123e-05, 1.322571188211441e-05, 1.424737274646759e-05, 1.526903361082077e-05, 1.629069447517395e-05, 1.731235533952713e-05, 1.833401620388031e-05, 1.935567706823349e-05, 2.037733793258667e-05, 2.139899879693985e-05, 2.242065966129303e-05, 2.344232052564621e-05, 2.446398138999939e-05, 2.548564225435257e-05, 2.650730311870575e-05, 2.752896398305893e-05, 2.855062484741211e-05]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 13.0, 12.0, 24.0, 52.0, 72.0, 252.0, 307.0, 642.0, 2445.0, 4313.0, 20894.0, 45676.0, 139685.0, 619488.0, 139741.0, 56107.0, 10686.0, 4328.0, 2479.0, 625.0, 417.0, 118.0, 65.0, 57.0, 25.0, 7.0, 11.0, 6.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.76837158203125e-06, -4.625879228115082e-06, -4.4833868741989136e-06, -4.340894520282745e-06, -4.198402166366577e-06, -4.055909812450409e-06, -3.913417458534241e-06, -3.7709251046180725e-06, -3.6284327507019043e-06, -3.485940396785736e-06, -3.343448042869568e-06, -3.2009556889533997e-06, -3.0584633350372314e-06, -2.9159709811210632e-06, -2.773478627204895e-06, -2.630986273288727e-06, -2.4884939193725586e-06, -2.3460015654563904e-06, -2.203509211540222e-06, -2.061016857624054e-06, -1.9185245037078857e-06, -1.7760321497917175e-06, -1.6335397958755493e-06, -1.491047441959381e-06, -1.3485550880432129e-06, -1.2060627341270447e-06, -1.0635703802108765e-06, -9.210780262947083e-07, -7.7858567237854e-07, -6.360933184623718e-07, -4.936009645462036e-07, -3.511086106300354e-07, -2.086162567138672e-07, -6.612390279769897e-08, 7.636845111846924e-08, 2.1886080503463745e-07, 3.6135315895080566e-07, 5.038455128669739e-07, 6.463378667831421e-07, 7.888302206993103e-07, 9.313225746154785e-07, 1.0738149285316467e-06, 1.216307282447815e-06, 1.3587996363639832e-06, 1.5012919902801514e-06, 1.6437843441963196e-06, 1.7862766981124878e-06, 1.928769052028656e-06, 2.0712614059448242e-06, 2.2137537598609924e-06, 2.3562461137771606e-06, 2.498738467693329e-06, 2.641230821609497e-06, 2.7837231755256653e-06, 2.9262155294418335e-06, 3.0687078833580017e-06, 3.21120023727417e-06, 3.353692591190338e-06, 3.4961849451065063e-06, 3.6386772990226746e-06, 3.7811696529388428e-06, 3.923662006855011e-06, 4.066154360771179e-06, 4.208646714687347e-06, 4.351139068603516e-06]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 3.0, 0.0, 6.0, 0.0, 16.0, 12.0, 0.0, 9.0, 0.0, 17.0, 36.0, 0.0, 29.0, 0.0, 38.0, 44.0, 0.0, 62.0, 71.0, 0.0, 59.0, 0.0, 71.0, 66.0, 0.0, 76.0, 0.0, 75.0, 62.0, 0.0, 36.0, 0.0, 46.0, 48.0, 0.0, 31.0, 0.0, 30.0, 15.0, 0.0, 22.0, 11.0, 0.0, 7.0, 0.0, 6.0, 5.0, 0.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], 
"bins": [-1.0132789611816406e-06, -9.76957380771637e-07, -9.406358003616333e-07, -9.043142199516296e-07, -8.67992639541626e-07, -8.316710591316223e-07, -7.953494787216187e-07, -7.59027898311615e-07, -7.227063179016113e-07, -6.863847374916077e-07, -6.50063157081604e-07, -6.137415766716003e-07, -5.774199962615967e-07, -5.41098415851593e-07, -5.047768354415894e-07, -4.684552550315857e-07, -4.3213367462158203e-07, -3.9581209421157837e-07, -3.594905138015747e-07, -3.2316893339157104e-07, -2.868473529815674e-07, -2.505257725715637e-07, -2.1420419216156006e-07, -1.778826117515564e-07, -1.4156103134155273e-07, -1.0523945093154907e-07, -6.891787052154541e-08, -3.259629011154175e-08, 3.725290298461914e-09, 4.0046870708465576e-08, 7.636845111846924e-08, 1.126900315284729e-07, 1.4901161193847656e-07, 1.8533319234848022e-07, 2.2165477275848389e-07, 2.5797635316848755e-07, 2.942979335784912e-07, 3.3061951398849487e-07, 3.6694109439849854e-07, 4.032626748085022e-07, 4.3958425521850586e-07, 4.759058356285095e-07, 5.122274160385132e-07, 5.485489964485168e-07, 5.848705768585205e-07, 6.211921572685242e-07, 6.575137376785278e-07, 6.938353180885315e-07, 7.301568984985352e-07, 7.664784789085388e-07, 8.028000593185425e-07, 8.391216397285461e-07, 8.754432201385498e-07, 9.117648005485535e-07, 9.480863809585571e-07, 9.844079613685608e-07, 1.0207295417785645e-06, 1.0570511221885681e-06, 1.0933727025985718e-06, 1.1296942830085754e-06, 1.166015863418579e-06, 1.2023374438285828e-06, 1.2386590242385864e-06, 1.27498060464859e-06, 1.3113021850585938e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 12.0, 12.0, 14.0, 22.0, 22.0, 40.0, 81.0, 72.0, 131.0, 182.0, 252.0, 361.0, 492.0, 724.0, 1106.0, 0.0, 1678.0, 2511.0, 3938.0, 6390.0, 10607.0, 18241.0, 34480.0, 71613.0, 178879.0, 384587.0, 178660.0, 72113.0, 34351.0, 18386.0, 10589.0, 6323.0, 3918.0, 2621.0, 1651.0, 1132.0, 0.0, 714.0, 483.0, 356.0, 255.0, 151.0, 120.0, 86.0, 62.0, 34.0, 37.0, 19.0, 10.0, 11.0, 14.0, 6.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0], "bins": [-1.7881393432617188e-06, -1.7313286662101746e-06, -1.6745179891586304e-06, -1.6177073121070862e-06, -1.560896635055542e-06, -1.5040859580039978e-06, -1.4472752809524536e-06, -1.3904646039009094e-06, -1.3336539268493652e-06, -1.276843249797821e-06, -1.2200325727462769e-06, -1.1632218956947327e-06, -1.1064112186431885e-06, -1.0496005415916443e-06, -9.927898645401e-07, -9.359791874885559e-07, -8.791685104370117e-07, -8.223578333854675e-07, -7.655471563339233e-07, -7.087364792823792e-07, -6.51925802230835e-07, -5.951151251792908e-07, -5.383044481277466e-07, -4.814937710762024e-07, -4.246830940246582e-07, -3.67872416973114e-07, -3.110617399215698e-07, -2.5425106287002563e-07, -1.9744038581848145e-07, -1.4062970876693726e-07, -8.381903171539307e-08, -2.7008354663848877e-08, 2.9802322387695312e-08, 8.66129994392395e-08, 1.434236764907837e-07, 2.0023435354232788e-07, 2.5704503059387207e-07, 3.1385570764541626e-07, 3.7066638469696045e-07, 4.2747706174850464e-07, 4.842877388000488e-07, 5.41098415851593e-07, 5.979090929031372e-07, 6.547197699546814e-07, 7.115304470062256e-07, 7.683411240577698e-07, 8.25151801109314e-07, 8.819624781608582e-07, 9.387731552124023e-07, 9.955838322639465e-07, 1.0523945093154907e-06, 1.109205186367035e-06, 1.166015863418579e-06, 1.2228265404701233e-06, 1.2796372175216675e-06, 1.3364478945732117e-06, 1.3932585716247559e-06, 1.4500692486763e-06, 1.5068799257278442e-06, 1.5636906027793884e-06, 1.6205012798309326e-06, 
1.6773119568824768e-06, 1.734122633934021e-06, 1.7909333109855652e-06, 1.8477439880371094e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 4.0, 2.0, 6.0, 7.0, 5.0, 0.0, 9.0, 5.0, 9.0, 11.0, 13.0, 25.0, 26.0, 28.0, 28.0, 48.0, 58.0, 63.0, 65.0, 62.0, 75.0, 0.0, 54.0, 67.0, 48.0, 52.0, 48.0, 51.0, 18.0, 29.0, 14.0, 16.0, 18.0, 10.0, 8.0, 6.0, 6.0, 0.0, 5.0, 2.0, 6.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7285346984863281e-06, -1.6726553440093994e-06, -1.6167759895324707e-06, -1.560896635055542e-06, -1.5050172805786133e-06, -1.4491379261016846e-06, -1.3932585716247559e-06, -1.3373792171478271e-06, -1.2814998626708984e-06, -1.2256205081939697e-06, -1.169741153717041e-06, -1.1138617992401123e-06, -1.0579824447631836e-06, -1.0021030902862549e-06, -9.462237358093262e-07, -8.903443813323975e-07, -8.344650268554688e-07, -7.7858567237854e-07, -7.227063179016113e-07, -6.668269634246826e-07, -6.109476089477539e-07, -5.550682544708252e-07, -4.991888999938965e-07, -4.4330954551696777e-07, -3.8743019104003906e-07, -3.3155083656311035e-07, -2.7567148208618164e-07, -2.1979212760925293e-07, -1.6391277313232422e-07, -1.0803341865539551e-07, -5.21540641784668e-08, 3.725290298461914e-09, 5.960464477539063e-08, 1.1548399925231934e-07, 1.7136335372924805e-07, 2.2724270820617676e-07, 2.8312206268310547e-07, 3.390014171600342e-07, 3.948807716369629e-07, 4.507601261138916e-07, 5.066394805908203e-07, 5.62518835067749e-07, 6.183981895446777e-07, 6.742775440216064e-07, 7.301568984985352e-07, 7.860362529754639e-07, 8.419156074523926e-07, 8.977949619293213e-07, 9.5367431640625e-07, 1.0095536708831787e-06, 1.0654330253601074e-06, 1.1213123798370361e-06, 1.1771917343139648e-06, 1.2330710887908936e-06, 1.2889504432678223e-06, 1.344829797744751e-06, 1.4007091522216797e-06, 1.4565885066986084e-06, 1.5124678611755371e-06, 1.5683472156524658e-06, 1.6242265701293945e-06, 1.6801059246063232e-06, 1.735985279083252e-06, 1.7918646335601807e-06, 1.8477439880371094e-06]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 6.0, 11.0, 7.0, 11.0, 10.0, 26.0, 55.0, 98.0, 230.0, 183.0, 94.0, 57.0, 47.0, 39.0, 38.0, 19.0, 18.0, 9.0, 12.0, 12.0, 11.0, 2.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001511340233264491, -0.00014345854287967086, -0.00013578306243289262, -0.00012810758198611438, -0.00012043210153933614, -0.00011275662109255791, -0.00010508114792173728, -9.740566747495905e-05, -8.973018702818081e-05, -8.205470658140257e-05, -7.437922613462433e-05, -6.670375296380371e-05, -5.9028268879046664e-05, -5.1352788432268426e-05, -4.3677311623468995e-05, -3.600183117669076e-05, -2.832635072991252e-05, -2.065087028313428e-05, -1.2975391655345447e-05, -5.299913027556613e-06, 2.3755674192216247e-06, 1.0051047865999863e-05, 1.7726524674799293e-05, 2.540200512157753e-05, 3.307748556835577e-05, 4.075296601513401e-05, 4.8428446461912245e-05, 5.6103923270711675e-05, 6.37794000795111e-05, 7.145488052628934e-05, 7.913036097306758e-05, 8.680584141984582e-05, 9.448133641853929e-05, 0.00010215681686531752, 0.00010983229731209576, 0.000117507777758874, 0.00012518325820565224, 0.00013285873865243047, 0.0001405342190992087, 0.00014820968499407172, 0.0001558851799927652, 0.00016356066043954343, 
0.00017123614088632166, 0.0001789116213330999, 0.00018658710177987814, 0.00019426258222665638, 0.00020193806267343462, 0.00020961352856829762, 0.00021728900901507586, 0.0002249644894618541, 0.00023263996990863234, 0.00024031545035541058, 0.0002479909162502736, 0.00025566641124896705, 0.00026334187714383006, 0.00027101737214252353, 0.00027869283803738654, 0.00028636830393224955, 0.000294043798930943, 0.000301719264825806, 0.0003093947598244995, 0.0003170702257193625, 0.00032474572071805596, 0.000332421186612919, 0.00034009668161161244]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 3.0, 5.0, 3.0, 5.0, 6.0, 13.0, 11.0, 10.0, 8.0, 11.0, 18.0, 23.0, 30.0, 34.0, 29.0, 22.0, 36.0, 38.0, 50.0, 39.0, 44.0, 47.0, 49.0, 46.0, 49.0, 43.0, 42.0, 47.0, 36.0, 32.0, 35.0, 29.0, 23.0, 21.0, 15.0, 12.0, 9.0, 4.0, 11.0, 3.0, 3.0, 6.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-0.00012433528900146484, -0.00012046750634908676, -0.00011659972369670868, -0.0001127319410443306, -0.00010886415839195251, -0.00010499637573957443, -0.00010112859308719635, -9.726081043481827e-05, -9.339302778244019e-05, -8.95252451300621e-05, -8.565746247768402e-05, -8.178967982530594e-05, -7.792189717292786e-05, -7.405411452054977e-05, -7.018633186817169e-05, -6.631854921579361e-05, -6.245076656341553e-05, -5.8582983911037445e-05, -5.471520125865936e-05, -5.084741860628128e-05, -4.69796359539032e-05, -4.3111853301525116e-05, -3.9244070649147034e-05, -3.537628799676895e-05, -3.150850534439087e-05, -2.7640722692012787e-05, -2.3772940039634705e-05, -1.9905157387256622e-05, -1.603737473487854e-05, -1.2169592082500458e-05, -8.301809430122375e-06, -4.434026777744293e-06, -5.662441253662109e-07, 3.3015385270118713e-06, 7.169321179389954e-06, 1.1037103831768036e-05, 1.4904886484146118e-05, 1.87726691365242e-05, 2.2640451788902283e-05, 2.6508234441280365e-05, 3.0376017093658447e-05, 3.424379974603653e-05, 3.811158239841461e-05, 4.1979365050792694e-05, 4.5847147703170776e-05, 4.971493035554886e-05, 5.358271300792694e-05, 5.745049566030502e-05, 6.13182783126831e-05, 6.518606096506119e-05, 6.905384361743927e-05, 7.292162626981735e-05, 7.678940892219543e-05, 8.065719157457352e-05, 8.45249742269516e-05, 8.839275687932968e-05, 9.226053953170776e-05, 9.612832218408585e-05, 9.999610483646393e-05, 0.00010386388748884201, 0.00010773167014122009, 0.00011159945279359818, 0.00011546723544597626, 0.00011933501809835434, 0.00012320280075073242]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 3.0, 15.0, 21.0, 42.0, 61.0, 103.0, 188.0, 359.0, 670.0, 1213.0, 2188.0, 4440.0, 9612.0, 25735.0, 96387.0, 3929693.0, 83345.0, 23563.0, 8752.0, 3786.0, 1849.0, 908.0, 549.0, 243.0, 159.0, 109.0, 61.0, 36.0, 27.0, 25.0, 18.0, 17.0, 14.0, 10.0, 11.0, 12.0, 10.0, 9.0, 4.0, 8.0, 2.0, 4.0, 7.0, 5.0, 4.0, 3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.401538848876953e-05, -6.072781980037689e-05, -5.744025111198425e-05, -5.4152682423591614e-05, -5.0865113735198975e-05, -4.7577545046806335e-05, -4.4289976358413696e-05, -4.100240767002106e-05, -3.771483898162842e-05, -3.442727029323578e-05, -3.113970160484314e-05, -2.78521329164505e-05, -2.456456422805786e-05, -2.1276995539665222e-05, -1.7989426851272583e-05, -1.4701858162879944e-05, -1.1414289474487305e-05, -8.126720786094666e-06, -4.839152097702026e-06, -1.5515834093093872e-06, 
1.735985279083252e-06, 5.023553967475891e-06, 8.31112265586853e-06, 1.159869134426117e-05, 1.4886260032653809e-05, 1.8173828721046448e-05, 2.1461397409439087e-05, 2.4748966097831726e-05, 2.8036534786224365e-05, 3.1324103474617004e-05, 3.4611672163009644e-05, 3.789924085140228e-05, 4.118680953979492e-05, 4.447437822818756e-05, 4.77619469165802e-05, 5.104951560497284e-05, 5.433708429336548e-05, 5.762465298175812e-05, 6.091222167015076e-05, 6.41997903585434e-05, 6.748735904693604e-05, 7.077492773532867e-05, 7.406249642372131e-05, 7.735006511211395e-05, 8.063763380050659e-05, 8.392520248889923e-05, 8.721277117729187e-05, 9.050033986568451e-05, 9.378790855407715e-05, 9.707547724246979e-05, 0.00010036304593086243, 0.00010365061461925507, 0.0001069381833076477, 0.00011022575199604034, 0.00011351332068443298, 0.00011680088937282562, 0.00012008845806121826, 0.0001233760267496109, 0.00012666359543800354, 0.00012995116412639618, 0.00013323873281478882, 0.00013652630150318146, 0.0001398138701915741, 0.00014310143887996674, 0.00014638900756835938]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 6.0, 4.0, 6.0, 13.0, 14.0, 15.0, 17.0, 28.0, 54.0, 77.0, 104.0, 111.0, 131.0, 110.0, 108.0, 65.0, 39.0, 39.0, 27.0, 12.0, 8.0, 11.0, 6.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3768672943115234e-05, -1.3024546205997467e-05, -1.22804194688797e-05, -1.1536292731761932e-05, -1.0792165994644165e-05, -1.0048039257526398e-05, -9.30391252040863e-06, -8.559785783290863e-06, -7.815659046173096e-06, -7.071532309055328e-06, -6.327405571937561e-06, -5.583278834819794e-06, -4.839152097702026e-06, -4.095025360584259e-06, -3.3508986234664917e-06, -2.6067718863487244e-06, -1.862645149230957e-06, -1.1185184121131897e-06, -3.7439167499542236e-07, 3.6973506212234497e-07, 1.1138617992401123e-06, 1.8579885363578796e-06, 2.602115273475647e-06, 3.3462420105934143e-06, 4.090368747711182e-06, 4.834495484828949e-06, 5.578622221946716e-06, 6.322748959064484e-06, 7.066875696182251e-06, 7.811002433300018e-06, 8.555129170417786e-06, 9.299255907535553e-06, 1.004338264465332e-05, 1.0787509381771088e-05, 1.1531636118888855e-05, 1.2275762856006622e-05, 1.301988959312439e-05, 1.3764016330242157e-05, 1.4508143067359924e-05, 1.5252269804477692e-05, 1.599639654159546e-05, 1.6740523278713226e-05, 1.7484650015830994e-05, 1.822877675294876e-05, 1.897290349006653e-05, 1.9717030227184296e-05, 2.0461156964302063e-05, 2.120528370141983e-05, 2.1949410438537598e-05, 2.2693537175655365e-05, 2.3437663912773132e-05, 2.41817906498909e-05, 2.4925917387008667e-05, 2.5670044124126434e-05, 2.64141708612442e-05, 2.715829759836197e-05, 2.7902424335479736e-05, 2.8646551072597504e-05, 2.939067780971527e-05, 3.013480454683304e-05, 3.0878931283950806e-05, 3.162305802106857e-05, 3.236718475818634e-05, 3.311131149530411e-05, 3.3855438232421875e-05]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 2.0, 12.0, 7.0, 9.0, 22.0, 34.0, 41.0, 45.0, 92.0, 131.0, 214.0, 370.0, 612.0, 1119.0, 1903.0, 3478.0, 6873.0, 14540.0, 31957.0, 82488.0, 2564904.0, 1341796.0, 82843.0, 31454.0, 14632.0, 6750.0, 3442.0, 1856.0, 1056.0, 541.0, 387.0, 229.0, 154.0, 77.0, 61.0, 35.0, 28.0, 26.0, 13.0, 8.0, 9.0, 11.0, 6.0, 7.0, 3.0, 2.0, 1.0, 
1.0, 1.0, 0.0, 2.0], "bins": [-5.936622619628906e-05, -5.761999636888504e-05, -5.587376654148102e-05, -5.4127536714076996e-05, -5.2381306886672974e-05, -5.063507705926895e-05, -4.888884723186493e-05, -4.714261740446091e-05, -4.5396387577056885e-05, -4.365015774965286e-05, -4.190392792224884e-05, -4.015769809484482e-05, -3.8411468267440796e-05, -3.6665238440036774e-05, -3.491900861263275e-05, -3.317277878522873e-05, -3.142654895782471e-05, -2.9680319130420685e-05, -2.7934089303016663e-05, -2.618785947561264e-05, -2.4441629648208618e-05, -2.2695399820804596e-05, -2.0949169993400574e-05, -1.920294016599655e-05, -1.745671033859253e-05, -1.5710480511188507e-05, -1.3964250683784485e-05, -1.2218020856380463e-05, -1.047179102897644e-05, -8.725561201572418e-06, -6.979331374168396e-06, -5.233101546764374e-06, -3.4868717193603516e-06, -1.7406418919563293e-06, 5.587935447692871e-09, 1.751817762851715e-06, 3.4980475902557373e-06, 5.2442774176597595e-06, 6.990507245063782e-06, 8.736737072467804e-06, 1.0482966899871826e-05, 1.2229196727275848e-05, 1.397542655467987e-05, 1.5721656382083893e-05, 1.7467886209487915e-05, 1.9214116036891937e-05, 2.096034586429596e-05, 2.270657569169998e-05, 2.4452805519104004e-05, 2.6199035346508026e-05, 2.794526517391205e-05, 2.969149500131607e-05, 3.143772482872009e-05, 3.3183954656124115e-05, 3.493018448352814e-05, 3.667641431093216e-05, 3.842264413833618e-05, 4.0168873965740204e-05, 4.1915103793144226e-05, 4.366133362054825e-05, 4.540756344795227e-05, 4.715379327535629e-05, 4.8900023102760315e-05, 5.064625293016434e-05, 5.239248275756836e-05]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 0.0, 2.0, 4.0, 4.0, 6.0, 8.0, 10.0, 19.0, 10.0, 8.0, 17.0, 31.0, 23.0, 41.0, 44.0, 59.0, 115.0, 466.0, 2229.0, 580.0, 136.0, 70.0, 40.0, 36.0, 33.0, 26.0, 14.0, 15.0, 14.0, 5.0, 5.0, 3.0, 2.0, 2.0, 3.0, 0.0, 2.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.658367156982422e-05, -2.540741115808487e-05, -2.423115074634552e-05, -2.305489033460617e-05, -2.187862992286682e-05, -2.0702369511127472e-05, -1.9526109099388123e-05, -1.8349848687648773e-05, -1.7173588275909424e-05, -1.5997327864170074e-05, -1.4821067452430725e-05, -1.3644807040691376e-05, -1.2468546628952026e-05, -1.1292286217212677e-05, -1.0116025805473328e-05, -8.939765393733978e-06, -7.763504981994629e-06, -6.5872445702552795e-06, -5.41098415851593e-06, -4.234723746776581e-06, -3.0584633350372314e-06, -1.882202923297882e-06, -7.059425115585327e-07, 4.7031790018081665e-07, 1.646578311920166e-06, 2.8228387236595154e-06, 3.999099135398865e-06, 5.175359547138214e-06, 6.3516199588775635e-06, 7.527880370616913e-06, 8.704140782356262e-06, 9.880401194095612e-06, 1.1056661605834961e-05, 1.223292201757431e-05, 1.340918242931366e-05, 1.4585442841053009e-05, 1.576170325279236e-05, 1.6937963664531708e-05, 1.8114224076271057e-05, 1.9290484488010406e-05, 2.0466744899749756e-05, 2.1643005311489105e-05, 2.2819265723228455e-05, 2.3995526134967804e-05, 2.5171786546707153e-05, 2.6348046958446503e-05, 2.7524307370185852e-05, 2.87005677819252e-05, 2.987682819366455e-05, 3.10530886054039e-05, 3.222934901714325e-05, 3.34056094288826e-05, 3.458186984062195e-05, 3.57581302523613e-05, 3.693439066410065e-05, 3.8110651075839996e-05, 3.9286911487579346e-05, 4.0463171899318695e-05, 4.1639432311058044e-05, 4.2815692722797394e-05, 4.399195313453674e-05, 4.516821354627609e-05, 
4.634447395801544e-05, 4.752073436975479e-05, 4.869699478149414e-05]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 5.0, 0.0, 9.0, 5.0, 7.0, 8.0, 22.0, 30.0, 27.0, 44.0, 65.0, 105.0, 167.0, 171.0, 103.0, 81.0, 53.0, 33.0, 24.0, 18.0, 14.0, 10.0, 10.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00018070924852509052, -0.00017403258243575692, -0.00016735591634642333, -0.00016067925025708973, -0.00015400258416775614, -0.00014732591807842255, -0.00014064923743717372, -0.00013397258589975536, -0.00012729590525850654, -0.00012061923916917294, -0.00011394257307983935, -0.00010726590699050575, -0.00010058924090117216, -9.391257481183857e-05, -8.723590144654736e-05, -8.055923535721377e-05, -7.388257654383779e-05, -6.720591045450419e-05, -6.05292443651706e-05, -5.38525746378582e-05, -4.71759085485246e-05, -4.049924245919101e-05, -3.382257273187861e-05, -2.7145906642545015e-05, -2.046924055321142e-05, -1.3792573554383125e-05, -7.1159065555548295e-06, -4.3923864723183215e-07, 6.237427442101762e-06, 1.2914093531435356e-05, 1.9590763258747756e-05, 2.626742934808135e-05, 3.2944080885499716e-05, 3.962074697483331e-05, 4.6297413064166903e-05, 5.2974082791479304e-05, 5.96507488808129e-05, 6.632741133216769e-05, 7.300408469745889e-05, 7.968075078679249e-05, 8.635741687612608e-05, 9.303408296545967e-05, 9.971074905479327e-05, 0.00010638742242008448, 0.00011306408850941807, 0.00011974075459875166, 0.00012641742068808526, 0.00013309408677741885, 0.00013977075286675245, 0.00014644741895608604, 0.00015312408504541963, 0.00015980075113475323, 0.00016647741722408682, 0.00017315408331342041, 0.00017983076395466924, 0.0001865074154920876, 0.00019318409613333642, 0.00019986076222267002, 0.0002065374283120036, 0.0002132140944013372, 0.0002198907604906708, 0.0002265674265800044, 0.000233244092669338, 0.0002399207733105868, 0.0002465974248480052]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 6.0, 4.0, 3.0, 11.0, 6.0, 10.0, 13.0, 18.0, 14.0, 27.0, 24.0, 26.0, 26.0, 29.0, 36.0, 41.0, 41.0, 36.0, 37.0, 42.0, 41.0, 40.0, 55.0, 50.0, 33.0, 31.0, 22.0, 42.0, 31.0, 36.0, 26.0, 20.0, 27.0, 13.0, 15.0, 10.0, 20.0, 8.0, 9.0, 4.0, 7.0, 6.0, 6.0, 1.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-8.589029312133789e-05, -8.345581591129303e-05, -8.102133870124817e-05, -7.858686149120331e-05, -7.615238428115845e-05, -7.371790707111359e-05, -7.128342986106873e-05, -6.884895265102386e-05, -6.6414475440979e-05, -6.397999823093414e-05, -6.154552102088928e-05, -5.911104381084442e-05, -5.667656660079956e-05, -5.42420893907547e-05, -5.180761218070984e-05, -4.937313497066498e-05, -4.693865776062012e-05, -4.4504180550575256e-05, -4.2069703340530396e-05, -3.9635226130485535e-05, -3.7200748920440674e-05, -3.476627171039581e-05, -3.233179450035095e-05, -2.989731729030609e-05, -2.746284008026123e-05, -2.502836287021637e-05, -2.259388566017151e-05, -2.0159408450126648e-05, -1.7724931240081787e-05, -1.5290454030036926e-05, -1.2855976819992065e-05, -1.0421499609947205e-05, -7.987022399902344e-06, -5.552545189857483e-06, -3.118067979812622e-06, -6.835907697677612e-07, 1.7508864402770996e-06, 4.1853636503219604e-06, 6.619840860366821e-06, 9.054318070411682e-06, 1.1488795280456543e-05, 1.3923272490501404e-05, 
1.6357749700546265e-05, 1.8792226910591125e-05, 2.1226704120635986e-05, 2.3661181330680847e-05, 2.6095658540725708e-05, 2.853013575077057e-05, 3.096461296081543e-05, 3.339909017086029e-05, 3.583356738090515e-05, 3.826804459095001e-05, 4.070252180099487e-05, 4.3136999011039734e-05, 4.5571476221084595e-05, 4.8005953431129456e-05, 5.0440430641174316e-05, 5.287490785121918e-05, 5.530938506126404e-05, 5.77438622713089e-05, 6.017833948135376e-05, 6.261281669139862e-05, 6.504729390144348e-05, 6.748177111148834e-05, 6.99162483215332e-05]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 11.0, 28.0, 22.0, 31.0, 41.0, 39.0, 86.0, 147.0, 176.0, 294.0, 404.0, 674.0, 1056.0, 1796.0, 3071.0, 5835.0, 11647.0, 26738.0, 71258.0, 265287.0, 495042.0, 99120.0, 34787.0, 14546.0, 7032.0, 3683.0, 2135.0, 1264.0, 786.0, 450.0, 357.0, 221.0, 154.0, 85.0, 74.0, 44.0, 40.0, 20.0, 24.0, 13.0, 5.0, 8.0, 5.0, 7.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.380413055419922e-05, -8.135009557008743e-05, -7.889606058597565e-05, -7.644202560186386e-05, -7.398799061775208e-05, -7.153395563364029e-05, -6.90799206495285e-05, -6.662588566541672e-05, -6.417185068130493e-05, -6.171781569719315e-05, -5.926378071308136e-05, -5.6809745728969574e-05, -5.435571074485779e-05, -5.1901675760746e-05, -4.9447640776634216e-05, -4.699360579252243e-05, -4.4539570808410645e-05, -4.208553582429886e-05, -3.963150084018707e-05, -3.717746585607529e-05, -3.47234308719635e-05, -3.2269395887851715e-05, -2.981536090373993e-05, -2.7361325919628143e-05, -2.4907290935516357e-05, -2.245325595140457e-05, -1.9999220967292786e-05, -1.7545185983181e-05, -1.5091150999069214e-05, -1.2637116014957428e-05, -1.0183081030845642e-05, -7.729046046733856e-06, -5.27501106262207e-06, -2.8209760785102844e-06, -3.6694109439849854e-07, 2.0870938897132874e-06, 4.541128873825073e-06, 6.995163857936859e-06, 9.449198842048645e-06, 1.1903233826160431e-05, 1.4357268810272217e-05, 1.6811303794384003e-05, 1.926533877849579e-05, 2.1719373762607574e-05, 2.417340874671936e-05, 2.6627443730831146e-05, 2.9081478714942932e-05, 3.153551369905472e-05, 3.3989548683166504e-05, 3.644358366727829e-05, 3.8897618651390076e-05, 4.135165363550186e-05, 4.380568861961365e-05, 4.625972360372543e-05, 4.871375858783722e-05, 5.1167793571949005e-05, 5.362182855606079e-05, 5.607586354017258e-05, 5.852989852428436e-05, 6.098393350839615e-05, 6.343796849250793e-05, 6.589200347661972e-05, 6.83460384607315e-05, 7.080007344484329e-05, 7.325410842895508e-05]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 5.0, 2.0, 4.0, 5.0, 9.0, 9.0, 6.0, 21.0, 20.0, 27.0, 41.0, 59.0, 57.0, 81.0, 115.0, 77.0, 80.0, 80.0, 54.0, 56.0, 55.0, 42.0, 23.0, 22.0, 14.0, 10.0, 5.0, 6.0, 6.0, 3.0, 3.0, 1.0, 4.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.52587890625e-05, -1.4711171388626099e-05, -1.4163553714752197e-05, -1.3615936040878296e-05, -1.3068318367004395e-05, -1.2520700693130493e-05, -1.1973083019256592e-05, -1.142546534538269e-05, -1.0877847671508789e-05, -1.0330229997634888e-05, -9.782612323760986e-06, -9.234994649887085e-06, -8.687376976013184e-06, -8.139759302139282e-06, -7.592141628265381e-06, -7.0445239543914795e-06, -6.496906280517578e-06, -5.949288606643677e-06, -5.401670932769775e-06, -4.854053258895874e-06, 
-4.306435585021973e-06, -3.7588179111480713e-06, -3.21120023727417e-06, -2.6635825634002686e-06, -2.115964889526367e-06, -1.5683472156524658e-06, -1.0207295417785645e-06, -4.731118679046631e-07, 7.450580596923828e-08, 6.221234798431396e-07, 1.169741153717041e-06, 1.7173588275909424e-06, 2.2649765014648438e-06, 2.812594175338745e-06, 3.3602118492126465e-06, 3.907829523086548e-06, 4.455447196960449e-06, 5.003064870834351e-06, 5.550682544708252e-06, 6.098300218582153e-06, 6.645917892456055e-06, 7.193535566329956e-06, 7.741153240203857e-06, 8.288770914077759e-06, 8.83638858795166e-06, 9.384006261825562e-06, 9.931623935699463e-06, 1.0479241609573364e-05, 1.1026859283447266e-05, 1.1574476957321167e-05, 1.2122094631195068e-05, 1.266971230506897e-05, 1.3217329978942871e-05, 1.3764947652816772e-05, 1.4312565326690674e-05, 1.4860183000564575e-05, 1.5407800674438477e-05, 1.5955418348312378e-05, 1.650303602218628e-05, 1.705065369606018e-05, 1.7598271369934082e-05, 1.8145889043807983e-05, 1.8693506717681885e-05, 1.9241124391555786e-05, 1.9788742065429688e-05]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 6.0, 7.0, 7.0, 15.0, 29.0, 39.0, 36.0, 67.0, 108.0, 156.0, 232.0, 321.0, 584.0, 803.0, 1388.0, 1960.0, 3169.0, 5414.0, 8288.0, 14558.0, 23367.0, 42508.0, 74795.0, 155816.0, 394574.0, 148309.0, 74979.0, 38099.0, 23728.0, 13205.0, 8480.0, 4860.0, 3027.0, 2039.0, 1197.0, 856.0, 505.0, 325.0, 246.0, 144.0, 101.0, 62.0, 58.0, 30.0, 24.0, 14.0, 5.0, 10.0, 6.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-3.3736228942871094e-05, -3.269687294960022e-05, -3.1657516956329346e-05, -3.061816096305847e-05, -2.9578804969787598e-05, -2.8539448976516724e-05, -2.750009298324585e-05, -2.6460736989974976e-05, -2.54213809967041e-05, -2.4382025003433228e-05, -2.3342669010162354e-05, -2.230331301689148e-05, -2.1263957023620605e-05, -2.022460103034973e-05, -1.9185245037078857e-05, -1.8145889043807983e-05, -1.710653305053711e-05, -1.6067177057266235e-05, -1.5027821063995361e-05, -1.3988465070724487e-05, -1.2949109077453613e-05, -1.190975308418274e-05, -1.0870397090911865e-05, -9.831041097640991e-06, -8.791685104370117e-06, -7.752329111099243e-06, -6.712973117828369e-06, -5.673617124557495e-06, -4.634261131286621e-06, -3.594905138015747e-06, -2.555549144744873e-06, -1.516193151473999e-06, -4.76837158203125e-07, 5.62518835067749e-07, 1.601874828338623e-06, 2.641230821609497e-06, 3.680586814880371e-06, 4.719942808151245e-06, 5.759298801422119e-06, 6.798654794692993e-06, 7.838010787963867e-06, 8.877366781234741e-06, 9.916722774505615e-06, 1.095607876777649e-05, 1.1995434761047363e-05, 1.3034790754318237e-05, 1.4074146747589111e-05, 1.5113502740859985e-05, 1.615285873413086e-05, 1.7192214727401733e-05, 1.8231570720672607e-05, 1.927092671394348e-05, 2.0310282707214355e-05, 2.134963870048523e-05, 2.2388994693756104e-05, 2.3428350687026978e-05, 2.446770668029785e-05, 2.5507062673568726e-05, 2.65464186668396e-05, 2.7585774660110474e-05, 2.8625130653381348e-05, 2.966448664665222e-05, 3.0703842639923096e-05, 3.174319863319397e-05, 3.2782554626464844e-05]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 7.0, 4.0, 6.0, 5.0, 8.0, 12.0, 20.0, 16.0, 17.0, 25.0, 21.0, 22.0, 28.0, 43.0, 34.0, 40.0, 48.0, 51.0, 42.0, 56.0, 56.0, 37.0, 49.0, 48.0, 45.0, 35.0, 38.0, 32.0, 30.0, 24.0, 14.0, 26.0, 17.0, 14.0, 11.0, 8.0, 4.0, 5.0, 2.0, 4.0, 3.0, 1.0, 3.0, 0.0, 
0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0219554901123047e-05, -2.9212795197963715e-05, -2.8206035494804382e-05, -2.719927579164505e-05, -2.6192516088485718e-05, -2.5185756385326385e-05, -2.4178996682167053e-05, -2.317223697900772e-05, -2.216547727584839e-05, -2.1158717572689056e-05, -2.0151957869529724e-05, -1.9145198166370392e-05, -1.813843846321106e-05, -1.7131678760051727e-05, -1.6124919056892395e-05, -1.5118159353733063e-05, -1.411139965057373e-05, -1.3104639947414398e-05, -1.2097880244255066e-05, -1.1091120541095734e-05, -1.0084360837936401e-05, -9.077601134777069e-06, -8.070841431617737e-06, -7.0640817284584045e-06, -6.057322025299072e-06, -5.05056232213974e-06, -4.043802618980408e-06, -3.0370429158210754e-06, -2.030283212661743e-06, -1.0235235095024109e-06, -1.6763806343078613e-08, 9.899958968162537e-07, 1.996755599975586e-06, 3.0035153031349182e-06, 4.0102750062942505e-06, 5.017034709453583e-06, 6.023794412612915e-06, 7.030554115772247e-06, 8.03731381893158e-06, 9.044073522090912e-06, 1.0050833225250244e-05, 1.1057592928409576e-05, 1.2064352631568909e-05, 1.3071112334728241e-05, 1.4077872037887573e-05, 1.5084631741046906e-05, 1.6091391444206238e-05, 1.709815114736557e-05, 1.8104910850524902e-05, 1.9111670553684235e-05, 2.0118430256843567e-05, 2.11251899600029e-05, 2.213194966316223e-05, 2.3138709366321564e-05, 2.4145469069480896e-05, 2.5152228772640228e-05, 2.615898847579956e-05, 2.7165748178958893e-05, 2.8172507882118225e-05, 2.9179267585277557e-05, 3.018602728843689e-05, 3.119278699159622e-05, 3.2199546694755554e-05, 3.3206306397914886e-05, 3.421306610107422e-05]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 9.0, 12.0, 0.0, 27.0, 23.0, 42.0, 46.0, 0.0, 71.0, 100.0, 168.0, 240.0, 0.0, 367.0, 609.0, 992.0, 1614.0, 2702.0, 0.0, 4459.0, 7569.0, 13153.0, 23333.0, 0.0, 41933.0, 80660.0, 176508.0, 339235.0, 0.0, 176501.0, 80870.0, 41889.0, 23031.0, 13046.0, 0.0, 7806.0, 4504.0, 2690.0, 1711.0, 0.0, 976.0, 623.0, 357.0, 230.0, 0.0, 153.0, 98.0, 65.0, 43.0, 33.0, 0.0, 22.0, 15.0, 10.0, 8.0, 0.0, 6.0, 3.0, 5.0, 2.0, 1.0], "bins": [-1.4901161193847656e-06, -1.4416873455047607e-06, -1.3932585716247559e-06, -1.344829797744751e-06, -1.296401023864746e-06, -1.2479722499847412e-06, -1.1995434761047363e-06, -1.1511147022247314e-06, -1.1026859283447266e-06, -1.0542571544647217e-06, -1.0058283805847168e-06, -9.57399606704712e-07, -9.08970832824707e-07, -8.605420589447021e-07, -8.121132850646973e-07, -7.636845111846924e-07, -7.152557373046875e-07, -6.668269634246826e-07, -6.183981895446777e-07, -5.699694156646729e-07, -5.21540641784668e-07, -4.731118679046631e-07, -4.246830940246582e-07, -3.762543201446533e-07, -3.2782554626464844e-07, -2.7939677238464355e-07, -2.3096799850463867e-07, -1.825392246246338e-07, -1.341104507446289e-07, -8.568167686462402e-08, -3.725290298461914e-08, 1.1175870895385742e-08, 5.960464477539063e-08, 1.0803341865539551e-07, 1.564621925354004e-07, 2.0489096641540527e-07, 2.5331974029541016e-07, 3.0174851417541504e-07, 3.501772880554199e-07, 3.986060619354248e-07, 4.470348358154297e-07, 4.954636096954346e-07, 5.438923835754395e-07, 5.923211574554443e-07, 6.407499313354492e-07, 6.891787052154541e-07, 7.37607479095459e-07, 7.860362529754639e-07, 8.344650268554688e-07, 8.828938007354736e-07, 9.313225746154785e-07, 9.797513484954834e-07, 1.0281801223754883e-06, 1.0766088962554932e-06, 1.125037670135498e-06, 1.173466444015503e-06, 1.2218952178955078e-06, 1.2703239917755127e-06, 
1.3187527656555176e-06, 1.3671815395355225e-06, 1.4156103134155273e-06, 1.4640390872955322e-06, 1.5124678611755371e-06, 1.560896635055542e-06, 1.6093254089355469e-06]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 3.0, 4.0, 0.0, 13.0, 10.0, 26.0, 26.0, 0.0, 20.0, 24.0, 33.0, 38.0, 0.0, 59.0, 60.0, 71.0, 70.0, 0.0, 77.0, 86.0, 58.0, 46.0, 0.0, 48.0, 45.0, 35.0, 33.0, 0.0, 36.0, 19.0, 16.0, 15.0, 0.0, 11.0, 10.0, 8.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6689300537109375e-06, -1.621432602405548e-06, -1.5739351511001587e-06, -1.5264376997947693e-06, -1.4789402484893799e-06, -1.4314427971839905e-06, -1.383945345878601e-06, -1.3364478945732117e-06, -1.2889504432678223e-06, -1.2414529919624329e-06, -1.1939555406570435e-06, -1.146458089351654e-06, -1.0989606380462646e-06, -1.0514631867408752e-06, -1.0039657354354858e-06, -9.564682841300964e-07, -9.08970832824707e-07, -8.614733815193176e-07, -8.139759302139282e-07, -7.664784789085388e-07, -7.189810276031494e-07, -6.7148357629776e-07, -6.239861249923706e-07, -5.764886736869812e-07, -5.289912223815918e-07, -4.814937710762024e-07, -4.33996319770813e-07, -3.864988684654236e-07, -3.390014171600342e-07, -2.915039658546448e-07, -2.4400651454925537e-07, -1.9650906324386597e-07, -1.4901161193847656e-07, -1.0151416063308716e-07, -5.4016709327697754e-08, -6.51925802230835e-09, 4.0978193283081055e-08, 8.847564458847046e-08, 1.3597309589385986e-07, 1.8347054719924927e-07, 2.3096799850463867e-07, 2.784654498100281e-07, 3.259629011154175e-07, 3.734603524208069e-07, 4.209578037261963e-07, 4.684552550315857e-07, 5.159527063369751e-07, 5.634501576423645e-07, 6.109476089477539e-07, 6.584450602531433e-07, 7.059425115585327e-07, 7.534399628639221e-07, 8.009374141693115e-07, 8.484348654747009e-07, 8.959323167800903e-07, 9.434297680854797e-07, 9.909272193908691e-07, 1.0384246706962585e-06, 1.085922122001648e-06, 1.1334195733070374e-06, 1.1809170246124268e-06, 1.2284144759178162e-06, 1.2759119272232056e-06, 1.323409378528595e-06, 1.3709068298339844e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 10.0, 0.0, 10.0, 0.0, 24.0, 51.0, 0.0, 87.0, 144.0, 0.0, 316.0, 568.0, 0.0, 1033.0, 2090.0, 0.0, 4155.0, 0.0, 9045.0, 20919.0, 0.0, 55440.0, 183161.0, 0.0, 494318.0, 183120.0, 0.0, 55619.0, 0.0, 20925.0, 8964.0, 0.0, 4287.0, 2019.0, 0.0, 1065.0, 567.0, 0.0, 296.0, 146.0, 0.0, 85.0, 0.0, 51.0, 21.0, 0.0, 14.0, 9.0, 0.0, 6.0, 3.0, 0.0, 0.0, 2.0], "bins": [-1.3113021850585938e-06, -1.2731179594993591e-06, -1.2349337339401245e-06, -1.1967495083808899e-06, -1.1585652828216553e-06, -1.1203810572624207e-06, -1.082196831703186e-06, -1.0440126061439514e-06, -1.0058283805847168e-06, -9.676441550254822e-07, -9.294599294662476e-07, -8.912757039070129e-07, -8.530914783477783e-07, -8.149072527885437e-07, -7.767230272293091e-07, -7.385388016700745e-07, -7.003545761108398e-07, -6.621703505516052e-07, -6.239861249923706e-07, -5.85801899433136e-07, -5.476176738739014e-07, -5.094334483146667e-07, -4.7124922275543213e-07, -4.330649971961975e-07, -3.948807716369629e-07, -3.5669654607772827e-07, -3.1851232051849365e-07, -2.8032809495925903e-07, -2.421438694000244e-07, -2.039596438407898e-07, -1.6577541828155518e-07, -1.2759119272232056e-07, -8.940696716308594e-08, -5.122274160385132e-08, -1.30385160446167e-08, 2.514570951461792e-08, 
6.332993507385254e-08, 1.0151416063308716e-07, 1.3969838619232178e-07, 1.778826117515564e-07, 2.1606683731079102e-07, 2.5425106287002563e-07, 2.9243528842926025e-07, 3.3061951398849487e-07, 3.688037395477295e-07, 4.069879651069641e-07, 4.4517219066619873e-07, 4.833564162254333e-07, 5.21540641784668e-07, 5.597248673439026e-07, 5.979090929031372e-07, 6.360933184623718e-07, 6.742775440216064e-07, 7.124617695808411e-07, 7.506459951400757e-07, 7.888302206993103e-07, 8.270144462585449e-07, 8.651986718177795e-07, 9.033828973770142e-07, 9.415671229362488e-07, 9.797513484954834e-07, 1.017935574054718e-06, 1.0561197996139526e-06, 1.0943040251731873e-06, 1.1324882507324219e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 8.0, 0.0, 6.0, 0.0, 8.0, 0.0, 19.0, 0.0, 21.0, 0.0, 30.0, 0.0, 43.0, 0.0, 52.0, 0.0, 76.0, 0.0, 98.0, 0.0, 103.0, 0.0, 88.0, 0.0, 114.0, 0.0, 83.0, 0.0, 75.0, 0.0, 56.0, 0.0, 40.0, 0.0, 23.0, 0.0, 26.0, 0.0, 14.0, 0.0, 7.0, 0.0, 7.0, 0.0, 5.0, 0.0, 3.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-9.5367431640625e-07, -9.238719940185547e-07, -8.940696716308594e-07, -8.642673492431641e-07, -8.344650268554688e-07, -8.046627044677734e-07, -7.748603820800781e-07, -7.450580596923828e-07, -7.152557373046875e-07, -6.854534149169922e-07, -6.556510925292969e-07, -6.258487701416016e-07, -5.960464477539062e-07, -5.662441253662109e-07, -5.364418029785156e-07, -5.066394805908203e-07, -4.76837158203125e-07, -4.470348358154297e-07, -4.172325134277344e-07, -3.8743019104003906e-07, -3.5762786865234375e-07, -3.2782554626464844e-07, -2.980232238769531e-07, -2.682209014892578e-07, -2.384185791015625e-07, -2.086162567138672e-07, -1.7881393432617188e-07, -1.4901161193847656e-07, -1.1920928955078125e-07, -8.940696716308594e-08, -5.960464477539063e-08, -2.9802322387695312e-08, 0.0, 2.9802322387695312e-08, 5.960464477539063e-08, 8.940696716308594e-08, 1.1920928955078125e-07, 1.4901161193847656e-07, 1.7881393432617188e-07, 2.086162567138672e-07, 2.384185791015625e-07, 2.682209014892578e-07, 2.980232238769531e-07, 3.2782554626464844e-07, 3.5762786865234375e-07, 3.8743019104003906e-07, 4.172325134277344e-07, 4.470348358154297e-07, 4.76837158203125e-07, 5.066394805908203e-07, 5.364418029785156e-07, 5.662441253662109e-07, 5.960464477539062e-07, 6.258487701416016e-07, 6.556510925292969e-07, 6.854534149169922e-07, 7.152557373046875e-07, 7.450580596923828e-07, 7.748603820800781e-07, 8.046627044677734e-07, 8.344650268554688e-07, 8.642673492431641e-07, 8.940696716308594e-07, 9.238719940185547e-07, 9.5367431640625e-07]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 2.0, 3.0, 4.0, 6.0, 10.0, 6.0, 14.0, 14.0, 21.0, 21.0, 33.0, 40.0, 51.0, 78.0, 172.0, 125.0, 92.0, 65.0, 52.0, 34.0, 28.0, 26.0, 16.0, 20.0, 17.0, 13.0, 13.0, 8.0, 4.0, 1.0, 3.0, 1.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0001048088088282384, -0.00010088112321682274, -9.69534448813647e-05, -9.302575926994905e-05, -8.90980736585334e-05, -8.517039532307535e-05, -8.12427097116597e-05, -7.731502410024405e-05, -7.3387345764786e-05, -6.945966015337035e-05, -6.553198181791231e-05, -6.160429620649666e-05, -5.7676610595081e-05, -5.3748928621644154e-05, -4.982124664820731e-05, -4.589356103679165e-05, -4.1965875425376e-05, -3.803819345193915e-05, -3.4110507840523496e-05, 
-3.018282586708665e-05, -2.6255142074660398e-05, -2.2327458282234147e-05, -1.83997763087973e-05, -1.4472092516371049e-05, -1.0544408723944798e-05, -6.616725386265898e-06, -2.689042048586998e-06, 1.238640834344551e-06, 5.166324626770802e-06, 9.094008419197053e-06, 1.30216903926339e-05, 1.694937418506015e-05, 2.0877050701528788e-05, 2.480473449395504e-05, 2.873241828638129e-05, 3.266010025981814e-05, 3.658778587123379e-05, 4.051546784467064e-05, 4.4443149818107486e-05, 4.837083542952314e-05, 5.229851740295999e-05, 5.6226199376396835e-05, 6.015388498781249e-05, 6.408157059922814e-05, 6.800924893468618e-05, 7.193693454610184e-05, 7.586462015751749e-05, 7.979229849297553e-05, 8.371998410439119e-05, 8.764766971580684e-05, 9.157534805126488e-05, 9.550303366268054e-05, 9.943071927409619e-05, 0.00010335839760955423, 0.00010728608322096989, 0.00011121376883238554, 0.00011514144716784358, 0.00011906913277925923, 0.0001229968183906749, 0.00012692449672613293, 0.00013085217506159097, 0.00013477986794896424, 0.00013870754628442228, 0.00014263522461988032, 0.0001465629175072536]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 2.0, 4.0, 9.0, 11.0, 13.0, 9.0, 16.0, 13.0, 19.0, 15.0, 23.0, 28.0, 22.0, 39.0, 31.0, 48.0, 33.0, 33.0, 35.0, 54.0, 42.0, 42.0, 25.0, 45.0, 55.0, 26.0, 31.0, 32.0, 38.0, 27.0, 35.0, 27.0, 17.0, 20.0, 17.0, 16.0, 13.0, 9.0, 12.0, 4.0, 7.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-9.107589721679688e-05, -8.833874017000198e-05, -8.560158312320709e-05, -8.28644260764122e-05, -8.012726902961731e-05, -7.739011198282242e-05, -7.465295493602753e-05, -7.191579788923264e-05, -6.917864084243774e-05, -6.644148379564285e-05, -6.370432674884796e-05, -6.096716970205307e-05, -5.823001265525818e-05, -5.549285560846329e-05, -5.2755698561668396e-05, -5.0018541514873505e-05, -4.728138446807861e-05, -4.454422742128372e-05, -4.180707037448883e-05, -3.906991332769394e-05, -3.633275628089905e-05, -3.3595599234104156e-05, -3.0858442187309265e-05, -2.8121285140514374e-05, -2.5384128093719482e-05, -2.264697104692459e-05, -1.99098140001297e-05, -1.717265695333481e-05, -1.4435499906539917e-05, -1.1698342859745026e-05, -8.961185812950134e-06, -6.224028766155243e-06, -3.4868717193603516e-06, -7.497146725654602e-07, 1.987442374229431e-06, 4.7245994210243225e-06, 7.461756467819214e-06, 1.0198913514614105e-05, 1.2936070561408997e-05, 1.5673227608203888e-05, 1.841038465499878e-05, 2.114754170179367e-05, 2.3884698748588562e-05, 2.6621855795383453e-05, 2.9359012842178345e-05, 3.2096169888973236e-05, 3.483332693576813e-05, 3.757048398256302e-05, 4.030764102935791e-05, 4.30447980761528e-05, 4.578195512294769e-05, 4.8519112169742584e-05, 5.1256269216537476e-05, 5.399342626333237e-05, 5.673058331012726e-05, 5.946774035692215e-05, 6.220489740371704e-05, 6.494205445051193e-05, 6.767921149730682e-05, 7.041636854410172e-05, 7.31535255908966e-05, 7.58906826376915e-05, 7.862783968448639e-05, 8.136499673128128e-05, 8.410215377807617e-05]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 6.0, 6.0, 15.0, 19.0, 22.0, 38.0, 60.0, 107.0, 182.0, 291.0, 482.0, 832.0, 1636.0, 3272.0, 7338.0, 20187.0, 101057.0, 3955948.0, 69458.0, 18535.0, 7239.0, 3218.0, 1672.0, 983.0, 571.0, 315.0, 212.0, 180.0, 98.0, 58.0, 53.0, 40.0, 38.0, 33.0, 14.0, 13.0, 10.0, 8.0, 16.0, 6.0, 7.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0, 4.0, 0.0, 2.0, 1.0, 2.0, 1.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.43865966796875e-05, -7.091276347637177e-05, -6.743893027305603e-05, -6.39650970697403e-05, -6.049126386642456e-05, -5.7017430663108826e-05, -5.354359745979309e-05, -5.0069764256477356e-05, -4.659593105316162e-05, -4.3122097849845886e-05, -3.964826464653015e-05, -3.6174431443214417e-05, -3.270059823989868e-05, -2.9226765036582947e-05, -2.5752931833267212e-05, -2.2279098629951477e-05, -1.8805265426635742e-05, -1.5331432223320007e-05, -1.1857599020004272e-05, -8.383765816688538e-06, -4.909932613372803e-06, -1.4360994100570679e-06, 2.037733793258667e-06, 5.511566996574402e-06, 8.985400199890137e-06, 1.2459233403205872e-05, 1.5933066606521606e-05, 1.940689980983734e-05, 2.2880733013153076e-05, 2.635456621646881e-05, 2.9828399419784546e-05, 3.330223262310028e-05, 3.6776065826416016e-05, 4.024989902973175e-05, 4.3723732233047485e-05, 4.719756543636322e-05, 5.0671398639678955e-05, 5.414523184299469e-05, 5.7619065046310425e-05, 6.109289824962616e-05, 6.45667314529419e-05, 6.804056465625763e-05, 7.151439785957336e-05, 7.49882310628891e-05, 7.846206426620483e-05, 8.193589746952057e-05, 8.54097306728363e-05, 8.888356387615204e-05, 9.235739707946777e-05, 9.583123028278351e-05, 9.930506348609924e-05, 0.00010277889668941498, 0.00010625272989273071, 0.00010972656309604645, 0.00011320039629936218, 0.00011667422950267792, 0.00012014806270599365, 0.0001236218959093094, 0.00012709572911262512, 0.00013056956231594086, 0.0001340433955192566, 0.00013751722872257233, 0.00014099106192588806, 0.0001444648951292038, 0.00014793872833251953]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 10.0, 7.0, 12.0, 16.0, 21.0, 29.0, 48.0, 67.0, 81.0, 107.0, 98.0, 114.0, 91.0, 75.0, 58.0, 48.0, 31.0, 23.0, 16.0, 7.0, 6.0, 10.0, 5.0, 5.0, 5.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.341104507446289e-05, -1.2765638530254364e-05, -1.2120231986045837e-05, -1.147482544183731e-05, -1.0829418897628784e-05, -1.0184012353420258e-05, -9.538605809211731e-06, -8.893199265003204e-06, -8.247792720794678e-06, -7.602386176586151e-06, -6.9569796323776245e-06, -6.311573088169098e-06, -5.666166543960571e-06, -5.020759999752045e-06, -4.375353455543518e-06, -3.7299469113349915e-06, -3.084540367126465e-06, -2.4391338229179382e-06, -1.7937272787094116e-06, -1.148320734500885e-06, -5.029141902923584e-07, 1.424923539161682e-07, 7.878988981246948e-07, 1.4333054423332214e-06, 2.078711986541748e-06, 2.7241185307502747e-06, 3.3695250749588013e-06, 4.014931619167328e-06, 4.6603381633758545e-06, 5.305744707584381e-06, 5.951151251792908e-06, 6.596557796001434e-06, 7.241964340209961e-06, 7.887370884418488e-06, 8.532777428627014e-06, 9.17818397283554e-06, 9.823590517044067e-06, 1.0468997061252594e-05, 1.111440360546112e-05, 1.1759810149669647e-05, 1.2405216693878174e-05, 1.30506232380867e-05, 1.3696029782295227e-05, 1.4341436326503754e-05, 1.498684287071228e-05, 1.5632249414920807e-05, 1.6277655959129333e-05, 1.692306250333786e-05, 1.7568469047546387e-05, 1.8213875591754913e-05, 1.885928213596344e-05, 1.9504688680171967e-05, 2.0150095224380493e-05, 2.079550176858902e-05, 2.1440908312797546e-05, 2.2086314857006073e-05, 2.27317214012146e-05, 2.3377127945423126e-05, 2.4022534489631653e-05, 2.466794103384018e-05, 2.5313347578048706e-05, 2.5958754122257233e-05, 2.660416066646576e-05, 
2.7249567210674286e-05, 2.7894973754882812e-05]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 3.0, 7.0, 10.0, 20.0, 27.0, 29.0, 44.0, 101.0, 160.0, 208.0, 403.0, 750.0, 1339.0, 2540.0, 5035.0, 11145.0, 24910.0, 73774.0, 1431881.0, 2516336.0, 76063.0, 27016.0, 11466.0, 5148.0, 2695.0, 1371.0, 768.0, 419.0, 230.0, 153.0, 84.0, 52.0, 42.0, 16.0, 13.0, 8.0, 4.0, 8.0, 3.0, 5.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-7.414817810058594e-05, -7.219798862934113e-05, -7.024779915809631e-05, -6.82976096868515e-05, -6.634742021560669e-05, -6.439723074436188e-05, -6.244704127311707e-05, -6.0496851801872253e-05, -5.854666233062744e-05, -5.659647285938263e-05, -5.464628338813782e-05, -5.2696093916893005e-05, -5.074590444564819e-05, -4.879571497440338e-05, -4.684552550315857e-05, -4.489533603191376e-05, -4.2945146560668945e-05, -4.099495708942413e-05, -3.904476761817932e-05, -3.709457814693451e-05, -3.51443886756897e-05, -3.3194199204444885e-05, -3.124400973320007e-05, -2.929382026195526e-05, -2.734363079071045e-05, -2.5393441319465637e-05, -2.3443251848220825e-05, -2.1493062376976013e-05, -1.95428729057312e-05, -1.759268343448639e-05, -1.5642493963241577e-05, -1.3692304491996765e-05, -1.1742115020751953e-05, -9.791925549507141e-06, -7.841736078262329e-06, -5.891546607017517e-06, -3.941357135772705e-06, -1.991167664527893e-06, -4.0978193283081055e-08, 1.909211277961731e-06, 3.859400749206543e-06, 5.809590220451355e-06, 7.759779691696167e-06, 9.709969162940979e-06, 1.1660158634185791e-05, 1.3610348105430603e-05, 1.5560537576675415e-05, 1.7510727047920227e-05, 1.946091651916504e-05, 2.141110599040985e-05, 2.3361295461654663e-05, 2.5311484932899475e-05, 2.7261674404144287e-05, 2.92118638753891e-05, 3.116205334663391e-05, 3.311224281787872e-05, 3.5062432289123535e-05, 3.701262176036835e-05, 3.896281123161316e-05, 4.091300070285797e-05, 4.286319017410278e-05, 4.4813379645347595e-05, 4.676356911659241e-05, 4.871375858783722e-05, 5.066394805908203e-05]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 4.0, 5.0, 3.0, 7.0, 5.0, 9.0, 9.0, 18.0, 12.0, 24.0, 32.0, 26.0, 47.0, 64.0, 188.0, 630.0, 2065.0, 494.0, 144.0, 74.0, 35.0, 39.0, 33.0, 24.0, 15.0, 16.0, 4.0, 9.0, 7.0, 7.0, 9.0, 8.0, 6.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-4.380941390991211e-05, -4.267040640115738e-05, -4.153139889240265e-05, -4.039239138364792e-05, -3.925338387489319e-05, -3.811437636613846e-05, -3.697536885738373e-05, -3.5836361348629e-05, -3.469735383987427e-05, -3.355834633111954e-05, -3.241933882236481e-05, -3.128033131361008e-05, -3.0141323804855347e-05, -2.9002316296100616e-05, -2.7863308787345886e-05, -2.6724301278591156e-05, -2.5585293769836426e-05, -2.4446286261081696e-05, -2.3307278752326965e-05, -2.2168271243572235e-05, -2.1029263734817505e-05, -1.9890256226062775e-05, -1.8751248717308044e-05, -1.7612241208553314e-05, -1.6473233699798584e-05, -1.5334226191043854e-05, -1.4195218682289124e-05, -1.3056211173534393e-05, -1.1917203664779663e-05, -1.0778196156024933e-05, -9.639188647270203e-06, -8.500181138515472e-06, -7.361173629760742e-06, -6.222166121006012e-06, -5.083158612251282e-06, -3.9441511034965515e-06, -2.8051435947418213e-06, -1.666136085987091e-06, -5.271285772323608e-07, 
6.118789315223694e-07, 1.7508864402770996e-06, 2.88989394903183e-06, 4.02890145778656e-06, 5.16790896654129e-06, 6.3069164752960205e-06, 7.445923984050751e-06, 8.584931492805481e-06, 9.723939001560211e-06, 1.0862946510314941e-05, 1.2001954019069672e-05, 1.3140961527824402e-05, 1.4279969036579132e-05, 1.5418976545333862e-05, 1.6557984054088593e-05, 1.7696991562843323e-05, 1.8835999071598053e-05, 1.9975006580352783e-05, 2.1114014089107513e-05, 2.2253021597862244e-05, 2.3392029106616974e-05, 2.4531036615371704e-05, 2.5670044124126434e-05, 2.6809051632881165e-05, 2.7948059141635895e-05, 2.9087066650390625e-05]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 5.0, 11.0, 6.0, 8.0, 13.0, 22.0, 27.0, 28.0, 51.0, 76.0, 101.0, 125.0, 120.0, 103.0, 56.0, 50.0, 48.0, 32.0, 29.0, 24.0, 13.0, 11.0, 9.0, 5.0, 6.0, 8.0, 5.0, 1.0, 7.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017065282736439258, -0.00016529108688700944, -0.0001599293464096263, -0.00015456760593224317, -0.0001492058509029448, -0.00014384411042556167, -0.00013848236994817853, -0.0001331206294707954, -0.00012775888899341226, -0.00012239714851602912, -0.00011703540803864598, -0.00011167366028530523, -0.0001063119198079221, -0.00010095017933053896, -9.558843157719821e-05, -9.022669109981507e-05, -8.486495062243193e-05, -7.95032101450488e-05, -7.414146966766566e-05, -6.877972191432491e-05, -6.341798143694177e-05, -5.8056240959558636e-05, -5.269449684419669e-05, -4.733275272883475e-05, -4.197101225145161e-05, -3.6609271774068475e-05, -3.124752765870653e-05, -2.588578536233399e-05, -2.052404306596145e-05, -1.516230076958891e-05, -9.80055847321637e-06, -4.438814357854426e-06, 9.229115676134825e-07, 6.284653863986023e-06, 1.1646396160358563e-05, 1.7008138456731103e-05, 2.2369880753103644e-05, 2.7731623049476184e-05, 3.3093365345848724e-05, 3.845510946121067e-05, 4.3816849938593805e-05, 4.917859041597694e-05, 5.4540334531338885e-05, 5.990207864670083e-05, 6.526381912408397e-05, 7.06255596014671e-05, 7.598730735480785e-05, 8.134904783219099e-05, 8.671078830957413e-05, 9.207252878695726e-05, 9.74342692643404e-05, 0.00010279601701768115, 0.00010815775749506429, 0.00011351949797244743, 0.00011888124572578818, 0.0001242429862031713, 0.00012960472668055445, 0.00013496646715793759, 0.00014032820763532072, 0.00014568994811270386, 0.00015105170314200222, 0.00015641344361938536, 0.0001617751840967685, 0.00016713692457415164, 0.00017249866505153477]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 3.0, 1.0, 2.0, 2.0, 5.0, 5.0, 4.0, 8.0, 12.0, 5.0, 12.0, 17.0, 19.0, 17.0, 22.0, 25.0, 31.0, 33.0, 28.0, 31.0, 40.0, 30.0, 43.0, 38.0, 41.0, 44.0, 50.0, 46.0, 43.0, 36.0, 42.0, 26.0, 32.0, 30.0, 32.0, 25.0, 21.0, 17.0, 19.0, 13.0, 12.0, 16.0, 9.0, 9.0, 6.0, 1.0, 4.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0], "bins": [-9.161233901977539e-05, -8.882023394107819e-05, -8.602812886238098e-05, -8.323602378368378e-05, -8.044391870498657e-05, -7.765181362628937e-05, -7.485970854759216e-05, -7.206760346889496e-05, -6.927549839019775e-05, -6.648339331150055e-05, -6.369128823280334e-05, -6.089918315410614e-05, -5.8107078075408936e-05, -5.531497299671173e-05, -5.2522867918014526e-05, -4.973076283931732e-05, -4.693865776062012e-05, -4.414655268192291e-05, -4.135444760322571e-05, 
-3.8562342524528503e-05, -3.57702374458313e-05, -3.2978132367134094e-05, -3.018602728843689e-05, -2.7393922209739685e-05, -2.460181713104248e-05, -2.1809712052345276e-05, -1.901760697364807e-05, -1.6225501894950867e-05, -1.3433396816253662e-05, -1.0641291737556458e-05, -7.849186658859253e-06, -5.057081580162048e-06, -2.2649765014648438e-06, 5.271285772323608e-07, 3.3192336559295654e-06, 6.11133873462677e-06, 8.903443813323975e-06, 1.169554889202118e-05, 1.4487653970718384e-05, 1.727975904941559e-05, 2.0071864128112793e-05, 2.2863969206809998e-05, 2.5656074285507202e-05, 2.8448179364204407e-05, 3.124028444290161e-05, 3.4032389521598816e-05, 3.682449460029602e-05, 3.9616599678993225e-05, 4.240870475769043e-05, 4.5200809836387634e-05, 4.799291491508484e-05, 5.0785019993782043e-05, 5.357712507247925e-05, 5.636923015117645e-05, 5.916133522987366e-05, 6.195344030857086e-05, 6.474554538726807e-05, 6.753765046596527e-05, 7.032975554466248e-05, 7.312186062335968e-05, 7.591396570205688e-05, 7.870607078075409e-05, 8.14981758594513e-05, 8.42902809381485e-05, 8.70823860168457e-05]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 2.0, 4.0, 7.0, 7.0, 8.0, 9.0, 16.0, 21.0, 42.0, 38.0, 76.0, 107.0, 196.0, 341.0, 596.0, 1079.0, 2012.0, 3812.0, 8146.0, 19524.0, 58118.0, 243780.0, 553559.0, 103929.0, 30191.0, 11695.0, 5424.0, 2622.0, 1332.0, 776.0, 423.0, 244.0, 154.0, 96.0, 61.0, 33.0, 28.0, 14.0, 13.0, 9.0, 4.0, 6.0, 4.0, 5.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.00011289119720458984, -0.00010949280112981796, -0.00010609440505504608, -0.0001026960089802742, -9.929761290550232e-05, -9.589921683073044e-05, -9.250082075595856e-05, -8.910242468118668e-05, -8.57040286064148e-05, -8.230563253164291e-05, -7.890723645687103e-05, -7.550884038209915e-05, -7.211044430732727e-05, -6.871204823255539e-05, -6.531365215778351e-05, -6.191525608301163e-05, -5.8516860008239746e-05, -5.5118463933467865e-05, -5.1720067858695984e-05, -4.83216717839241e-05, -4.492327570915222e-05, -4.152487963438034e-05, -3.812648355960846e-05, -3.472808748483658e-05, -3.13296914100647e-05, -2.7931295335292816e-05, -2.4532899260520935e-05, -2.1134503185749054e-05, -1.7736107110977173e-05, -1.4337711036205292e-05, -1.093931496143341e-05, -7.5409188866615295e-06, -4.1425228118896484e-06, -7.441267371177673e-07, 2.6542693376541138e-06, 6.052665412425995e-06, 9.451061487197876e-06, 1.2849457561969757e-05, 1.6247853636741638e-05, 1.964624971151352e-05, 2.30446457862854e-05, 2.644304186105728e-05, 2.9841437935829163e-05, 3.3239834010601044e-05, 3.6638230085372925e-05, 4.0036626160144806e-05, 4.343502223491669e-05, 4.683341830968857e-05, 5.023181438446045e-05, 5.363021045923233e-05, 5.702860653400421e-05, 6.042700260877609e-05, 6.382539868354797e-05, 6.722379475831985e-05, 7.062219083309174e-05, 7.402058690786362e-05, 7.74189829826355e-05, 8.081737905740738e-05, 8.421577513217926e-05, 8.761417120695114e-05, 9.101256728172302e-05, 9.44109633564949e-05, 9.780935943126678e-05, 0.00010120775550603867, 0.00010460615158081055]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 0.0, 4.0, 3.0, 2.0, 4.0, 12.0, 12.0, 14.0, 17.0, 20.0, 22.0, 47.0, 63.0, 82.0, 90.0, 104.0, 86.0, 78.0, 81.0, 76.0, 49.0, 55.0, 19.0, 22.0, 7.0, 8.0, 8.0, 3.0, 6.0, 3.0, 6.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5497207641601562e-05, -1.4884397387504578e-05, -1.4271587133407593e-05, -1.3658776879310608e-05, -1.3045966625213623e-05, -1.2433156371116638e-05, -1.1820346117019653e-05, -1.1207535862922668e-05, -1.0594725608825684e-05, -9.981915354728699e-06, -9.369105100631714e-06, -8.756294846534729e-06, -8.143484592437744e-06, -7.530674338340759e-06, -6.917864084243774e-06, -6.3050538301467896e-06, -5.692243576049805e-06, -5.07943332195282e-06, -4.466623067855835e-06, -3.85381281375885e-06, -3.2410025596618652e-06, -2.6281923055648804e-06, -2.0153820514678955e-06, -1.4025717973709106e-06, -7.897615432739258e-07, -1.7695128917694092e-07, 4.3585896492004395e-07, 1.0486692190170288e-06, 1.6614794731140137e-06, 2.2742897272109985e-06, 2.8870999813079834e-06, 3.4999102354049683e-06, 4.112720489501953e-06, 4.725530743598938e-06, 5.338340997695923e-06, 5.951151251792908e-06, 6.563961505889893e-06, 7.1767717599868774e-06, 7.789582014083862e-06, 8.402392268180847e-06, 9.015202522277832e-06, 9.628012776374817e-06, 1.0240823030471802e-05, 1.0853633284568787e-05, 1.1466443538665771e-05, 1.2079253792762756e-05, 1.2692064046859741e-05, 1.3304874300956726e-05, 1.3917684555053711e-05, 1.4530494809150696e-05, 1.514330506324768e-05, 1.5756115317344666e-05, 1.636892557144165e-05, 1.6981735825538635e-05, 1.759454607963562e-05, 1.8207356333732605e-05, 1.882016658782959e-05, 1.9432976841926575e-05, 2.004578709602356e-05, 2.0658597350120544e-05, 2.127140760421753e-05, 2.1884217858314514e-05, 2.24970281124115e-05, 2.3109838366508484e-05, 2.372264862060547e-05]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 2.0, 2.0, 4.0, 5.0, 10.0, 18.0, 15.0, 33.0, 39.0, 66.0, 73.0, 107.0, 164.0, 261.0, 385.0, 655.0, 952.0, 1462.0, 2252.0, 3876.0, 6016.0, 9578.0, 16049.0, 26728.0, 47450.0, 90712.0, 212054.0, 360048.0, 123139.0, 61265.0, 35357.0, 18972.0, 11655.0, 7236.0, 4195.0, 2628.0, 1752.0, 1123.0, 737.0, 502.0, 357.0, 194.0, 143.0, 91.0, 76.0, 42.0, 30.0, 19.0, 10.0, 10.0, 6.0, 4.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.9742717742919922e-05, -2.872385084629059e-05, -2.7704983949661255e-05, -2.668611705303192e-05, -2.5667250156402588e-05, -2.4648383259773254e-05, -2.362951636314392e-05, -2.2610649466514587e-05, -2.1591782569885254e-05, -2.057291567325592e-05, -1.9554048776626587e-05, -1.8535181879997253e-05, -1.751631498336792e-05, -1.6497448086738586e-05, -1.5478581190109253e-05, -1.445971429347992e-05, -1.3440847396850586e-05, -1.2421980500221252e-05, -1.1403113603591919e-05, -1.0384246706962585e-05, -9.365379810333252e-06, -8.346512913703918e-06, -7.327646017074585e-06, -6.3087791204452515e-06, -5.289912223815918e-06, -4.2710453271865845e-06, -3.252178430557251e-06, -2.2333115339279175e-06, -1.214444637298584e-06, -1.955777406692505e-07, 8.23289155960083e-07, 1.8421560525894165e-06, 2.86102294921875e-06, 3.8798898458480835e-06, 4.898756742477417e-06, 5.9176236391067505e-06, 6.936490535736084e-06, 7.955357432365417e-06, 8.974224328994751e-06, 9.993091225624084e-06, 1.1011958122253418e-05, 1.2030825018882751e-05, 1.3049691915512085e-05, 1.4068558812141418e-05, 1.5087425708770752e-05, 1.6106292605400085e-05, 1.712515950202942e-05, 1.8144026398658752e-05, 1.9162893295288086e-05, 2.018176019191742e-05, 2.1200627088546753e-05, 2.2219493985176086e-05, 2.323836088180542e-05, 2.4257227778434753e-05, 2.5276094675064087e-05, 2.629496157169342e-05, 2.7313828468322754e-05, 2.8332695364952087e-05, 2.935156226158142e-05, 
3.0370429158210754e-05, 3.138929605484009e-05, 3.240816295146942e-05, 3.3427029848098755e-05, 3.444589674472809e-05, 3.546476364135742e-05]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 4.0, 10.0, 12.0, 10.0, 16.0, 10.0, 12.0, 8.0, 20.0, 20.0, 23.0, 22.0, 28.0, 30.0, 29.0, 30.0, 45.0, 34.0, 44.0, 46.0, 53.0, 35.0, 46.0, 33.0, 49.0, 39.0, 41.0, 32.0, 34.0, 33.0, 21.0, 25.0, 16.0, 18.0, 12.0, 10.0, 12.0, 7.0, 7.0, 5.0, 4.0, 5.0, 3.0, 4.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.5391578674316406e-05, -2.453010529279709e-05, -2.366863191127777e-05, -2.2807158529758453e-05, -2.1945685148239136e-05, -2.1084211766719818e-05, -2.02227383852005e-05, -1.9361265003681183e-05, -1.8499791622161865e-05, -1.7638318240642548e-05, -1.677684485912323e-05, -1.5915371477603912e-05, -1.5053898096084595e-05, -1.4192424714565277e-05, -1.333095133304596e-05, -1.2469477951526642e-05, -1.1608004570007324e-05, -1.0746531188488007e-05, -9.885057806968689e-06, -9.023584425449371e-06, -8.162111043930054e-06, -7.300637662410736e-06, -6.4391642808914185e-06, -5.577690899372101e-06, -4.716217517852783e-06, -3.8547441363334656e-06, -2.993270754814148e-06, -2.1317973732948303e-06, -1.2703239917755127e-06, -4.0885061025619507e-07, 4.5262277126312256e-07, 1.3140961527824402e-06, 2.175569534301758e-06, 3.0370429158210754e-06, 3.898516297340393e-06, 4.759989678859711e-06, 5.621463060379028e-06, 6.482936441898346e-06, 7.3444098234176636e-06, 8.205883204936981e-06, 9.067356586456299e-06, 9.928829967975616e-06, 1.0790303349494934e-05, 1.1651776731014252e-05, 1.251325011253357e-05, 1.3374723494052887e-05, 1.4236196875572205e-05, 1.5097670257091522e-05, 1.595914363861084e-05, 1.6820617020130157e-05, 1.7682090401649475e-05, 1.8543563783168793e-05, 1.940503716468811e-05, 2.0266510546207428e-05, 2.1127983927726746e-05, 2.1989457309246063e-05, 2.285093069076538e-05, 2.37124040722847e-05, 2.4573877453804016e-05, 2.5435350835323334e-05, 2.629682421684265e-05, 2.715829759836197e-05, 2.8019770979881287e-05, 2.8881244361400604e-05, 2.9742717742919922e-05]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 6.0, 4.0, 12.0, 19.0, 19.0, 33.0, 50.0, 59.0, 94.0, 122.0, 237.0, 380.0, 639.0, 1122.0, 2049.0, 4247.0, 9593.0, 25346.0, 75109.0, 262257.0, 459388.0, 136775.0, 42890.0, 15351.0, 6293.0, 2883.0, 1483.0, 855.0, 453.0, 289.0, 165.0, 113.0, 88.0, 36.0, 22.0, 25.0, 15.0, 13.0, 9.0, 2.0, 7.0, 2.0, 7.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.6954879760742188e-06, -3.577210009098053e-06, -3.458932042121887e-06, -3.3406540751457214e-06, -3.2223761081695557e-06, -3.10409814119339e-06, -2.985820174217224e-06, -2.8675422072410583e-06, -2.7492642402648926e-06, -2.630986273288727e-06, -2.512708306312561e-06, -2.3944303393363953e-06, -2.2761523723602295e-06, -2.1578744053840637e-06, -2.039596438407898e-06, -1.921318471431732e-06, -1.8030405044555664e-06, -1.6847625374794006e-06, -1.5664845705032349e-06, -1.448206603527069e-06, -1.3299286365509033e-06, -1.2116506695747375e-06, -1.0933727025985718e-06, -9.75094735622406e-07, -8.568167686462402e-07, -7.385388016700745e-07, -6.202608346939087e-07, -5.019828677177429e-07, -3.8370490074157715e-07, -2.654269337654114e-07, -1.471489667892456e-07, -2.8870999813079834e-08, 8.940696716308594e-08, 2.076849341392517e-07, 3.259629011154175e-07, 4.4424086809158325e-07, 
5.62518835067749e-07, 6.807968020439148e-07, 7.990747690200806e-07, 9.173527359962463e-07, 1.0356307029724121e-06, 1.1539086699485779e-06, 1.2721866369247437e-06, 1.3904646039009094e-06, 1.5087425708770752e-06, 1.627020537853241e-06, 1.7452985048294067e-06, 1.8635764718055725e-06, 1.9818544387817383e-06, 2.100132405757904e-06, 2.21841037273407e-06, 2.3366883397102356e-06, 2.4549663066864014e-06, 2.573244273662567e-06, 2.691522240638733e-06, 2.8098002076148987e-06, 2.9280781745910645e-06, 3.0463561415672302e-06, 3.164634108543396e-06, 3.2829120755195618e-06, 3.4011900424957275e-06, 3.5194680094718933e-06, 3.637745976448059e-06, 3.756023943424225e-06, 3.874301910400391e-06]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 4.0, 0.0, 6.0, 4.0, 0.0, 5.0, 0.0, 8.0, 11.0, 0.0, 15.0, 0.0, 24.0, 28.0, 0.0, 27.0, 0.0, 41.0, 34.0, 0.0, 55.0, 0.0, 53.0, 78.0, 0.0, 85.0, 0.0, 80.0, 0.0, 85.0, 75.0, 0.0, 43.0, 0.0, 65.0, 41.0, 0.0, 35.0, 0.0, 32.0, 34.0, 0.0, 14.0, 0.0, 9.0, 10.0, 0.0, 4.0, 0.0, 6.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0], "bins": [-1.0728836059570312e-06, -1.037493348121643e-06, -1.0021030902862549e-06, -9.667128324508667e-07, -9.313225746154785e-07, -8.959323167800903e-07, -8.605420589447021e-07, -8.25151801109314e-07, -7.897615432739258e-07, -7.543712854385376e-07, -7.189810276031494e-07, -6.835907697677612e-07, -6.48200511932373e-07, -6.128102540969849e-07, -5.774199962615967e-07, -5.420297384262085e-07, -5.066394805908203e-07, -4.7124922275543213e-07, -4.3585896492004395e-07, -4.0046870708465576e-07, -3.650784492492676e-07, -3.296881914138794e-07, -2.942979335784912e-07, -2.5890767574310303e-07, -2.2351741790771484e-07, -1.8812716007232666e-07, -1.5273690223693848e-07, -1.1734664440155029e-07, -8.195638656616211e-08, -4.6566128730773926e-08, -1.1175870895385742e-08, 2.421438694000244e-08, 5.960464477539063e-08, 9.499490261077881e-08, 1.30385160446167e-07, 1.6577541828155518e-07, 2.0116567611694336e-07, 2.3655593395233154e-07, 2.7194619178771973e-07, 3.073364496231079e-07, 3.427267074584961e-07, 3.781169652938843e-07, 4.1350722312927246e-07, 4.4889748096466064e-07, 4.842877388000488e-07, 5.19677996635437e-07, 5.550682544708252e-07, 5.904585123062134e-07, 6.258487701416016e-07, 6.612390279769897e-07, 6.966292858123779e-07, 7.320195436477661e-07, 7.674098014831543e-07, 8.028000593185425e-07, 8.381903171539307e-07, 8.735805749893188e-07, 9.08970832824707e-07, 9.443610906600952e-07, 9.797513484954834e-07, 1.0151416063308716e-06, 1.0505318641662598e-06, 1.085922122001648e-06, 1.1213123798370361e-06, 1.1567026376724243e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 4.0, 19.0, 14.0, 26.0, 46.0, 129.0, 72.0, 114.0, 151.0, 224.0, 781.0, 573.0, 910.0, 1262.0, 4661.0, 4345.0, 7017.0, 11816.0, 58381.0, 75921.0, 179490.0, 357060.0, 255172.0, 37625.0, 20443.0, 11837.0, 7003.0, 7200.0, 1977.0, 1338.0, 899.0, 947.0, 308.0, 206.0, 142.0, 219.0, 60.0, 44.0, 36.0, 47.0, 10.0, 11.0, 8.0, 1.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.012588083744049e-06, -1.9390136003494263e-06, -1.8654391169548035e-06, -1.7918646335601807e-06, -1.7182901501655579e-06, -1.644715666770935e-06, -1.5711411833763123e-06, -1.4975666999816895e-06, -1.4239922165870667e-06, -1.3504177331924438e-06, -1.276843249797821e-06, -1.2032687664031982e-06, -1.1296942830085754e-06, 
-1.0561197996139526e-06, -9.825453162193298e-07, -9.08970832824707e-07, -8.353963494300842e-07, -7.618218660354614e-07, -6.882473826408386e-07, -6.146728992462158e-07, -5.41098415851593e-07, -4.675239324569702e-07, -3.939494490623474e-07, -3.203749656677246e-07, -2.468004822731018e-07, -1.73225998878479e-07, -9.96515154838562e-08, -2.60770320892334e-08, 4.7497451305389404e-08, 1.210719347000122e-07, 1.94646418094635e-07, 2.682209014892578e-07, 3.417953848838806e-07, 4.153698682785034e-07, 4.889443516731262e-07, 5.62518835067749e-07, 6.360933184623718e-07, 7.096678018569946e-07, 7.832422852516174e-07, 8.568167686462402e-07, 9.30391252040863e-07, 1.0039657354354858e-06, 1.0775402188301086e-06, 1.1511147022247314e-06, 1.2246891856193542e-06, 1.298263669013977e-06, 1.3718381524085999e-06, 1.4454126358032227e-06, 1.5189871191978455e-06, 1.5925616025924683e-06, 1.666136085987091e-06, 1.7397105693817139e-06, 1.8132850527763367e-06, 1.8868595361709595e-06, 1.9604340195655823e-06, 2.034008502960205e-06, 2.107582986354828e-06, 2.1811574697494507e-06, 2.2547319531440735e-06, 2.3283064365386963e-06, 2.401880919933319e-06, 2.475455403327942e-06, 2.5490298867225647e-06, 2.6226043701171875e-06]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 1.0, 4.0, 1.0, 4.0, 0.0, 3.0, 1.0, 4.0, 2.0, 2.0, 3.0, 0.0, 10.0, 4.0, 18.0, 14.0, 15.0, 21.0, 0.0, 29.0, 28.0, 37.0, 43.0, 51.0, 55.0, 0.0, 57.0, 69.0, 70.0, 55.0, 59.0, 52.0, 0.0, 42.0, 40.0, 44.0, 40.0, 31.0, 20.0, 0.0, 16.0, 11.0, 12.0, 12.0, 5.0, 9.0, 0.0, 6.0, 1.0, 1.0, 4.0, 2.0, 6.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1.6093254089355469e-06, -1.5581026673316956e-06, -1.5068799257278442e-06, -1.455657184123993e-06, -1.4044344425201416e-06, -1.3532117009162903e-06, -1.301988959312439e-06, -1.2507662177085876e-06, -1.1995434761047363e-06, -1.148320734500885e-06, -1.0970979928970337e-06, -1.0458752512931824e-06, -9.94652509689331e-07, -9.434297680854797e-07, -8.922070264816284e-07, -8.409842848777771e-07, -7.897615432739258e-07, -7.385388016700745e-07, -6.873160600662231e-07, -6.360933184623718e-07, -5.848705768585205e-07, -5.336478352546692e-07, -4.824250936508179e-07, -4.3120235204696655e-07, -3.7997961044311523e-07, -3.287568688392639e-07, -2.775341272354126e-07, -2.2631138563156128e-07, -1.7508864402770996e-07, -1.2386590242385864e-07, -7.264316082000732e-08, -2.1420419216156006e-08, 2.9802322387695312e-08, 8.102506399154663e-08, 1.3224780559539795e-07, 1.8347054719924927e-07, 2.3469328880310059e-07, 2.859160304069519e-07, 3.371387720108032e-07, 3.8836151361465454e-07, 4.3958425521850586e-07, 4.908069968223572e-07, 5.420297384262085e-07, 5.932524800300598e-07, 6.444752216339111e-07, 6.956979632377625e-07, 7.469207048416138e-07, 7.981434464454651e-07, 8.493661880493164e-07, 9.005889296531677e-07, 9.51811671257019e-07, 1.0030344128608704e-06, 1.0542571544647217e-06, 1.105479896068573e-06, 1.1567026376724243e-06, 1.2079253792762756e-06, 1.259148120880127e-06, 1.3103708624839783e-06, 1.3615936040878296e-06, 1.412816345691681e-06, 1.4640390872955322e-06, 1.5152618288993835e-06, 1.5664845705032349e-06, 1.6177073121070862e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 3.0, 5.0, 4.0, 6.0, 4.0, 11.0, 17.0, 17.0, 21.0, 32.0, 48.0, 73.0, 103.0, 153.0, 111.0, 70.0, 80.0, 35.0, 28.0, 27.0, 26.0, 18.0, 23.0, 14.0, 7.0, 7.0, 11.0, 6.0, 9.0, 13.0, 3.0, 2.0, 4.0, 
4.0, 3.0, 1.0, 4.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.561863407725468e-05, -9.201111242873594e-05, -8.84035907802172e-05, -8.479606913169846e-05, -8.11885402072221e-05, -7.758101855870336e-05, -7.397349691018462e-05, -7.036597526166588e-05, -6.675845361314714e-05, -6.31509319646284e-05, -5.954341031610966e-05, -5.593588502961211e-05, -5.232836338109337e-05, -4.872084173257463e-05, -4.511331644607708e-05, -4.150579479755834e-05, -3.7898273149039596e-05, -3.4290751500520855e-05, -3.0683229852002114e-05, -2.7075704565504566e-05, -2.3468182916985825e-05, -1.9860661268467084e-05, -1.625313780095894e-05, -1.2645614333450794e-05, -9.038092684932053e-06, -5.43057012691861e-06, -1.8230475689051673e-06, 1.7844749891082756e-06, 5.3919975471217185e-06, 8.99951919564046e-06, 1.2607042663148604e-05, 1.621456613065675e-05, 1.9822080503217876e-05, 2.3429602151736617e-05, 2.7037125619244762e-05, 3.0644649086752906e-05, 3.425217073527165e-05, 3.785969238379039e-05, 4.146721767028794e-05, 4.507473931880668e-05, 4.868226096732542e-05, 5.228978261584416e-05, 5.58973042643629e-05, 5.950482955086045e-05, 6.3112354837358e-05, 6.671987648587674e-05, 7.032739813439548e-05, 7.393491978291422e-05, 7.754244143143296e-05, 8.11499630799517e-05, 8.475748472847044e-05, 8.836500637698919e-05, 9.197252802550793e-05, 9.558004967402667e-05, 9.918757859850302e-05, 0.00010279510024702176, 0.0001064026218955405, 0.00011001014354405925, 0.00011361766519257799, 0.00011722518684109673, 0.00012083271576557308, 0.00012444023741409183, 0.00012804775906261057, 0.0001316552807111293, 0.00013526280235964805]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 5.0, 3.0, 2.0, 4.0, 2.0, 9.0, 6.0, 5.0, 10.0, 10.0, 17.0, 16.0, 17.0, 15.0, 27.0, 29.0, 30.0, 25.0, 35.0, 40.0, 31.0, 48.0, 28.0, 37.0, 38.0, 41.0, 40.0, 47.0, 37.0, 35.0, 37.0, 40.0, 28.0, 28.0, 28.0, 23.0, 15.0, 19.0, 15.0, 9.0, 14.0, 18.0, 8.0, 5.0, 7.0, 4.0, 4.0, 4.0, 7.0, 4.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.26120376586914e-05, -8.016545325517654e-05, -7.771886885166168e-05, -7.527228444814682e-05, -7.282570004463196e-05, -7.03791156411171e-05, -6.793253123760223e-05, -6.548594683408737e-05, -6.303936243057251e-05, -6.059277802705765e-05, -5.8146193623542786e-05, -5.5699609220027924e-05, -5.325302481651306e-05, -5.08064404129982e-05, -4.835985600948334e-05, -4.5913271605968475e-05, -4.346668720245361e-05, -4.102010279893875e-05, -3.857351839542389e-05, -3.612693399190903e-05, -3.3680349588394165e-05, -3.12337651848793e-05, -2.878718078136444e-05, -2.634059637784958e-05, -2.3894011974334717e-05, -2.1447427570819855e-05, -1.9000843167304993e-05, -1.655425876379013e-05, -1.4107674360275269e-05, -1.1661089956760406e-05, -9.214505553245544e-06, -6.767921149730682e-06, -4.32133674621582e-06, -1.8747523427009583e-06, 5.718320608139038e-07, 3.018416464328766e-06, 5.465000867843628e-06, 7.91158527135849e-06, 1.0358169674873352e-05, 1.2804754078388214e-05, 1.5251338481903076e-05, 1.7697922885417938e-05, 2.01445072889328e-05, 2.2591091692447662e-05, 2.5037676095962524e-05, 2.7484260499477386e-05, 2.993084490299225e-05, 3.237742930650711e-05, 3.482401371002197e-05, 3.7270598113536835e-05, 3.97171825170517e-05, 4.216376692056656e-05, 4.461035132408142e-05, 4.705693572759628e-05, 4.9503520131111145e-05, 5.195010453462601e-05, 5.439668893814087e-05, 5.684327334165573e-05, 5.928985774517059e-05, 6.173644214868546e-05, 6.418302655220032e-05, 
6.662961095571518e-05, 6.907619535923004e-05, 7.15227797627449e-05, 7.396936416625977e-05]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 5.0, 6.0, 8.0, 8.0, 18.0, 24.0, 34.0, 61.0, 68.0, 90.0, 176.0, 294.0, 433.0, 657.0, 1153.0, 2046.0, 3774.0, 8444.0, 21705.0, 123401.0, 3933626.0, 68947.0, 16205.0, 6408.0, 2962.0, 1697.0, 792.0, 496.0, 264.0, 149.0, 95.0, 75.0, 43.0, 31.0, 25.0, 18.0, 13.0, 15.0, 6.0, 7.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.414817810058594e-05, -7.144175469875336e-05, -6.873533129692078e-05, -6.60289078950882e-05, -6.332248449325562e-05, -6.0616061091423035e-05, -5.7909637689590454e-05, -5.5203214287757874e-05, -5.249679088592529e-05, -4.979036748409271e-05, -4.708394408226013e-05, -4.437752068042755e-05, -4.167109727859497e-05, -3.896467387676239e-05, -3.625825047492981e-05, -3.355182707309723e-05, -3.084540367126465e-05, -2.8138980269432068e-05, -2.5432556867599487e-05, -2.2726133465766907e-05, -2.0019710063934326e-05, -1.7313286662101746e-05, -1.4606863260269165e-05, -1.1900439858436584e-05, -9.194016456604004e-06, -6.487593054771423e-06, -3.7811696529388428e-06, -1.0747462511062622e-06, 1.6316771507263184e-06, 4.338100552558899e-06, 7.0445239543914795e-06, 9.75094735622406e-06, 1.245737075805664e-05, 1.5163794159889221e-05, 1.7870217561721802e-05, 2.0576640963554382e-05, 2.3283064365386963e-05, 2.5989487767219543e-05, 2.8695911169052124e-05, 3.1402334570884705e-05, 3.4108757972717285e-05, 3.6815181374549866e-05, 3.9521604776382446e-05, 4.222802817821503e-05, 4.493445158004761e-05, 4.764087498188019e-05, 5.034729838371277e-05, 5.305372178554535e-05, 5.576014518737793e-05, 5.846656858921051e-05, 6.117299199104309e-05, 6.387941539287567e-05, 6.658583879470825e-05, 6.929226219654083e-05, 7.199868559837341e-05, 7.4705109000206e-05, 7.741153240203857e-05, 8.011795580387115e-05, 8.282437920570374e-05, 8.553080260753632e-05, 8.82372260093689e-05, 9.094364941120148e-05, 9.365007281303406e-05, 9.635649621486664e-05, 9.906291961669922e-05]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 4.0, 4.0, 2.0, 3.0, 1.0, 6.0, 8.0, 5.0, 7.0, 9.0, 10.0, 21.0, 19.0, 35.0, 42.0, 71.0, 85.0, 89.0, 96.0, 83.0, 100.0, 79.0, 60.0, 39.0, 39.0, 29.0, 20.0, 6.0, 7.0, 7.0, 12.0, 3.0, 8.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6570091247558594e-05, -1.595914363861084e-05, -1.5348196029663086e-05, -1.4737248420715332e-05, -1.4126300811767578e-05, -1.3515353202819824e-05, -1.290440559387207e-05, -1.2293457984924316e-05, -1.1682510375976562e-05, -1.1071562767028809e-05, -1.0460615158081055e-05, -9.8496675491333e-06, -9.238719940185547e-06, -8.627772331237793e-06, -8.016824722290039e-06, -7.405877113342285e-06, -6.794929504394531e-06, -6.183981895446777e-06, -5.5730342864990234e-06, -4.9620866775512695e-06, -4.351139068603516e-06, -3.7401914596557617e-06, -3.129243850708008e-06, -2.518296241760254e-06, -1.9073486328125e-06, -1.296401023864746e-06, -6.854534149169922e-07, -7.450580596923828e-08, 5.364418029785156e-07, 1.1473894119262695e-06, 1.7583370208740234e-06, 2.3692846298217773e-06, 2.9802322387695312e-06, 3.591179847717285e-06, 4.202127456665039e-06, 4.813075065612793e-06, 5.424022674560547e-06, 6.034970283508301e-06, 6.645917892456055e-06, 
7.256865501403809e-06, 7.867813110351562e-06, 8.478760719299316e-06, 9.08970832824707e-06, 9.700655937194824e-06, 1.0311603546142578e-05, 1.0922551155090332e-05, 1.1533498764038086e-05, 1.214444637298584e-05, 1.2755393981933594e-05, 1.3366341590881348e-05, 1.3977289199829102e-05, 1.4588236808776855e-05, 1.519918441772461e-05, 1.5810132026672363e-05, 1.6421079635620117e-05, 1.703202724456787e-05, 1.7642974853515625e-05, 1.825392246246338e-05, 1.8864870071411133e-05, 1.9475817680358887e-05, 2.008676528930664e-05, 2.0697712898254395e-05, 2.130866050720215e-05, 2.1919608116149902e-05, 2.2530555725097656e-05]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 5.0, 4.0, 2.0, 9.0, 14.0, 19.0, 34.0, 48.0, 67.0, 112.0, 151.0, 240.0, 409.0, 650.0, 967.0, 1776.0, 3161.0, 6274.0, 11767.0, 25015.0, 65032.0, 398877.0, 3460593.0, 140430.0, 41470.0, 17686.0, 8598.0, 4478.0, 2542.0, 1433.0, 927.0, 496.0, 335.0, 225.0, 157.0, 83.0, 68.0, 45.0, 32.0, 21.0, 15.0, 5.0, 10.0, 4.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.166364669799805e-05, -4.0288083255290985e-05, -3.891251981258392e-05, -3.753695636987686e-05, -3.61613929271698e-05, -3.478582948446274e-05, -3.3410266041755676e-05, -3.2034702599048615e-05, -3.065913915634155e-05, -2.928357571363449e-05, -2.790801227092743e-05, -2.6532448828220367e-05, -2.5156885385513306e-05, -2.3781321942806244e-05, -2.2405758500099182e-05, -2.103019505739212e-05, -1.965463161468506e-05, -1.8279068171977997e-05, -1.6903504729270935e-05, -1.5527941286563873e-05, -1.4152377843856812e-05, -1.277681440114975e-05, -1.1401250958442688e-05, -1.0025687515735626e-05, -8.650124073028564e-06, -7.274560630321503e-06, -5.898997187614441e-06, -4.523433744907379e-06, -3.1478703022003174e-06, -1.7723068594932556e-06, -3.9674341678619385e-07, 9.78820025920868e-07, 2.3543834686279297e-06, 3.7299469113349915e-06, 5.105510354042053e-06, 6.481073796749115e-06, 7.856637239456177e-06, 9.232200682163239e-06, 1.06077641248703e-05, 1.1983327567577362e-05, 1.3358891010284424e-05, 1.4734454452991486e-05, 1.6110017895698547e-05, 1.748558133840561e-05, 1.886114478111267e-05, 2.0236708223819733e-05, 2.1612271666526794e-05, 2.2987835109233856e-05, 2.4363398551940918e-05, 2.573896199464798e-05, 2.711452543735504e-05, 2.8490088880062103e-05, 2.9865652322769165e-05, 3.124121576547623e-05, 3.261677920818329e-05, 3.399234265089035e-05, 3.536790609359741e-05, 3.6743469536304474e-05, 3.8119032979011536e-05, 3.94945964217186e-05, 4.087015986442566e-05, 4.224572330713272e-05, 4.362128674983978e-05, 4.4996850192546844e-05, 4.6372413635253906e-05]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 1.0, 5.0, 5.0, 7.0, 5.0, 10.0, 7.0, 14.0, 20.0, 17.0, 24.0, 33.0, 35.0, 38.0, 73.0, 147.0, 383.0, 1112.0, 1352.0, 365.0, 138.0, 62.0, 35.0, 42.0, 20.0, 21.0, 20.0, 18.0, 12.0, 8.0, 9.0, 7.0, 5.0, 3.0, 3.0, 4.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.6285648345947266e-05, -2.5346875190734863e-05, -2.440810203552246e-05, -2.346932888031006e-05, -2.2530555725097656e-05, -2.1591782569885254e-05, -2.065300941467285e-05, -1.971423625946045e-05, -1.8775463104248047e-05, -1.7836689949035645e-05, -1.6897916793823242e-05, -1.595914363861084e-05, -1.5020370483398438e-05, -1.4081597328186035e-05, -1.3142824172973633e-05, 
-1.220405101776123e-05, -1.1265277862548828e-05, -1.0326504707336426e-05, -9.387731552124023e-06, -8.448958396911621e-06, -7.510185241699219e-06, -6.571412086486816e-06, -5.632638931274414e-06, -4.693865776062012e-06, -3.7550926208496094e-06, -2.816319465637207e-06, -1.8775463104248047e-06, -9.387731552124023e-07, 0.0, 9.387731552124023e-07, 1.8775463104248047e-06, 2.816319465637207e-06, 3.7550926208496094e-06, 4.693865776062012e-06, 5.632638931274414e-06, 6.571412086486816e-06, 7.510185241699219e-06, 8.448958396911621e-06, 9.387731552124023e-06, 1.0326504707336426e-05, 1.1265277862548828e-05, 1.220405101776123e-05, 1.3142824172973633e-05, 1.4081597328186035e-05, 1.5020370483398438e-05, 1.595914363861084e-05, 1.6897916793823242e-05, 1.7836689949035645e-05, 1.8775463104248047e-05, 1.971423625946045e-05, 2.065300941467285e-05, 2.1591782569885254e-05, 2.2530555725097656e-05, 2.346932888031006e-05, 2.440810203552246e-05, 2.5346875190734863e-05, 2.6285648345947266e-05, 2.7224421501159668e-05, 2.816319465637207e-05, 2.9101967811584473e-05, 3.0040740966796875e-05, 3.097951412200928e-05, 3.191828727722168e-05, 3.285706043243408e-05, 3.3795833587646484e-05]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 5.0, 2.0, 7.0, 7.0, 7.0, 4.0, 18.0, 25.0, 35.0, 42.0, 56.0, 88.0, 107.0, 144.0, 98.0, 92.0, 61.0, 55.0, 40.0, 26.0, 19.0, 20.0, 10.0, 5.0, 13.0, 6.0, 8.0, 1.0, 5.0, 4.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014836332411505282, -0.00014347155229188502, -0.00013857978046871722, -0.0001336879940936342, -0.0001287962222704664, -0.00012390445044729859, -0.00011901267862413079, -0.00011412090680096298, -0.00010922912770183757, -0.00010433735587866977, -9.944557677954435e-05, -9.455380495637655e-05, -8.966203313320875e-05, -8.477025403408334e-05, -7.987848221091554e-05, -7.498670311179012e-05, -7.009493128862232e-05, -6.520315946545452e-05, -6.0311380366329104e-05, -5.54196085431613e-05, -5.0527833082014695e-05, -4.563605762086809e-05, -4.0744285797700286e-05, -3.585251033655368e-05, -3.096073487540707e-05, -2.6068959414260462e-05, -2.1177185772103257e-05, -1.6285412129946053e-05, -1.1393636668799445e-05, -6.501861207652837e-06, -1.6100875654956326e-06, 3.281686076661572e-06, 8.17346153780818e-06, 1.3065236089460086e-05, 1.7957010641111992e-05, 2.2848784283269197e-05, 2.7740559744415805e-05, 3.263233520556241e-05, 3.7524107028730214e-05, 4.241588248987682e-05, 4.730765795102343e-05, 5.219943341217004e-05, 5.7091208873316646e-05, 6.198298069648445e-05, 6.687475251965225e-05, 7.176653161877766e-05, 7.665830344194546e-05, 8.155008254107088e-05, 8.644185436423868e-05, 9.133362618740648e-05, 9.62254052865319e-05, 0.0001011171771096997, 0.00010600895620882511, 0.00011090072803199291, 0.00011579249985516071, 0.00012068427167832851, 0.00012557604350149632, 0.00013046781532466412, 0.00013535958714783192, 0.00014025137352291495, 0.00014514314534608275, 0.00015003491716925055, 0.00015492668899241835, 0.00015981846081558615, 0.00016471024719066918]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 6.0, 2.0, 6.0, 7.0, 10.0, 6.0, 17.0, 15.0, 17.0, 18.0, 25.0, 26.0, 35.0, 29.0, 41.0, 43.0, 46.0, 47.0, 59.0, 41.0, 54.0, 42.0, 39.0, 39.0, 50.0, 44.0, 39.0, 31.0, 22.0, 27.0, 23.0, 20.0, 17.0, 11.0, 17.0, 12.0, 
9.0, 4.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-9.679794311523438e-05, -9.406637400388718e-05, -9.133480489253998e-05, -8.860323578119278e-05, -8.587166666984558e-05, -8.314009755849838e-05, -8.040852844715118e-05, -7.767695933580399e-05, -7.494539022445679e-05, -7.221382111310959e-05, -6.948225200176239e-05, -6.675068289041519e-05, -6.401911377906799e-05, -6.12875446677208e-05, -5.8555975556373596e-05, -5.58244064450264e-05, -5.30928373336792e-05, -5.0361268222332e-05, -4.76296991109848e-05, -4.4898129999637604e-05, -4.2166560888290405e-05, -3.943499177694321e-05, -3.670342266559601e-05, -3.397185355424881e-05, -3.124028444290161e-05, -2.8508715331554413e-05, -2.5777146220207214e-05, -2.3045577108860016e-05, -2.0314007997512817e-05, -1.758243888616562e-05, -1.485086977481842e-05, -1.2119300663471222e-05, -9.387731552124023e-06, -6.656162440776825e-06, -3.9245933294296265e-06, -1.193024218082428e-06, 1.5385448932647705e-06, 4.270114004611969e-06, 7.0016831159591675e-06, 9.733252227306366e-06, 1.2464821338653564e-05, 1.5196390450000763e-05, 1.792795956134796e-05, 2.065952867269516e-05, 2.339109778404236e-05, 2.6122666895389557e-05, 2.8854236006736755e-05, 3.1585805118083954e-05, 3.431737422943115e-05, 3.704894334077835e-05, 3.978051245212555e-05, 4.251208156347275e-05, 4.5243650674819946e-05, 4.7975219786167145e-05, 5.070678889751434e-05, 5.343835800886154e-05, 5.616992712020874e-05, 5.890149623155594e-05, 6.163306534290314e-05, 6.436463445425034e-05, 6.709620356559753e-05, 6.982777267694473e-05, 7.255934178829193e-05, 7.529091089963913e-05, 7.802248001098633e-05]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 4.0, 6.0, 3.0, 10.0, 12.0, 15.0, 26.0, 35.0, 40.0, 76.0, 103.0, 183.0, 261.0, 411.0, 647.0, 1090.0, 1785.0, 3151.0, 5633.0, 10239.0, 21098.0, 47043.0, 132037.0, 488254.0, 211190.0, 67306.0, 27374.0, 13652.0, 7257.0, 3880.0, 2283.0, 1306.0, 765.0, 457.0, 308.0, 173.0, 139.0, 89.0, 69.0, 52.0, 28.0, 21.0, 20.0, 8.0, 8.0, 5.0, 6.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-8.231401443481445e-05, -8.000805974006653e-05, -7.77021050453186e-05, -7.539615035057068e-05, -7.309019565582275e-05, -7.078424096107483e-05, -6.84782862663269e-05, -6.617233157157898e-05, -6.386637687683105e-05, -6.156042218208313e-05, -5.9254467487335205e-05, -5.694851279258728e-05, -5.4642558097839355e-05, -5.233660340309143e-05, -5.0030648708343506e-05, -4.772469401359558e-05, -4.5418739318847656e-05, -4.311278462409973e-05, -4.080682992935181e-05, -3.850087523460388e-05, -3.619492053985596e-05, -3.388896584510803e-05, -3.158301115036011e-05, -2.9277056455612183e-05, -2.6971101760864258e-05, -2.4665147066116333e-05, -2.2359192371368408e-05, -2.0053237676620483e-05, -1.774728298187256e-05, -1.5441328287124634e-05, -1.3135373592376709e-05, -1.0829418897628784e-05, -8.52346420288086e-06, -6.2175095081329346e-06, -3.91155481338501e-06, -1.605600118637085e-06, 7.003545761108398e-07, 3.0063092708587646e-06, 5.3122639656066895e-06, 7.618218660354614e-06, 9.924173355102539e-06, 1.2230128049850464e-05, 1.4536082744598389e-05, 1.6842037439346313e-05, 1.9147992134094238e-05, 2.1453946828842163e-05, 2.3759901523590088e-05, 2.6065856218338013e-05, 2.8371810913085938e-05, 3.067776560783386e-05, 3.298372030258179e-05, 3.528967499732971e-05, 3.759562969207764e-05, 3.990158438682556e-05, 4.2207539081573486e-05, 4.451349377632141e-05, 4.6819448471069336e-05, 4.912540316581726e-05, 
5.1431357860565186e-05, 5.373731255531311e-05, 5.6043267250061035e-05, 5.834922194480896e-05, 6.0655176639556885e-05, 6.296113133430481e-05, 6.526708602905273e-05]}, "gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 9.0, 7.0, 10.0, 12.0, 17.0, 17.0, 22.0, 22.0, 30.0, 49.0, 74.0, 81.0, 81.0, 69.0, 78.0, 77.0, 74.0, 53.0, 48.0, 37.0, 28.0, 23.0, 15.0, 22.0, 7.0, 7.0, 6.0, 10.0, 6.0, 3.0, 2.0, 3.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.8596649169921875e-05, -1.8084421753883362e-05, -1.757219433784485e-05, -1.7059966921806335e-05, -1.6547739505767822e-05, -1.603551208972931e-05, -1.5523284673690796e-05, -1.5011057257652283e-05, -1.449882984161377e-05, -1.3986602425575256e-05, -1.3474375009536743e-05, -1.296214759349823e-05, -1.2449920177459717e-05, -1.1937692761421204e-05, -1.142546534538269e-05, -1.0913237929344177e-05, -1.0401010513305664e-05, -9.888783097267151e-06, -9.376555681228638e-06, -8.864328265190125e-06, -8.352100849151611e-06, -7.839873433113098e-06, -7.327646017074585e-06, -6.815418601036072e-06, -6.303191184997559e-06, -5.790963768959045e-06, -5.278736352920532e-06, -4.766508936882019e-06, -4.254281520843506e-06, -3.7420541048049927e-06, -3.2298266887664795e-06, -2.7175992727279663e-06, -2.205371856689453e-06, -1.69314444065094e-06, -1.1809170246124268e-06, -6.686896085739136e-07, -1.564621925354004e-07, 3.557652235031128e-07, 8.67992639541626e-07, 1.3802200555801392e-06, 1.8924474716186523e-06, 2.4046748876571655e-06, 2.9169023036956787e-06, 3.429129719734192e-06, 3.941357135772705e-06, 4.453584551811218e-06, 4.9658119678497314e-06, 5.478039383888245e-06, 5.990266799926758e-06, 6.502494215965271e-06, 7.014721632003784e-06, 7.526949048042297e-06, 8.03917646408081e-06, 8.551403880119324e-06, 9.063631296157837e-06, 9.57585871219635e-06, 1.0088086128234863e-05, 1.0600313544273376e-05, 1.111254096031189e-05, 1.1624768376350403e-05, 1.2136995792388916e-05, 1.264922320842743e-05, 1.3161450624465942e-05, 1.3673678040504456e-05, 1.4185905456542969e-05]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 5.0, 8.0, 16.0, 24.0, 34.0, 46.0, 81.0, 107.0, 172.0, 255.0, 370.0, 632.0, 966.0, 1532.0, 2312.0, 3627.0, 6386.0, 10370.0, 17101.0, 30542.0, 56084.0, 124524.0, 347169.0, 237437.0, 94825.0, 50149.0, 25710.0, 14789.0, 8750.0, 5290.0, 3482.0, 2049.0, 1287.0, 870.0, 546.0, 355.0, 230.0, 157.0, 95.0, 68.0, 35.0, 27.0, 7.0, 14.0, 10.0, 7.0, 4.0, 3.0, 3.0, 2.0, 0.0, 1.0, 2.0], "bins": [-3.4749507904052734e-05, -3.372319042682648e-05, -3.269687294960022e-05, -3.167055547237396e-05, -3.0644237995147705e-05, -2.9617920517921448e-05, -2.859160304069519e-05, -2.7565285563468933e-05, -2.6538968086242676e-05, -2.551265060901642e-05, -2.448633313179016e-05, -2.3460015654563904e-05, -2.2433698177337646e-05, -2.140738070011139e-05, -2.0381063222885132e-05, -1.9354745745658875e-05, -1.8328428268432617e-05, -1.730211079120636e-05, -1.6275793313980103e-05, -1.5249475836753845e-05, -1.4223158359527588e-05, -1.319684088230133e-05, -1.2170523405075073e-05, -1.1144205927848816e-05, -1.0117888450622559e-05, -9.091570973396301e-06, -8.065253496170044e-06, -7.038936018943787e-06, -6.012618541717529e-06, -4.986301064491272e-06, -3.959983587265015e-06, -2.9336661100387573e-06, -1.9073486328125e-06, -8.810311555862427e-07, 
1.4528632164001465e-07, 1.171603798866272e-06, 2.1979212760925293e-06, 3.2242387533187866e-06, 4.250556230545044e-06, 5.276873707771301e-06, 6.303191184997559e-06, 7.329508662223816e-06, 8.355826139450073e-06, 9.38214361667633e-06, 1.0408461093902588e-05, 1.1434778571128845e-05, 1.2461096048355103e-05, 1.348741352558136e-05, 1.4513731002807617e-05, 1.5540048480033875e-05, 1.6566365957260132e-05, 1.759268343448639e-05, 1.8619000911712646e-05, 1.9645318388938904e-05, 2.067163586616516e-05, 2.169795334339142e-05, 2.2724270820617676e-05, 2.3750588297843933e-05, 2.477690577507019e-05, 2.5803223252296448e-05, 2.6829540729522705e-05, 2.7855858206748962e-05, 2.888217568397522e-05, 2.9908493161201477e-05, 3.0934810638427734e-05]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 3.0, 4.0, 6.0, 8.0, 6.0, 16.0, 13.0, 13.0, 13.0, 13.0, 14.0, 24.0, 24.0, 26.0, 21.0, 32.0, 48.0, 57.0, 37.0, 33.0, 37.0, 34.0, 36.0, 42.0, 36.0, 49.0, 49.0, 31.0, 33.0, 31.0, 35.0, 22.0, 24.0, 18.0, 16.0, 25.0, 16.0, 11.0, 11.0, 6.0, 7.0, 3.0, 9.0, 5.0, 2.0, 3.0, 5.0, 0.0, 2.0, 0.0, 2.0, 1.0], "bins": [-2.8014183044433594e-05, -2.7186237275600433e-05, -2.6358291506767273e-05, -2.5530345737934113e-05, -2.4702399969100952e-05, -2.3874454200267792e-05, -2.304650843143463e-05, -2.221856266260147e-05, -2.139061689376831e-05, -2.056267112493515e-05, -1.973472535610199e-05, -1.890677958726883e-05, -1.807883381843567e-05, -1.725088804960251e-05, -1.6422942280769348e-05, -1.5594996511936188e-05, -1.4767050743103027e-05, -1.3939104974269867e-05, -1.3111159205436707e-05, -1.2283213436603546e-05, -1.1455267667770386e-05, -1.0627321898937225e-05, -9.799376130104065e-06, -8.971430361270905e-06, -8.143484592437744e-06, -7.315538823604584e-06, -6.487593054771423e-06, -5.659647285938263e-06, -4.8317015171051025e-06, -4.003755748271942e-06, -3.1758099794387817e-06, -2.3478642106056213e-06, -1.519918441772461e-06, -6.919726729393005e-07, 1.3597309589385986e-07, 9.639188647270203e-07, 1.7918646335601807e-06, 2.619810402393341e-06, 3.4477561712265015e-06, 4.275701940059662e-06, 5.103647708892822e-06, 5.931593477725983e-06, 6.759539246559143e-06, 7.5874850153923035e-06, 8.415430784225464e-06, 9.243376553058624e-06, 1.0071322321891785e-05, 1.0899268090724945e-05, 1.1727213859558105e-05, 1.2555159628391266e-05, 1.3383105397224426e-05, 1.4211051166057587e-05, 1.5038996934890747e-05, 1.5866942703723907e-05, 1.6694888472557068e-05, 1.7522834241390228e-05, 1.835078001022339e-05, 1.917872577905655e-05, 2.000667154788971e-05, 2.083461731672287e-05, 2.166256308555603e-05, 2.249050885438919e-05, 2.331845462322235e-05, 2.414640039205551e-05, 2.4974346160888672e-05]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 5.0, 3.0, 5.0, 3.0, 4.0, 0.0, 4.0, 11.0, 22.0, 28.0, 46.0, 80.0, 52.0, 158.0, 223.0, 385.0, 704.0, 1114.0, 941.0, 2905.0, 5944.0, 13716.0, 36281.0, 118848.0, 176453.0, 509336.0, 118666.0, 36195.0, 13577.0, 6216.0, 2914.0, 887.0, 1086.0, 680.0, 387.0, 248.0, 172.0, 48.0, 63.0, 53.0, 29.0, 23.0, 18.0, 8.0, 5.0, 6.0, 5.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.516674041748047e-06, -3.4067779779434204e-06, -3.296881914138794e-06, -3.1869858503341675e-06, -3.077089786529541e-06, -2.9671937227249146e-06, -2.857297658920288e-06, -2.7474015951156616e-06, -2.637505531311035e-06, -2.5276094675064087e-06, -2.4177134037017822e-06, 
-2.3078173398971558e-06, -2.1979212760925293e-06, -2.088025212287903e-06, -1.9781291484832764e-06, -1.86823308467865e-06, -1.7583370208740234e-06, -1.648440957069397e-06, -1.5385448932647705e-06, -1.428648829460144e-06, -1.3187527656555176e-06, -1.2088567018508911e-06, -1.0989606380462646e-06, -9.890645742416382e-07, -8.791685104370117e-07, -7.692724466323853e-07, -6.593763828277588e-07, -5.494803190231323e-07, -4.3958425521850586e-07, -3.296881914138794e-07, -2.1979212760925293e-07, -1.0989606380462646e-07, 0.0, 1.0989606380462646e-07, 2.1979212760925293e-07, 3.296881914138794e-07, 4.3958425521850586e-07, 5.494803190231323e-07, 6.593763828277588e-07, 7.692724466323853e-07, 8.791685104370117e-07, 9.890645742416382e-07, 1.0989606380462646e-06, 1.2088567018508911e-06, 1.3187527656555176e-06, 1.428648829460144e-06, 1.5385448932647705e-06, 1.648440957069397e-06, 1.7583370208740234e-06, 1.86823308467865e-06, 1.9781291484832764e-06, 2.088025212287903e-06, 2.1979212760925293e-06, 2.3078173398971558e-06, 2.4177134037017822e-06, 2.5276094675064087e-06, 2.637505531311035e-06, 2.7474015951156616e-06, 2.857297658920288e-06, 2.9671937227249146e-06, 3.077089786529541e-06, 3.1869858503341675e-06, 3.296881914138794e-06, 3.4067779779434204e-06, 3.516674041748047e-06]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 8.0, 9.0, 0.0, 15.0, 0.0, 15.0, 0.0, 17.0, 0.0, 35.0, 29.0, 0.0, 41.0, 0.0, 59.0, 0.0, 83.0, 88.0, 0.0, 92.0, 0.0, 97.0, 0.0, 84.0, 0.0, 67.0, 61.0, 0.0, 44.0, 0.0, 28.0, 0.0, 34.0, 28.0, 0.0, 18.0, 0.0, 20.0, 0.0, 24.0, 0.0, 8.0, 6.0, 0.0, 1.0, 0.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5367431640625e-07, -9.201467037200928e-07, -8.866190910339355e-07, -8.530914783477783e-07, -8.195638656616211e-07, -7.860362529754639e-07, -7.525086402893066e-07, -7.189810276031494e-07, -6.854534149169922e-07, -6.51925802230835e-07, -6.183981895446777e-07, -5.848705768585205e-07, -5.513429641723633e-07, -5.178153514862061e-07, -4.842877388000488e-07, -4.507601261138916e-07, -4.172325134277344e-07, -3.8370490074157715e-07, -3.501772880554199e-07, -3.166496753692627e-07, -2.8312206268310547e-07, -2.4959444999694824e-07, -2.1606683731079102e-07, -1.825392246246338e-07, -1.4901161193847656e-07, -1.1548399925231934e-07, -8.195638656616211e-08, -4.842877388000488e-08, -1.4901161193847656e-08, 1.862645149230957e-08, 5.21540641784668e-08, 8.568167686462402e-08, 1.1920928955078125e-07, 1.5273690223693848e-07, 1.862645149230957e-07, 2.1979212760925293e-07, 2.5331974029541016e-07, 2.868473529815674e-07, 3.203749656677246e-07, 3.5390257835388184e-07, 3.8743019104003906e-07, 4.209578037261963e-07, 4.544854164123535e-07, 4.880130290985107e-07, 5.21540641784668e-07, 5.550682544708252e-07, 5.885958671569824e-07, 6.221234798431396e-07, 6.556510925292969e-07, 6.891787052154541e-07, 7.227063179016113e-07, 7.562339305877686e-07, 7.897615432739258e-07, 8.23289155960083e-07, 8.568167686462402e-07, 8.903443813323975e-07, 9.238719940185547e-07, 9.57399606704712e-07, 9.909272193908691e-07, 1.0244548320770264e-06, 1.0579824447631836e-06, 1.0915100574493408e-06, 1.125037670135498e-06, 1.1585652828216553e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 3.0, 2.0, 8.0, 3.0, 1.0, 10.0, 10.0, 6.0, 24.0, 21.0, 31.0, 54.0, 44.0, 50.0, 156.0, 156.0, 187.0, 505.0, 421.0, 546.0, 1592.0, 1424.0, 2008.0, 7501.0, 7430.0, 13158.0, 85714.0, 181238.0, 444280.0, 
241039.0, 25596.0, 13129.0, 12028.0, 2908.0, 1952.0, 2440.0, 759.0, 500.0, 683.0, 198.0, 158.0, 239.0, 83.0, 59.0, 88.0, 25.0, 29.0, 30.0, 9.0, 4.0, 13.0, 0.0, 5.0, 6.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.3245811462402344e-06, -2.2454187273979187e-06, -2.166256308555603e-06, -2.0870938897132874e-06, -2.0079314708709717e-06, -1.928769052028656e-06, -1.8496066331863403e-06, -1.7704442143440247e-06, -1.691281795501709e-06, -1.6121193766593933e-06, -1.5329569578170776e-06, -1.453794538974762e-06, -1.3746321201324463e-06, -1.2954697012901306e-06, -1.216307282447815e-06, -1.1371448636054993e-06, -1.0579824447631836e-06, -9.78820025920868e-07, -8.996576070785522e-07, -8.204951882362366e-07, -7.413327693939209e-07, -6.621703505516052e-07, -5.830079317092896e-07, -5.038455128669739e-07, -4.246830940246582e-07, -3.4552067518234253e-07, -2.6635825634002686e-07, -1.8719583749771118e-07, -1.0803341865539551e-07, -2.8870999813079834e-08, 5.029141902923584e-08, 1.2945383787155151e-07, 2.086162567138672e-07, 2.8777867555618286e-07, 3.6694109439849854e-07, 4.461035132408142e-07, 5.252659320831299e-07, 6.044283509254456e-07, 6.835907697677612e-07, 7.627531886100769e-07, 8.419156074523926e-07, 9.210780262947083e-07, 1.000240445137024e-06, 1.0794028639793396e-06, 1.1585652828216553e-06, 1.237727701663971e-06, 1.3168901205062866e-06, 1.3960525393486023e-06, 1.475214958190918e-06, 1.5543773770332336e-06, 1.6335397958755493e-06, 1.712702214717865e-06, 1.7918646335601807e-06, 1.8710270524024963e-06, 1.950189471244812e-06, 2.0293518900871277e-06, 2.1085143089294434e-06, 2.187676727771759e-06, 2.2668391466140747e-06, 2.3460015654563904e-06, 2.425163984298706e-06, 2.5043264031410217e-06, 2.5834888219833374e-06, 2.662651240825653e-06, 2.7418136596679688e-06]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 6.0, 2.0, 4.0, 8.0, 11.0, 7.0, 13.0, 12.0, 23.0, 61.0, 65.0, 64.0, 74.0, 90.0, 99.0, 90.0, 73.0, 73.0, 79.0, 33.0, 26.0, 20.0, 14.0, 7.0, 9.0, 5.0, 4.0, 10.0, 7.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.1457672119140625e-06, -2.0796433091163635e-06, -2.0135194063186646e-06, -1.9473955035209656e-06, -1.8812716007232666e-06, -1.8151476979255676e-06, -1.7490237951278687e-06, -1.6828998923301697e-06, -1.6167759895324707e-06, -1.5506520867347717e-06, -1.4845281839370728e-06, -1.4184042811393738e-06, -1.3522803783416748e-06, -1.2861564755439758e-06, -1.2200325727462769e-06, -1.1539086699485779e-06, -1.087784767150879e-06, -1.02166086435318e-06, -9.55536961555481e-07, -8.89413058757782e-07, -8.23289155960083e-07, -7.57165253162384e-07, -6.910413503646851e-07, -6.249174475669861e-07, -5.587935447692871e-07, -4.926696419715881e-07, -4.2654573917388916e-07, -3.604218363761902e-07, -2.942979335784912e-07, -2.2817403078079224e-07, -1.6205012798309326e-07, -9.592622518539429e-08, -2.9802322387695312e-08, 3.632158041000366e-08, 1.0244548320770264e-07, 1.685693860054016e-07, 2.3469328880310059e-07, 3.0081719160079956e-07, 3.6694109439849854e-07, 4.330649971961975e-07, 4.991888999938965e-07, 5.653128027915955e-07, 6.314367055892944e-07, 6.975606083869934e-07, 7.636845111846924e-07, 8.298084139823914e-07, 8.959323167800903e-07, 9.620562195777893e-07, 1.0281801223754883e-06, 1.0943040251731873e-06, 1.1604279279708862e-06, 1.2265518307685852e-06, 1.2926757335662842e-06, 1.3587996363639832e-06, 1.4249235391616821e-06, 
1.491047441959381e-06, 1.55717134475708e-06, 1.623295247554779e-06, 1.689419150352478e-06, 1.755543053150177e-06, 1.821666955947876e-06, 1.887790858745575e-06, 1.953914761543274e-06, 2.020038664340973e-06, 2.086162567138672e-06]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 3.0, 2.0, 3.0, 2.0, 5.0, 7.0, 13.0, 18.0, 35.0, 39.0, 60.0, 107.0, 177.0, 148.0, 88.0, 55.0, 50.0, 37.0, 35.0, 27.0, 8.0, 19.0, 13.0, 6.0, 11.0, 10.0, 8.0, 0.0, 4.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011545175948413089, -0.00011127455218229443, -0.00010709734488045797, -0.0001029201375786215, -9.874293027678505e-05, -9.456572297494859e-05, -9.038852294906974e-05, -8.621130837127566e-05, -8.203410834539682e-05, -7.785690104356036e-05, -7.36796937417239e-05, -6.950248643988743e-05, -6.532527913805097e-05, -6.114807183621451e-05, -5.697086817235686e-05, -5.27936608705204e-05, -4.861644993070513e-05, -4.443924262886867e-05, -4.026203532703221e-05, -3.608482802519575e-05, -3.190762072335929e-05, -2.773041524051223e-05, -2.3553209757665172e-05, -1.937600245582871e-05, -1.519879515399225e-05, -1.102158785215579e-05, -6.844381459814031e-06, -2.6671750674722716e-06, 1.5100322343641892e-06, 5.68723953620065e-06, 9.864445019047707e-06, 1.4041652320884168e-05, 1.8218866898678243e-05, 2.2396074200514704e-05, 2.6573281502351165e-05, 3.0750488804187626e-05, 3.4927696106024086e-05, 3.910490340786055e-05, 4.32821070717182e-05, 4.745931437355466e-05, 5.163652167539112e-05, 5.5813728977227584e-05, 5.9990936279064044e-05, 6.41681399429217e-05, 6.834534724475816e-05, 7.252255454659462e-05, 7.669976184843108e-05, 8.087696915026754e-05, 8.5054176452104e-05, 8.923138375394046e-05, 9.340859105577692e-05, 9.758579835761338e-05, 0.00010176300565944985, 0.0001059402129612863, 0.00011011741298716515, 0.00011429462756495923, 0.00011847182759083807, 0.00012264902761671692, 0.000126826242194511, 0.00013100344222038984, 0.00013518065679818392, 0.00013935785682406276, 0.00014353507140185684, 0.00014771227142773569, 0.00015188948600552976]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 4.0, 4.0, 4.0, 6.0, 5.0, 9.0, 5.0, 13.0, 13.0, 14.0, 14.0, 25.0, 36.0, 30.0, 25.0, 22.0, 39.0, 43.0, 39.0, 51.0, 37.0, 36.0, 36.0, 50.0, 43.0, 44.0, 32.0, 43.0, 35.0, 30.0, 28.0, 18.0, 26.0, 20.0, 26.0, 24.0, 15.0, 14.0, 12.0, 10.0, 9.0, 7.0, 3.0, 5.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-8.14199447631836e-05, -7.878243923187256e-05, -7.614493370056152e-05, -7.350742816925049e-05, -7.086992263793945e-05, -6.823241710662842e-05, -6.559491157531738e-05, -6.295740604400635e-05, -6.031990051269531e-05, -5.768239498138428e-05, -5.504488945007324e-05, -5.240738391876221e-05, -4.976987838745117e-05, -4.713237285614014e-05, -4.44948673248291e-05, -4.1857361793518066e-05, -3.921985626220703e-05, -3.6582350730895996e-05, -3.394484519958496e-05, -3.1307339668273926e-05, -2.866983413696289e-05, -2.6032328605651855e-05, -2.339482307434082e-05, -2.0757317543029785e-05, -1.811981201171875e-05, -1.5482306480407715e-05, -1.284480094909668e-05, -1.0207295417785645e-05, -7.569789886474609e-06, -4.932284355163574e-06, -2.294778823852539e-06, 3.427267074584961e-07, 2.9802322387695312e-06, 5.617737770080566e-06, 8.255243301391602e-06, 1.0892748832702637e-05, 1.3530254364013672e-05, 
1.6167759895324707e-05, 1.8805265426635742e-05, 2.1442770957946777e-05, 2.4080276489257812e-05, 2.6717782020568848e-05, 2.9355287551879883e-05, 3.199279308319092e-05, 3.463029861450195e-05, 3.726780414581299e-05, 3.9905309677124023e-05, 4.254281520843506e-05, 4.5180320739746094e-05, 4.781782627105713e-05, 5.0455331802368164e-05, 5.30928373336792e-05, 5.5730342864990234e-05, 5.836784839630127e-05, 6.1005353927612305e-05, 6.364285945892334e-05, 6.628036499023438e-05, 6.891787052154541e-05, 7.155537605285645e-05, 7.419288158416748e-05, 7.683038711547852e-05, 7.946789264678955e-05, 8.210539817810059e-05, 8.474290370941162e-05, 8.738040924072266e-05]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 4.0, 12.0, 16.0, 30.0, 56.0, 87.0, 158.0, 306.0, 478.0, 919.0, 1682.0, 3415.0, 7458.0, 20808.0, 94961.0, 3931319.0, 92847.0, 21331.0, 8627.0, 4012.0, 2150.0, 1289.0, 837.0, 498.0, 338.0, 194.0, 152.0, 91.0, 64.0, 30.0, 21.0, 19.0, 10.0, 7.0, 10.0, 3.0, 4.0, 5.0, 3.0, 5.0, 3.0, 2.0, 3.0, 5.0, 5.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.971027374267578e-05, -4.702620208263397e-05, -4.434213042259216e-05, -4.1658058762550354e-05, -3.8973987102508545e-05, -3.6289915442466736e-05, -3.360584378242493e-05, -3.092177212238312e-05, -2.823770046234131e-05, -2.55536288022995e-05, -2.286955714225769e-05, -2.018548548221588e-05, -1.7501413822174072e-05, -1.4817342162132263e-05, -1.2133270502090454e-05, -9.449198842048645e-06, -6.765127182006836e-06, -4.081055521965027e-06, -1.3969838619232178e-06, 1.2870877981185913e-06, 3.9711594581604e-06, 6.6552311182022095e-06, 9.339302778244019e-06, 1.2023374438285828e-05, 1.4707446098327637e-05, 1.7391517758369446e-05, 2.0075589418411255e-05, 2.2759661078453064e-05, 2.5443732738494873e-05, 2.8127804398536682e-05, 3.081187605857849e-05, 3.34959477186203e-05, 3.618001937866211e-05, 3.886409103870392e-05, 4.154816269874573e-05, 4.423223435878754e-05, 4.6916306018829346e-05, 4.9600377678871155e-05, 5.2284449338912964e-05, 5.496852099895477e-05, 5.765259265899658e-05, 6.033666431903839e-05, 6.30207359790802e-05, 6.570480763912201e-05, 6.838887929916382e-05, 7.107295095920563e-05, 7.375702261924744e-05, 7.644109427928925e-05, 7.912516593933105e-05, 8.180923759937286e-05, 8.449330925941467e-05, 8.717738091945648e-05, 8.986145257949829e-05, 9.25455242395401e-05, 9.522959589958191e-05, 9.791366755962372e-05, 0.00010059773921966553, 0.00010328181087970734, 0.00010596588253974915, 0.00010864995419979095, 0.00011133402585983276, 0.00011401809751987457, 0.00011670216917991638, 0.00011938624083995819, 0.0001220703125]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 6.0, 8.0, 11.0, 17.0, 21.0, 34.0, 46.0, 81.0, 98.0, 103.0, 117.0, 108.0, 96.0, 63.0, 57.0, 42.0, 21.0, 16.0, 15.0, 9.0, 8.0, 9.0, 5.0, 5.0, 7.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3589859008789062e-05, -1.2862496078014374e-05, -1.2135133147239685e-05, -1.1407770216464996e-05, -1.0680407285690308e-05, -9.953044354915619e-06, -9.22568142414093e-06, -8.498318493366241e-06, -7.770955562591553e-06, -7.043592631816864e-06, -6.316229701042175e-06, -5.5888667702674866e-06, -4.861503839492798e-06, -4.134140908718109e-06, -3.4067779779434204e-06, -2.6794150471687317e-06, 
-1.952052116394043e-06, -1.2246891856193542e-06, -4.973262548446655e-07, 2.300366759300232e-07, 9.57399606704712e-07, 1.6847625374794006e-06, 2.4121254682540894e-06, 3.139488399028778e-06, 3.866851329803467e-06, 4.5942142605781555e-06, 5.321577191352844e-06, 6.048940122127533e-06, 6.776303052902222e-06, 7.50366598367691e-06, 8.231028914451599e-06, 8.958391845226288e-06, 9.685754776000977e-06, 1.0413117706775665e-05, 1.1140480637550354e-05, 1.1867843568325043e-05, 1.2595206499099731e-05, 1.332256942987442e-05, 1.4049932360649109e-05, 1.4777295291423798e-05, 1.5504658222198486e-05, 1.6232021152973175e-05, 1.6959384083747864e-05, 1.7686747014522552e-05, 1.841410994529724e-05, 1.914147287607193e-05, 1.986883580684662e-05, 2.0596198737621307e-05, 2.1323561668395996e-05, 2.2050924599170685e-05, 2.2778287529945374e-05, 2.3505650460720062e-05, 2.423301339149475e-05, 2.496037632226944e-05, 2.568773925304413e-05, 2.6415102183818817e-05, 2.7142465114593506e-05, 2.7869828045368195e-05, 2.8597190976142883e-05, 2.9324553906917572e-05, 3.005191683769226e-05, 3.077927976846695e-05, 3.150664269924164e-05, 3.223400563001633e-05, 3.2961368560791016e-05]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 11.0, 15.0, 22.0, 50.0, 66.0, 110.0, 187.0, 313.0, 552.0, 1000.0, 1834.0, 3619.0, 7480.0, 16572.0, 42601.0, 150917.0, 3654527.0, 228691.0, 49728.0, 18332.0, 8685.0, 4030.0, 2166.0, 1124.0, 683.0, 382.0, 228.0, 131.0, 89.0, 57.0, 25.0, 10.0, 13.0, 13.0, 5.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.888938903808594e-05, -5.723442882299423e-05, -5.557946860790253e-05, -5.392450839281082e-05, -5.2269548177719116e-05, -5.061458796262741e-05, -4.8959627747535706e-05, -4.7304667532444e-05, -4.5649707317352295e-05, -4.399474710226059e-05, -4.2339786887168884e-05, -4.068482667207718e-05, -3.9029866456985474e-05, -3.737490624189377e-05, -3.571994602680206e-05, -3.406498581171036e-05, -3.241002559661865e-05, -3.075506538152695e-05, -2.9100105166435242e-05, -2.7445144951343536e-05, -2.579018473625183e-05, -2.4135224521160126e-05, -2.248026430606842e-05, -2.0825304090976715e-05, -1.917034387588501e-05, -1.7515383660793304e-05, -1.58604234457016e-05, -1.4205463230609894e-05, -1.2550503015518188e-05, -1.0895542800426483e-05, -9.240582585334778e-06, -7.5856223702430725e-06, -5.930662155151367e-06, -4.275701940059662e-06, -2.6207417249679565e-06, -9.657815098762512e-07, 6.891787052154541e-07, 2.3441389203071594e-06, 3.999099135398865e-06, 5.65405935049057e-06, 7.309019565582275e-06, 8.96397978067398e-06, 1.0618939995765686e-05, 1.2273900210857391e-05, 1.3928860425949097e-05, 1.5583820641040802e-05, 1.7238780856132507e-05, 1.8893741071224213e-05, 2.0548701286315918e-05, 2.2203661501407623e-05, 2.385862171649933e-05, 2.5513581931591034e-05, 2.716854214668274e-05, 2.8823502361774445e-05, 3.047846257686615e-05, 3.2133422791957855e-05, 3.378838300704956e-05, 3.5443343222141266e-05, 3.709830343723297e-05, 3.8753263652324677e-05, 4.040822386741638e-05, 4.206318408250809e-05, 4.371814429759979e-05, 4.53731045126915e-05, 4.70280647277832e-05]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 5.0, 5.0, 1.0, 4.0, 4.0, 6.0, 10.0, 5.0, 13.0, 18.0, 13.0, 21.0, 22.0, 31.0, 47.0, 44.0, 82.0, 170.0, 495.0, 1665.0, 777.0, 264.0, 98.0, 60.0, 36.0, 33.0, 26.0, 19.0, 
17.0, 16.0, 11.0, 12.0, 5.0, 8.0, 5.0, 8.0, 6.0, 4.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.6464462280273438e-05, -2.5508925318717957e-05, -2.4553388357162476e-05, -2.3597851395606995e-05, -2.2642314434051514e-05, -2.1686777472496033e-05, -2.0731240510940552e-05, -1.977570354938507e-05, -1.882016658782959e-05, -1.786462962627411e-05, -1.6909092664718628e-05, -1.5953555703163147e-05, -1.4998018741607666e-05, -1.4042481780052185e-05, -1.3086944818496704e-05, -1.2131407856941223e-05, -1.1175870895385742e-05, -1.0220333933830261e-05, -9.26479697227478e-06, -8.3092600107193e-06, -7.353723049163818e-06, -6.398186087608337e-06, -5.4426491260528564e-06, -4.4871121644973755e-06, -3.5315752029418945e-06, -2.5760382413864136e-06, -1.6205012798309326e-06, -6.649643182754517e-07, 2.905726432800293e-07, 1.2461096048355103e-06, 2.201646566390991e-06, 3.157183527946472e-06, 4.112720489501953e-06, 5.068257451057434e-06, 6.023794412612915e-06, 6.979331374168396e-06, 7.934868335723877e-06, 8.890405297279358e-06, 9.845942258834839e-06, 1.080147922039032e-05, 1.17570161819458e-05, 1.2712553143501282e-05, 1.3668090105056763e-05, 1.4623627066612244e-05, 1.5579164028167725e-05, 1.6534700989723206e-05, 1.7490237951278687e-05, 1.8445774912834167e-05, 1.940131187438965e-05, 2.035684883594513e-05, 2.131238579750061e-05, 2.226792275905609e-05, 2.3223459720611572e-05, 2.4178996682167053e-05, 2.5134533643722534e-05, 2.6090070605278015e-05, 2.7045607566833496e-05, 2.8001144528388977e-05, 2.8956681489944458e-05, 2.991221845149994e-05, 3.086775541305542e-05, 3.18232923746109e-05, 3.277882933616638e-05, 3.373436629772186e-05, 3.4689903259277344e-05]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 2.0, 2.0, 5.0, 5.0, 6.0, 11.0, 14.0, 21.0, 33.0, 40.0, 66.0, 106.0, 118.0, 132.0, 114.0, 70.0, 64.0, 43.0, 38.0, 25.0, 18.0, 17.0, 11.0, 9.0, 3.0, 5.0, 6.0, 5.0, 7.0, 2.0, 4.0, 3.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010631288023432717, -0.0001009775442071259, -9.564221545588225e-05, -9.030687942868099e-05, -8.497154340147972e-05, -7.963621465023607e-05, -7.43008786230348e-05, -6.896554259583354e-05, -6.363021384458989e-05, -5.829488145536743e-05, -5.295954906614497e-05, -4.762421303894371e-05, -4.228888064972125e-05, -3.695354826049879e-05, -3.161821223329753e-05, -2.628287984407507e-05, -2.094754745485261e-05, -1.5612215065630153e-05, -1.0276880857418291e-05, -4.94154664920643e-06, 3.937857400160283e-07, 5.729118129238486e-06, 1.1064454156439751e-05, 1.639978654566221e-05, 2.1735118934884667e-05, 2.7070451324107125e-05, 3.2405783713329583e-05, 3.774111974053085e-05, 4.3076452129753307e-05, 4.8411784518975765e-05, 5.374712054617703e-05, 5.908245293539949e-05, 6.441777804866433e-05, 6.97531140758656e-05, 7.508844282710925e-05, 8.042377885431051e-05, 8.575910760555416e-05, 9.109444363275543e-05, 9.64297796599567e-05, 0.00010176510841120034, 0.00010710044443840161, 0.00011243578046560287, 0.00011777110921684653, 0.00012310643796809018, 0.00012844178127124906, 0.0001337771100224927, 0.00013911243877373636, 0.00014444778207689524, 0.0001497831108281389, 0.00015511843957938254, 0.00016045378288254142, 0.00016578911163378507, 0.00017112444038502872, 0.0001764597836881876, 0.00018179511243943125, 0.0001871304411906749, 0.00019246578449383378, 0.00019780111324507743, 0.0002031364565482363, 
0.00020847178529947996, 0.0002138071140507236, 0.00021914244280196726, 0.00022447778610512614, 0.0002298131148563698, 0.00023514844360761344]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 3.0, 8.0, 6.0, 8.0, 8.0, 15.0, 11.0, 20.0, 14.0, 30.0, 33.0, 24.0, 28.0, 37.0, 35.0, 53.0, 36.0, 38.0, 48.0, 49.0, 51.0, 56.0, 52.0, 49.0, 45.0, 31.0, 29.0, 26.0, 26.0, 22.0, 23.0, 23.0, 18.0, 11.0, 13.0, 2.0, 5.0, 4.0, 7.0, 2.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.0001024007797241211, -9.940657764673233e-05, -9.641237556934357e-05, -9.34181734919548e-05, -9.042397141456604e-05, -8.742976933717728e-05, -8.443556725978851e-05, -8.144136518239975e-05, -7.844716310501099e-05, -7.545296102762222e-05, -7.245875895023346e-05, -6.94645568728447e-05, -6.647035479545593e-05, -6.347615271806717e-05, -6.0481950640678406e-05, -5.748774856328964e-05, -5.449354648590088e-05, -5.1499344408512115e-05, -4.850514233112335e-05, -4.551094025373459e-05, -4.2516738176345825e-05, -3.952253609895706e-05, -3.65283340215683e-05, -3.3534131944179535e-05, -3.053992986679077e-05, -2.7545727789402008e-05, -2.4551525712013245e-05, -2.155732363462448e-05, -1.8563121557235718e-05, -1.5568919479846954e-05, -1.2574717402458191e-05, -9.580515325069427e-06, -6.586313247680664e-06, -3.5921111702919006e-06, -5.979090929031372e-07, 2.3962929844856262e-06, 5.39049506187439e-06, 8.384697139263153e-06, 1.1378899216651917e-05, 1.437310129404068e-05, 1.7367303371429443e-05, 2.0361505448818207e-05, 2.335570752620697e-05, 2.6349909603595734e-05, 2.9344111680984497e-05, 3.233831375837326e-05, 3.5332515835762024e-05, 3.832671791315079e-05, 4.132091999053955e-05, 4.4315122067928314e-05, 4.730932414531708e-05, 5.030352622270584e-05, 5.3297728300094604e-05, 5.629193037748337e-05, 5.928613245487213e-05, 6.22803345322609e-05, 6.527453660964966e-05, 6.826873868703842e-05, 7.126294076442719e-05, 7.425714284181595e-05, 7.725134491920471e-05, 8.024554699659348e-05, 8.323974907398224e-05, 8.6233951151371e-05, 8.922815322875977e-05]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 5.0, 11.0, 20.0, 25.0, 38.0, 78.0, 85.0, 150.0, 300.0, 657.0, 1528.0, 4202.0, 13010.0, 51032.0, 469408.0, 438900.0, 49691.0, 12264.0, 4153.0, 1648.0, 684.0, 254.0, 111.0, 79.0, 56.0, 37.0, 30.0, 24.0, 13.0, 14.0, 10.0, 6.0, 5.0, 9.0, 3.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014412403106689453, -0.00013947300612926483, -0.00013482198119163513, -0.00013017095625400543, -0.00012551993131637573, -0.00012086890637874603, -0.00011621788144111633, -0.00011156685650348663, -0.00010691583156585693, -0.00010226480662822723, -9.761378169059753e-05, -9.296275675296783e-05, -8.831173181533813e-05, -8.366070687770844e-05, -7.900968194007874e-05, -7.435865700244904e-05, -6.970763206481934e-05, -6.505660712718964e-05, -6.0405582189559937e-05, -5.575455725193024e-05, -5.110353231430054e-05, -4.645250737667084e-05, -4.180148243904114e-05, -3.715045750141144e-05, -3.249943256378174e-05, -2.784840762615204e-05, -2.319738268852234e-05, -1.854635775089264e-05, -1.389533281326294e-05, -9.24430787563324e-06, -4.59328293800354e-06, 5.774199962615967e-08, 4.708766937255859e-06, 9.359791874885559e-06, 1.4010816812515259e-05, 1.866184175014496e-05, 2.3312866687774658e-05, 2.7963891625404358e-05, 
3.261491656303406e-05, 3.726594150066376e-05, 4.191696643829346e-05, 4.656799137592316e-05, 5.1219016313552856e-05, 5.5870041251182556e-05, 6.0521066188812256e-05, 6.517209112644196e-05, 6.982311606407166e-05, 7.447414100170135e-05, 7.912516593933105e-05, 8.377619087696075e-05, 8.842721581459045e-05, 9.307824075222015e-05, 9.772926568984985e-05, 0.00010238029062747955, 0.00010703131556510925, 0.00011168234050273895, 0.00011633336544036865, 0.00012098439037799835, 0.00012563541531562805, 0.00013028644025325775, 0.00013493746519088745, 0.00013958849012851715, 0.00014423951506614685, 0.00014889054000377655, 0.00015354156494140625]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 5.0, 4.0, 5.0, 5.0, 14.0, 15.0, 24.0, 31.0, 58.0, 72.0, 89.0, 132.0, 137.0, 109.0, 93.0, 74.0, 42.0, 36.0, 22.0, 6.0, 6.0, 11.0, 7.0, 7.0, 7.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.817941665649414e-05, -1.7374753952026367e-05, -1.6570091247558594e-05, -1.576542854309082e-05, -1.4960765838623047e-05, -1.4156103134155273e-05, -1.33514404296875e-05, -1.2546777725219727e-05, -1.1742115020751953e-05, -1.093745231628418e-05, -1.0132789611816406e-05, -9.328126907348633e-06, -8.52346420288086e-06, -7.718801498413086e-06, -6.9141387939453125e-06, -6.109476089477539e-06, -5.304813385009766e-06, -4.500150680541992e-06, -3.6954879760742188e-06, -2.8908252716064453e-06, -2.086162567138672e-06, -1.2814998626708984e-06, -4.76837158203125e-07, 3.2782554626464844e-07, 1.1324882507324219e-06, 1.9371509552001953e-06, 2.7418136596679688e-06, 3.546476364135742e-06, 4.351139068603516e-06, 5.155801773071289e-06, 5.9604644775390625e-06, 6.765127182006836e-06, 7.569789886474609e-06, 8.374452590942383e-06, 9.179115295410156e-06, 9.98377799987793e-06, 1.0788440704345703e-05, 1.1593103408813477e-05, 1.239776611328125e-05, 1.3202428817749023e-05, 1.4007091522216797e-05, 1.481175422668457e-05, 1.5616416931152344e-05, 1.6421079635620117e-05, 1.722574234008789e-05, 1.8030405044555664e-05, 1.8835067749023438e-05, 1.963973045349121e-05, 2.0444393157958984e-05, 2.1249055862426758e-05, 2.205371856689453e-05, 2.2858381271362305e-05, 2.3663043975830078e-05, 2.446770668029785e-05, 2.5272369384765625e-05, 2.60770320892334e-05, 2.6881694793701172e-05, 2.7686357498168945e-05, 2.849102020263672e-05, 2.9295682907104492e-05, 3.0100345611572266e-05, 3.090500831604004e-05, 3.170967102050781e-05, 3.2514333724975586e-05, 3.331899642944336e-05]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 5.0, 10.0, 9.0, 14.0, 33.0, 60.0, 67.0, 114.0, 192.0, 342.0, 539.0, 1120.0, 1854.0, 3411.0, 6083.0, 11633.0, 21942.0, 43073.0, 101131.0, 312884.0, 346237.0, 101290.0, 47777.0, 22412.0, 11785.0, 6578.0, 3486.0, 1902.0, 1096.0, 627.0, 354.0, 184.0, 118.0, 65.0, 49.0, 27.0, 15.0, 14.0, 8.0, 5.0, 6.0, 3.0, 6.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.863739013671875e-05, -4.724878817796707e-05, -4.586018621921539e-05, -4.4471584260463715e-05, -4.3082982301712036e-05, -4.169438034296036e-05, -4.030577838420868e-05, -3.8917176425457e-05, -3.752857446670532e-05, -3.6139972507953644e-05, -3.4751370549201965e-05, -3.336276859045029e-05, -3.197416663169861e-05, -3.058556467294693e-05, -2.919696271419525e-05, -2.7808360755443573e-05, 
-2.6419758796691895e-05, -2.5031156837940216e-05, -2.3642554879188538e-05, -2.225395292043686e-05, -2.086535096168518e-05, -1.9476749002933502e-05, -1.8088147044181824e-05, -1.6699545085430145e-05, -1.5310943126678467e-05, -1.3922341167926788e-05, -1.253373920917511e-05, -1.1145137250423431e-05, -9.756535291671753e-06, -8.367933332920074e-06, -6.979331374168396e-06, -5.5907294154167175e-06, -4.202127456665039e-06, -2.8135254979133606e-06, -1.4249235391616821e-06, -3.632158041000366e-08, 1.3522803783416748e-06, 2.7408823370933533e-06, 4.129484295845032e-06, 5.51808625459671e-06, 6.906688213348389e-06, 8.295290172100067e-06, 9.683892130851746e-06, 1.1072494089603424e-05, 1.2461096048355103e-05, 1.3849698007106781e-05, 1.523829996585846e-05, 1.6626901924610138e-05, 1.8015503883361816e-05, 1.9404105842113495e-05, 2.0792707800865173e-05, 2.2181309759616852e-05, 2.356991171836853e-05, 2.495851367712021e-05, 2.6347115635871887e-05, 2.7735717594623566e-05, 2.9124319553375244e-05, 3.0512921512126923e-05, 3.19015234708786e-05, 3.329012542963028e-05, 3.467872738838196e-05, 3.6067329347133636e-05, 3.7455931305885315e-05, 3.8844533264636993e-05, 4.023313522338867e-05]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 5.0, 5.0, 3.0, 5.0, 12.0, 13.0, 14.0, 12.0, 14.0, 13.0, 21.0, 33.0, 31.0, 30.0, 36.0, 40.0, 40.0, 51.0, 50.0, 60.0, 65.0, 38.0, 37.0, 36.0, 40.0, 45.0, 43.0, 35.0, 21.0, 31.0, 26.0, 17.0, 17.0, 11.0, 15.0, 13.0, 5.0, 8.0, 3.0, 7.0, 2.0, 2.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.802776336669922e-05, -3.6942772567272186e-05, -3.5857781767845154e-05, -3.477279096841812e-05, -3.368780016899109e-05, -3.2602809369564056e-05, -3.1517818570137024e-05, -3.043282777070999e-05, -2.934783697128296e-05, -2.8262846171855927e-05, -2.7177855372428894e-05, -2.609286457300186e-05, -2.500787377357483e-05, -2.3922882974147797e-05, -2.2837892174720764e-05, -2.175290137529373e-05, -2.06679105758667e-05, -1.9582919776439667e-05, -1.8497928977012634e-05, -1.7412938177585602e-05, -1.632794737815857e-05, -1.5242956578731537e-05, -1.4157965779304504e-05, -1.3072974979877472e-05, -1.198798418045044e-05, -1.0902993381023407e-05, -9.818002581596375e-06, -8.733011782169342e-06, -7.64802098274231e-06, -6.563030183315277e-06, -5.478039383888245e-06, -4.393048584461212e-06, -3.3080577850341797e-06, -2.2230669856071472e-06, -1.1380761861801147e-06, -5.3085386753082275e-08, 1.0319054126739502e-06, 2.1168962121009827e-06, 3.201887011528015e-06, 4.286877810955048e-06, 5.37186861038208e-06, 6.4568594098091125e-06, 7.541850209236145e-06, 8.626841008663177e-06, 9.71183180809021e-06, 1.0796822607517242e-05, 1.1881813406944275e-05, 1.2966804206371307e-05, 1.405179500579834e-05, 1.5136785805225372e-05, 1.6221776604652405e-05, 1.7306767404079437e-05, 1.839175820350647e-05, 1.9476749002933502e-05, 2.0561739802360535e-05, 2.1646730601787567e-05, 2.27317214012146e-05, 2.3816712200641632e-05, 2.4901703000068665e-05, 2.5986693799495697e-05, 2.707168459892273e-05, 2.8156675398349762e-05, 2.9241666197776794e-05, 3.0326656997203827e-05, 3.141164779663086e-05]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 4.0, 9.0, 20.0, 35.0, 30.0, 40.0, 84.0, 90.0, 113.0, 214.0, 328.0, 533.0, 897.0, 1317.0, 2292.0, 3871.0, 6503.0, 11712.0, 21259.0, 124386.0, 184350.0, 331568.0, 185184.0, 83651.0, 40596.0, 
21514.0, 11676.0, 6584.0, 3814.0, 2175.0, 1318.0, 839.0, 556.0, 317.0, 222.0, 134.0, 105.0, 62.0, 53.0, 29.0, 19.0, 18.0, 9.0, 13.0, 8.0, 1.0, 3.0, 0.0, 2.0, 3.0, 3.0], "bins": [-2.1457672119140625e-06, -2.084299921989441e-06, -2.0228326320648193e-06, -1.9613653421401978e-06, -1.8998980522155762e-06, -1.8384307622909546e-06, -1.776963472366333e-06, -1.7154961824417114e-06, -1.6540288925170898e-06, -1.5925616025924683e-06, -1.5310943126678467e-06, -1.469627022743225e-06, -1.4081597328186035e-06, -1.346692442893982e-06, -1.2852251529693604e-06, -1.2237578630447388e-06, -1.1622905731201172e-06, -1.1008232831954956e-06, -1.039355993270874e-06, -9.778887033462524e-07, -9.164214134216309e-07, -8.549541234970093e-07, -7.934868335723877e-07, -7.320195436477661e-07, -6.705522537231445e-07, -6.09084963798523e-07, -5.476176738739014e-07, -4.861503839492798e-07, -4.246830940246582e-07, -3.632158041000366e-07, -3.0174851417541504e-07, -2.4028122425079346e-07, -1.7881393432617188e-07, -1.1734664440155029e-07, -5.587935447692871e-08, 5.587935447692871e-09, 6.705522537231445e-08, 1.2852251529693604e-07, 1.8998980522155762e-07, 2.514570951461792e-07, 3.129243850708008e-07, 3.7439167499542236e-07, 4.3585896492004395e-07, 4.973262548446655e-07, 5.587935447692871e-07, 6.202608346939087e-07, 6.817281246185303e-07, 7.431954145431519e-07, 8.046627044677734e-07, 8.66129994392395e-07, 9.275972843170166e-07, 9.890645742416382e-07, 1.0505318641662598e-06, 1.1119991540908813e-06, 1.173466444015503e-06, 1.2349337339401245e-06, 1.296401023864746e-06, 1.3578683137893677e-06, 1.4193356037139893e-06, 1.4808028936386108e-06, 1.5422701835632324e-06, 1.603737473487854e-06, 1.6652047634124756e-06, 1.7266720533370972e-06, 1.7881393432617188e-06]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 0.0, 6.0, 7.0, 0.0, 10.0, 15.0, 0.0, 14.0, 0.0, 20.0, 36.0, 0.0, 51.0, 42.0, 0.0, 49.0, 0.0, 64.0, 80.0, 0.0, 65.0, 98.0, 0.0, 85.0, 0.0, 70.0, 77.0, 0.0, 49.0, 43.0, 0.0, 30.0, 0.0, 26.0, 22.0, 0.0, 19.0, 14.0, 0.0, 10.0, 0.0, 6.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.3709068298339844e-06, -1.3336539268493652e-06, -1.296401023864746e-06, -1.259148120880127e-06, -1.2218952178955078e-06, -1.1846423149108887e-06, -1.1473894119262695e-06, -1.1101365089416504e-06, -1.0728836059570312e-06, -1.0356307029724121e-06, -9.98377799987793e-07, -9.611248970031738e-07, -9.238719940185547e-07, -8.866190910339355e-07, -8.493661880493164e-07, -8.121132850646973e-07, -7.748603820800781e-07, -7.37607479095459e-07, -7.003545761108398e-07, -6.631016731262207e-07, -6.258487701416016e-07, -5.885958671569824e-07, -5.513429641723633e-07, -5.140900611877441e-07, -4.76837158203125e-07, -4.3958425521850586e-07, -4.023313522338867e-07, -3.650784492492676e-07, -3.2782554626464844e-07, -2.905726432800293e-07, -2.5331974029541016e-07, -2.1606683731079102e-07, -1.7881393432617188e-07, -1.4156103134155273e-07, -1.043081283569336e-07, -6.705522537231445e-08, -2.9802322387695312e-08, 7.450580596923828e-09, 4.470348358154297e-08, 8.195638656616211e-08, 1.1920928955078125e-07, 1.564621925354004e-07, 1.9371509552001953e-07, 2.3096799850463867e-07, 2.682209014892578e-07, 3.0547380447387695e-07, 3.427267074584961e-07, 3.7997961044311523e-07, 4.172325134277344e-07, 4.544854164123535e-07, 4.917383193969727e-07, 5.289912223815918e-07, 5.662441253662109e-07, 6.034970283508301e-07, 6.407499313354492e-07, 6.780028343200684e-07, 7.152557373046875e-07, 
7.525086402893066e-07, 7.897615432739258e-07, 8.270144462585449e-07, 8.642673492431641e-07, 9.015202522277832e-07, 9.387731552124023e-07, 9.760260581970215e-07, 1.0132789611816406e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 5.0, 1.0, 0.0, 3.0, 2.0, 6.0, 7.0, 15.0, 28.0, 0.0, 34.0, 60.0, 120.0, 208.0, 310.0, 565.0, 0.0, 1080.0, 1955.0, 3567.0, 6971.0, 13751.0, 29315.0, 0.0, 68581.0, 184460.0, 426076.0, 185368.0, 67942.0, 29443.0, 0.0, 13872.0, 6854.0, 3524.0, 1967.0, 1086.0, 590.0, 0.0, 348.0, 199.0, 93.0, 50.0, 34.0, 26.0, 0.0, 18.0, 13.0, 10.0, 2.0, 3.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6093254089355469e-06, -1.5581026673316956e-06, -1.5068799257278442e-06, -1.455657184123993e-06, -1.4044344425201416e-06, -1.3532117009162903e-06, -1.301988959312439e-06, -1.2507662177085876e-06, -1.1995434761047363e-06, -1.148320734500885e-06, -1.0970979928970337e-06, -1.0458752512931824e-06, -9.94652509689331e-07, -9.434297680854797e-07, -8.922070264816284e-07, -8.409842848777771e-07, -7.897615432739258e-07, -7.385388016700745e-07, -6.873160600662231e-07, -6.360933184623718e-07, -5.848705768585205e-07, -5.336478352546692e-07, -4.824250936508179e-07, -4.3120235204696655e-07, -3.7997961044311523e-07, -3.287568688392639e-07, -2.775341272354126e-07, -2.2631138563156128e-07, -1.7508864402770996e-07, -1.2386590242385864e-07, -7.264316082000732e-08, -2.1420419216156006e-08, 2.9802322387695312e-08, 8.102506399154663e-08, 1.3224780559539795e-07, 1.8347054719924927e-07, 2.3469328880310059e-07, 2.859160304069519e-07, 3.371387720108032e-07, 3.8836151361465454e-07, 4.3958425521850586e-07, 4.908069968223572e-07, 5.420297384262085e-07, 5.932524800300598e-07, 6.444752216339111e-07, 6.956979632377625e-07, 7.469207048416138e-07, 7.981434464454651e-07, 8.493661880493164e-07, 9.005889296531677e-07, 9.51811671257019e-07, 1.0030344128608704e-06, 1.0542571544647217e-06, 1.105479896068573e-06, 1.1567026376724243e-06, 1.2079253792762756e-06, 1.259148120880127e-06, 1.3103708624839783e-06, 1.3615936040878296e-06, 1.412816345691681e-06, 1.4640390872955322e-06, 1.5152618288993835e-06, 1.5664845705032349e-06, 1.6177073121070862e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 3.0, 9.0, 0.0, 7.0, 10.0, 0.0, 15.0, 15.0, 0.0, 20.0, 36.0, 0.0, 34.0, 0.0, 54.0, 61.0, 0.0, 59.0, 76.0, 0.0, 72.0, 87.0, 0.0, 81.0, 0.0, 76.0, 62.0, 0.0, 45.0, 37.0, 0.0, 30.0, 29.0, 0.0, 28.0, 23.0, 0.0, 11.0, 0.0, 6.0, 4.0, 0.0, 11.0, 5.0, 0.0, 5.0, 0.0, 0.0, 2.0, 1.0], "bins": [-1.3709068298339844e-06, -1.3327226042747498e-06, -1.2945383787155151e-06, -1.2563541531562805e-06, -1.218169927597046e-06, -1.1799857020378113e-06, -1.1418014764785767e-06, -1.103617250919342e-06, -1.0654330253601074e-06, -1.0272487998008728e-06, -9.890645742416382e-07, -9.508803486824036e-07, -9.126961231231689e-07, -8.745118975639343e-07, -8.363276720046997e-07, -7.981434464454651e-07, -7.599592208862305e-07, -7.217749953269958e-07, -6.835907697677612e-07, -6.454065442085266e-07, -6.07222318649292e-07, -5.690380930900574e-07, -5.308538675308228e-07, -4.926696419715881e-07, -4.544854164123535e-07, -4.163011908531189e-07, -3.781169652938843e-07, -3.3993273973464966e-07, -3.0174851417541504e-07, -2.635642886161804e-07, -2.253800630569458e-07, -1.8719583749771118e-07, -1.4901161193847656e-07, -1.1082738637924194e-07, 
-7.264316082000732e-08, -3.4458935260772705e-08, 3.725290298461914e-09, 4.190951585769653e-08, 8.009374141693115e-08, 1.1827796697616577e-07, 1.564621925354004e-07, 1.94646418094635e-07, 2.3283064365386963e-07, 2.7101486921310425e-07, 3.0919909477233887e-07, 3.473833203315735e-07, 3.855675458908081e-07, 4.237517714500427e-07, 4.6193599700927734e-07, 5.00120222568512e-07, 5.383044481277466e-07, 5.764886736869812e-07, 6.146728992462158e-07, 6.528571248054504e-07, 6.910413503646851e-07, 7.292255759239197e-07, 7.674098014831543e-07, 8.055940270423889e-07, 8.437782526016235e-07, 8.819624781608582e-07, 9.201467037200928e-07, 9.583309292793274e-07, 9.96515154838562e-07, 1.0346993803977966e-06, 1.0728836059570312e-06]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 4.0, 7.0, 12.0, 10.0, 10.0, 22.0, 42.0, 51.0, 74.0, 177.0, 187.0, 83.0, 56.0, 48.0, 49.0, 35.0, 28.0, 24.0, 15.0, 15.0, 13.0, 8.0, 5.0, 5.0, 4.0, 5.0, 5.0, 5.0, 3.0, 3.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00014240063319448382, -0.00013765542826149613, -0.00013291022332850844, -0.00012816501839552075, -0.00012341981346253306, -0.00011867460852954537, -0.00011392939632060006, -0.00010918419138761237, -0.00010443898645462468, -9.969378152163699e-05, -9.49485765886493e-05, -9.020337165566161e-05, -8.545815944671631e-05, -8.071295451372862e-05, -7.596774958074093e-05, -7.122254464775324e-05, -6.647733971476555e-05, -6.173213478177786e-05, -5.698692984879017e-05, -5.224172127782367e-05, -4.749651634483598e-05, -4.275131141184829e-05, -3.8006102840881795e-05, -3.3260897907894105e-05, -2.8515692974906415e-05, -2.3770488041918725e-05, -1.902528128994163e-05, -1.428007544745924e-05, -9.534869604976848e-06, -4.789664671989158e-06, -4.445792001206428e-08, 4.700748831965029e-06, 9.445968316867948e-06, 1.419117415935034e-05, 1.893638000183273e-05, 2.3681586753809825e-05, 2.8426791686797515e-05, 3.3171996619785205e-05, 3.79172051907517e-05, 4.266241012373939e-05, 4.740761505672708e-05, 5.215281998971477e-05, 5.689802492270246e-05, 6.164322985569015e-05, 6.638844206463546e-05, 7.113364699762315e-05, 7.587885193061084e-05, 8.062405686359853e-05, 8.536926179658622e-05, 9.01144667295739e-05, 9.48596716625616e-05, 9.960487659554929e-05, 0.00010435008152853698, 0.00010909528646152467, 0.00011384049867046997, 0.00011858570360345766, 0.00012333091581240296, 0.00012807612074539065, 0.00013282132567837834, 0.00013756653061136603, 0.00014231173554435372, 0.0001470569404773414, 0.0001518021454103291, 0.0001565473503433168, 0.00016129255527630448]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 4.0, 5.0, 6.0, 4.0, 5.0, 12.0, 6.0, 8.0, 19.0, 21.0, 17.0, 25.0, 23.0, 31.0, 47.0, 38.0, 39.0, 40.0, 29.0, 40.0, 38.0, 44.0, 48.0, 39.0, 52.0, 36.0, 35.0, 28.0, 30.0, 22.0, 35.0, 27.0, 32.0, 21.0, 15.0, 20.0, 15.0, 14.0, 7.0, 8.0, 3.0, 9.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-8.64267349243164e-05, -8.36588442325592e-05, -8.0890953540802e-05, -7.81230628490448e-05, -7.53551721572876e-05, -7.25872814655304e-05, -6.98193907737732e-05, -6.705150008201599e-05, -6.428360939025879e-05, -6.151571869850159e-05, -5.8747828006744385e-05, -5.597993731498718e-05, -5.321204662322998e-05, -5.044415593147278e-05, -4.7676265239715576e-05, -4.4908374547958374e-05, 
-4.214048385620117e-05, -3.937259316444397e-05, -3.660470247268677e-05, -3.3836811780929565e-05, -3.106892108917236e-05, -2.830103039741516e-05, -2.553313970565796e-05, -2.2765249013900757e-05, -1.9997358322143555e-05, -1.7229467630386353e-05, -1.446157693862915e-05, -1.1693686246871948e-05, -8.925795555114746e-06, -6.157904863357544e-06, -3.390014171600342e-06, -6.221234798431396e-07, 2.1457672119140625e-06, 4.913657903671265e-06, 7.681548595428467e-06, 1.0449439287185669e-05, 1.3217329978942871e-05, 1.5985220670700073e-05, 1.8753111362457275e-05, 2.1521002054214478e-05, 2.428889274597168e-05, 2.7056783437728882e-05, 2.9824674129486084e-05, 3.2592564821243286e-05, 3.536045551300049e-05, 3.812834620475769e-05, 4.089623689651489e-05, 4.3664127588272095e-05, 4.64320182800293e-05, 4.91999089717865e-05, 5.19677996635437e-05, 5.47356903553009e-05, 5.7503581047058105e-05, 6.027147173881531e-05, 6.303936243057251e-05, 6.580725312232971e-05, 6.857514381408691e-05, 7.134303450584412e-05, 7.411092519760132e-05, 7.687881588935852e-05, 7.964670658111572e-05, 8.241459727287292e-05, 8.518248796463013e-05, 8.795037865638733e-05, 9.071826934814453e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 13.0, 18.0, 32.0, 55.0, 87.0, 114.0, 197.0, 310.0, 513.0, 724.0, 1418.0, 2589.0, 5123.0, 12323.0, 36440.0, 450805.0, 3587970.0, 61826.0, 17932.0, 7452.0, 3423.0, 1933.0, 1111.0, 700.0, 392.0, 241.0, 153.0, 103.0, 75.0, 52.0, 26.0, 23.0, 15.0, 12.0, 12.0, 10.0, 11.0, 6.0, 6.0, 4.0, 7.0, 4.0, 6.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.9948692321777344e-05, -4.769954830408096e-05, -4.545040428638458e-05, -4.32012602686882e-05, -4.095211625099182e-05, -3.870297223329544e-05, -3.645382821559906e-05, -3.420468419790268e-05, -3.19555401802063e-05, -2.9706396162509918e-05, -2.7457252144813538e-05, -2.5208108127117157e-05, -2.2958964109420776e-05, -2.0709820091724396e-05, -1.8460676074028015e-05, -1.6211532056331635e-05, -1.3962388038635254e-05, -1.1713244020938873e-05, -9.464100003242493e-06, -7.214955985546112e-06, -4.9658119678497314e-06, -2.716667950153351e-06, -4.675239324569702e-07, 1.7816200852394104e-06, 4.030764102935791e-06, 6.279908120632172e-06, 8.529052138328552e-06, 1.0778196156024933e-05, 1.3027340173721313e-05, 1.5276484191417694e-05, 1.7525628209114075e-05, 1.9774772226810455e-05, 2.2023916244506836e-05, 2.4273060262203217e-05, 2.6522204279899597e-05, 2.8771348297595978e-05, 3.102049231529236e-05, 3.326963633298874e-05, 3.551878035068512e-05, 3.77679243683815e-05, 4.001706838607788e-05, 4.226621240377426e-05, 4.451535642147064e-05, 4.676450043916702e-05, 4.90136444568634e-05, 5.1262788474559784e-05, 5.3511932492256165e-05, 5.5761076509952545e-05, 5.8010220527648926e-05, 6.0259364545345306e-05, 6.250850856304169e-05, 6.475765258073807e-05, 6.700679659843445e-05, 6.925594061613083e-05, 7.150508463382721e-05, 7.375422865152359e-05, 7.600337266921997e-05, 7.825251668691635e-05, 8.050166070461273e-05, 8.275080472230911e-05, 8.499994874000549e-05, 8.724909275770187e-05, 8.949823677539825e-05, 9.174738079309464e-05, 9.399652481079102e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 1.0, 6.0, 3.0, 7.0, 6.0, 10.0, 14.0, 23.0, 34.0, 60.0, 60.0, 99.0, 106.0, 118.0, 101.0, 90.0, 73.0, 71.0, 27.0, 20.0, 23.0, 15.0, 8.0, 12.0, 11.0, 4.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5974044799804688e-05, -1.5273690223693848e-05, -1.4573335647583008e-05, -1.3872981071472168e-05, -1.3172626495361328e-05, -1.2472271919250488e-05, -1.1771917343139648e-05, -1.1071562767028809e-05, -1.0371208190917969e-05, -9.670853614807129e-06, -8.970499038696289e-06, -8.27014446258545e-06, -7.569789886474609e-06, -6.8694353103637695e-06, -6.16908073425293e-06, -5.46872615814209e-06, -4.76837158203125e-06, -4.06801700592041e-06, -3.3676624298095703e-06, -2.6673078536987305e-06, -1.9669532775878906e-06, -1.2665987014770508e-06, -5.662441253662109e-07, 1.341104507446289e-07, 8.344650268554688e-07, 1.5348196029663086e-06, 2.2351741790771484e-06, 2.9355287551879883e-06, 3.635883331298828e-06, 4.336237907409668e-06, 5.036592483520508e-06, 5.736947059631348e-06, 6.4373016357421875e-06, 7.137656211853027e-06, 7.838010787963867e-06, 8.538365364074707e-06, 9.238719940185547e-06, 9.939074516296387e-06, 1.0639429092407227e-05, 1.1339783668518066e-05, 1.2040138244628906e-05, 1.2740492820739746e-05, 1.3440847396850586e-05, 1.4141201972961426e-05, 1.4841556549072266e-05, 1.5541911125183105e-05, 1.6242265701293945e-05, 1.6942620277404785e-05, 1.7642974853515625e-05, 1.8343329429626465e-05, 1.9043684005737305e-05, 1.9744038581848145e-05, 2.0444393157958984e-05, 2.1144747734069824e-05, 2.1845102310180664e-05, 2.2545456886291504e-05, 2.3245811462402344e-05, 2.3946166038513184e-05, 2.4646520614624023e-05, 2.5346875190734863e-05, 2.6047229766845703e-05, 2.6747584342956543e-05, 2.7447938919067383e-05, 2.8148293495178223e-05, 2.8848648071289062e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 2.0, 4.0, 4.0, 1.0, 2.0, 6.0, 11.0, 23.0, 42.0, 46.0, 58.0, 133.0, 199.0, 352.0, 654.0, 1186.0, 2296.0, 4208.0, 8836.0, 19001.0, 42644.0, 148170.0, 3240613.0, 592637.0, 78270.0, 27911.0, 13296.0, 6384.0, 3381.0, 1825.0, 926.0, 485.0, 269.0, 167.0, 86.0, 56.0, 30.0, 23.0, 18.0, 9.0, 8.0, 3.0, 3.0, 7.0, 3.0, 0.0, 1.0, 1.0, 2.0, 2.0], "bins": [-4.7326087951660156e-05, -4.6039000153541565e-05, -4.4751912355422974e-05, -4.346482455730438e-05, -4.217773675918579e-05, -4.08906489610672e-05, -3.960356116294861e-05, -3.831647336483002e-05, -3.7029385566711426e-05, -3.5742297768592834e-05, -3.445520997047424e-05, -3.316812217235565e-05, -3.188103437423706e-05, -3.059394657611847e-05, -2.9306858777999878e-05, -2.8019770979881287e-05, -2.6732683181762695e-05, -2.5445595383644104e-05, -2.4158507585525513e-05, -2.287141978740692e-05, -2.158433198928833e-05, -2.029724419116974e-05, -1.9010156393051147e-05, -1.7723068594932556e-05, -1.6435980796813965e-05, -1.5148892998695374e-05, -1.3861805200576782e-05, -1.2574717402458191e-05, -1.12876296043396e-05, -1.0000541806221008e-05, -8.713454008102417e-06, -7.426366209983826e-06, -6.139278411865234e-06, -4.852190613746643e-06, -3.5651028156280518e-06, -2.2780150175094604e-06, -9.909272193908691e-07, 2.9616057872772217e-07, 1.5832483768463135e-06, 2.8703361749649048e-06, 4.157423973083496e-06, 5.444511771202087e-06, 6.731599569320679e-06, 8.01868736743927e-06, 9.305775165557861e-06, 1.0592862963676453e-05, 1.1879950761795044e-05, 1.3167038559913635e-05, 1.4454126358032227e-05, 1.5741214156150818e-05, 1.702830195426941e-05, 1.8315389752388e-05, 1.9602477550506592e-05, 2.0889565348625183e-05, 2.2176653146743774e-05, 2.3463740944862366e-05, 
2.4750828742980957e-05, 2.603791654109955e-05, 2.732500433921814e-05, 2.861209213733673e-05, 2.9899179935455322e-05, 3.1186267733573914e-05, 3.2473355531692505e-05, 3.3760443329811096e-05, 3.504753112792969e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 9.0, 2.0, 5.0, 4.0, 3.0, 6.0, 12.0, 9.0, 12.0, 11.0, 25.0, 20.0, 17.0, 30.0, 29.0, 61.0, 78.0, 159.0, 289.0, 635.0, 1320.0, 648.0, 236.0, 116.0, 67.0, 52.0, 42.0, 30.0, 24.0, 18.0, 13.0, 17.0, 9.0, 15.0, 7.0, 8.0, 8.0, 9.0, 5.0, 2.0, 7.0, 6.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.396106719970703e-05, -2.3200176656246185e-05, -2.243928611278534e-05, -2.1678395569324493e-05, -2.0917505025863647e-05, -2.01566144824028e-05, -1.9395723938941956e-05, -1.863483339548111e-05, -1.7873942852020264e-05, -1.7113052308559418e-05, -1.6352161765098572e-05, -1.5591271221637726e-05, -1.483038067817688e-05, -1.4069490134716034e-05, -1.3308599591255188e-05, -1.2547709047794342e-05, -1.1786818504333496e-05, -1.102592796087265e-05, -1.0265037417411804e-05, -9.504146873950958e-06, -8.743256330490112e-06, -7.982365787029266e-06, -7.22147524356842e-06, -6.4605847001075745e-06, -5.6996941566467285e-06, -4.9388036131858826e-06, -4.177913069725037e-06, -3.4170225262641907e-06, -2.6561319828033447e-06, -1.8952414393424988e-06, -1.1343508958816528e-06, -3.734603524208069e-07, 3.8743019104003906e-07, 1.148320734500885e-06, 1.909211277961731e-06, 2.670101821422577e-06, 3.430992364883423e-06, 4.191882908344269e-06, 4.952773451805115e-06, 5.713663995265961e-06, 6.474554538726807e-06, 7.235445082187653e-06, 7.996335625648499e-06, 8.757226169109344e-06, 9.51811671257019e-06, 1.0279007256031036e-05, 1.1039897799491882e-05, 1.1800788342952728e-05, 1.2561678886413574e-05, 1.332256942987442e-05, 1.4083459973335266e-05, 1.4844350516796112e-05, 1.5605241060256958e-05, 1.6366131603717804e-05, 1.712702214717865e-05, 1.7887912690639496e-05, 1.8648803234100342e-05, 1.9409693777561188e-05, 2.0170584321022034e-05, 2.093147486448288e-05, 2.1692365407943726e-05, 2.245325595140457e-05, 2.3214146494865417e-05, 2.3975037038326263e-05, 2.473592758178711e-05]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 1.0, 4.0, 3.0, 3.0, 4.0, 4.0, 5.0, 9.0, 15.0, 14.0, 24.0, 40.0, 44.0, 77.0, 103.0, 115.0, 102.0, 79.0, 68.0, 49.0, 58.0, 44.0, 28.0, 31.0, 21.0, 12.0, 12.0, 8.0, 2.0, 3.0, 7.0, 2.0, 1.0, 4.0, 2.0, 0.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.485634654993191e-05, -9.103510819841176e-05, -8.721386984689161e-05, -8.339263149537146e-05, -7.957139314385131e-05, -7.575015479233116e-05, -7.192891644081101e-05, -6.810767808929086e-05, -6.42864397377707e-05, -6.0465201386250556e-05, -5.6643963034730405e-05, -5.2822724683210254e-05, -4.90014863316901e-05, -4.518024798016995e-05, -4.13590096286498e-05, -3.753777127712965e-05, -3.37165329256095e-05, -2.989529457408935e-05, -2.6074056222569197e-05, -2.2252817871049047e-05, -1.8431579519528896e-05, -1.4610341168008745e-05, -1.0789102816488594e-05, -6.967864464968443e-06, -3.146626113448292e-06, 6.746122380718589e-07, 4.49585058959201e-06, 8.31708894111216e-06, 1.2138327292632312e-05, 1.5959565644152462e-05, 1.9780803995672613e-05, 2.3602042347192764e-05, 2.74232734227553e-05, 3.124451177427545e-05, 3.50657501257956e-05, 3.8886988477315754e-05, 
4.2708226828835905e-05, 4.6529465180356055e-05, 5.0350703531876206e-05, 5.417194188339636e-05, 5.799318023491651e-05, 6.181441858643666e-05, 6.563565693795681e-05, 6.945689528947696e-05, 7.327813364099711e-05, 7.709937199251726e-05, 8.092061034403741e-05, 8.474184869555756e-05, 8.856308704707772e-05, 9.238432539859787e-05, 9.620556375011802e-05, 0.00010002680210163817, 0.00010384804045315832, 0.00010766927880467847, 0.00011149051715619862, 0.00011531175550771877, 0.00011913299385923892, 0.00012295422493480146, 0.00012677547056227922, 0.000130596716189757, 0.00013441794726531953, 0.00013823917834088206, 0.00014206042396835983, 0.0001458816695958376, 0.00014970290067140013]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 3.0, 8.0, 4.0, 2.0, 5.0, 3.0, 12.0, 18.0, 8.0, 22.0, 19.0, 18.0, 19.0, 29.0, 24.0, 25.0, 31.0, 36.0, 36.0, 36.0, 33.0, 45.0, 37.0, 39.0, 30.0, 44.0, 34.0, 39.0, 33.0, 26.0, 34.0, 36.0, 35.0, 21.0, 20.0, 24.0, 15.0, 13.0, 23.0, 15.0, 12.0, 7.0, 3.0, 12.0, 9.0, 3.0, 5.0, 1.0, 5.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-7.104873657226562e-05, -6.895512342453003e-05, -6.686151027679443e-05, -6.476789712905884e-05, -6.267428398132324e-05, -6.0580670833587646e-05, -5.848705768585205e-05, -5.6393444538116455e-05, -5.429983139038086e-05, -5.2206218242645264e-05, -5.011260509490967e-05, -4.801899194717407e-05, -4.5925378799438477e-05, -4.383176565170288e-05, -4.1738152503967285e-05, -3.964453935623169e-05, -3.7550926208496094e-05, -3.54573130607605e-05, -3.33636999130249e-05, -3.127008676528931e-05, -2.917647361755371e-05, -2.7082860469818115e-05, -2.498924732208252e-05, -2.2895634174346924e-05, -2.0802021026611328e-05, -1.8708407878875732e-05, -1.6614794731140137e-05, -1.4521181583404541e-05, -1.2427568435668945e-05, -1.033395528793335e-05, -8.240342140197754e-06, -6.146728992462158e-06, -4.0531158447265625e-06, -1.959502696990967e-06, 1.341104507446289e-07, 2.2277235984802246e-06, 4.32133674621582e-06, 6.414949893951416e-06, 8.508563041687012e-06, 1.0602176189422607e-05, 1.2695789337158203e-05, 1.4789402484893799e-05, 1.6883015632629395e-05, 1.897662878036499e-05, 2.1070241928100586e-05, 2.316385507583618e-05, 2.5257468223571777e-05, 2.7351081371307373e-05, 2.944469451904297e-05, 3.1538307666778564e-05, 3.363192081451416e-05, 3.5725533962249756e-05, 3.781914710998535e-05, 3.991276025772095e-05, 4.200637340545654e-05, 4.409998655319214e-05, 4.6193599700927734e-05, 4.828721284866333e-05, 5.0380825996398926e-05, 5.247443914413452e-05, 5.456805229187012e-05, 5.666166543960571e-05, 5.875527858734131e-05, 6.0848891735076904e-05, 6.29425048828125e-05]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 0.0, 4.0, 4.0, 5.0, 1.0, 5.0, 7.0, 6.0, 9.0, 10.0, 14.0, 22.0, 13.0, 19.0, 18.0, 23.0, 23.0, 44.0, 63.0, 116.0, 282.0, 635.0, 1543.0, 4135.0, 12013.0, 41804.0, 249155.0, 632245.0, 77226.0, 18842.0, 6198.0, 2281.0, 929.0, 380.0, 176.0, 70.0, 48.0, 30.0, 22.0, 20.0, 23.0, 20.0, 15.0, 7.0, 13.0, 8.0, 7.0, 7.0, 5.0, 4.0, 5.0, 3.0, 3.0, 2.0, 1.0, 4.0, 0.0, 1.0], "bins": [-0.00014388561248779297, -0.00013955123722553253, -0.0001352168619632721, -0.00013088248670101166, -0.00012654811143875122, -0.00012221373617649078, -0.00011787936091423035, -0.00011354498565196991, -0.00010921061038970947, -0.00010487623512744904, -0.0001005418598651886, -9.620748460292816e-05, -9.187310934066772e-05, -8.753873407840729e-05, 
-8.320435881614685e-05, -7.886998355388641e-05, -7.453560829162598e-05, -7.020123302936554e-05, -6.58668577671051e-05, -6.153248250484467e-05, -5.719810724258423e-05, -5.286373198032379e-05, -4.8529356718063354e-05, -4.419498145580292e-05, -3.986060619354248e-05, -3.5526230931282043e-05, -3.1191855669021606e-05, -2.685748040676117e-05, -2.2523105144500732e-05, -1.8188729882240295e-05, -1.3854354619979858e-05, -9.519979357719421e-06, -5.185604095458984e-06, -8.512288331985474e-07, 3.4831464290618896e-06, 7.817521691322327e-06, 1.2151896953582764e-05, 1.64862722158432e-05, 2.0820647478103638e-05, 2.5155022740364075e-05, 2.9489398002624512e-05, 3.382377326488495e-05, 3.8158148527145386e-05, 4.249252378940582e-05, 4.682689905166626e-05, 5.11612743139267e-05, 5.5495649576187134e-05, 5.983002483844757e-05, 6.416440010070801e-05, 6.849877536296844e-05, 7.283315062522888e-05, 7.716752588748932e-05, 8.150190114974976e-05, 8.583627641201019e-05, 9.017065167427063e-05, 9.450502693653107e-05, 9.88394021987915e-05, 0.00010317377746105194, 0.00010750815272331238, 0.00011184252798557281, 0.00011617690324783325, 0.00012051127851009369, 0.00012484565377235413, 0.00012918002903461456, 0.000133514404296875]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 10.0, 6.0, 12.0, 23.0, 28.0, 40.0, 57.0, 106.0, 135.0, 154.0, 111.0, 111.0, 75.0, 43.0, 34.0, 25.0, 8.0, 9.0, 10.0, 3.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5735626220703125e-05, -1.485086977481842e-05, -1.3966113328933716e-05, -1.3081356883049011e-05, -1.2196600437164307e-05, -1.1311843991279602e-05, -1.0427087545394897e-05, -9.542331099510193e-06, -8.657574653625488e-06, -7.772818207740784e-06, -6.888061761856079e-06, -6.0033053159713745e-06, -5.11854887008667e-06, -4.233792424201965e-06, -3.3490359783172607e-06, -2.464279532432556e-06, -1.5795230865478516e-06, -6.94766640663147e-07, 1.8998980522155762e-07, 1.0747462511062622e-06, 1.959502696990967e-06, 2.8442591428756714e-06, 3.729015588760376e-06, 4.6137720346450806e-06, 5.498528480529785e-06, 6.38328492641449e-06, 7.268041372299194e-06, 8.152797818183899e-06, 9.037554264068604e-06, 9.922310709953308e-06, 1.0807067155838013e-05, 1.1691823601722717e-05, 1.2576580047607422e-05, 1.3461336493492126e-05, 1.4346092939376831e-05, 1.5230849385261536e-05, 1.611560583114624e-05, 1.7000362277030945e-05, 1.788511872291565e-05, 1.8769875168800354e-05, 1.965463161468506e-05, 2.0539388060569763e-05, 2.1424144506454468e-05, 2.2308900952339172e-05, 2.3193657398223877e-05, 2.407841384410858e-05, 2.4963170289993286e-05, 2.584792673587799e-05, 2.6732683181762695e-05, 2.76174396276474e-05, 2.8502196073532104e-05, 2.938695251941681e-05, 3.0271708965301514e-05, 3.115646541118622e-05, 3.204122185707092e-05, 3.292597830295563e-05, 3.381073474884033e-05, 3.469549119472504e-05, 3.558024764060974e-05, 3.6465004086494446e-05, 3.734976053237915e-05, 3.8234516978263855e-05, 3.911927342414856e-05, 4.0004029870033264e-05, 4.088878631591797e-05]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 4.0, 5.0, 4.0, 7.0, 1.0, 8.0, 17.0, 23.0, 32.0, 59.0, 77.0, 134.0, 203.0, 417.0, 859.0, 1509.0, 3081.0, 6896.0, 15865.0, 38228.0, 96101.0, 411020.0, 325374.0, 88658.0, 33162.0, 14405.0, 6323.0, 3016.0, 1356.0, 747.0, 404.0, 
223.0, 130.0, 72.0, 41.0, 33.0, 23.0, 14.0, 6.0, 8.0, 8.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.131959915161133e-05, -4.954636096954346e-05, -4.7773122787475586e-05, -4.5999884605407715e-05, -4.4226646423339844e-05, -4.245340824127197e-05, -4.06801700592041e-05, -3.890693187713623e-05, -3.713369369506836e-05, -3.536045551300049e-05, -3.358721733093262e-05, -3.1813979148864746e-05, -3.0040740966796875e-05, -2.8267502784729004e-05, -2.6494264602661133e-05, -2.4721026420593262e-05, -2.294778823852539e-05, -2.117455005645752e-05, -1.940131187438965e-05, -1.7628073692321777e-05, -1.5854835510253906e-05, -1.4081597328186035e-05, -1.2308359146118164e-05, -1.0535120964050293e-05, -8.761882781982422e-06, -6.988644599914551e-06, -5.21540641784668e-06, -3.4421682357788086e-06, -1.6689300537109375e-06, 1.043081283569336e-07, 1.8775463104248047e-06, 3.6507844924926758e-06, 5.424022674560547e-06, 7.197260856628418e-06, 8.970499038696289e-06, 1.074373722076416e-05, 1.2516975402832031e-05, 1.4290213584899902e-05, 1.6063451766967773e-05, 1.7836689949035645e-05, 1.9609928131103516e-05, 2.1383166313171387e-05, 2.3156404495239258e-05, 2.492964267730713e-05, 2.6702880859375e-05, 2.847611904144287e-05, 3.0249357223510742e-05, 3.202259540557861e-05, 3.3795833587646484e-05, 3.5569071769714355e-05, 3.7342309951782227e-05, 3.91155481338501e-05, 4.088878631591797e-05, 4.266202449798584e-05, 4.443526268005371e-05, 4.620850086212158e-05, 4.798173904418945e-05, 4.9754977226257324e-05, 5.1528215408325195e-05, 5.3301453590393066e-05, 5.507469177246094e-05, 5.684792995452881e-05, 5.862116813659668e-05, 6.039440631866455e-05, 6.216764450073242e-05]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 4.0, 3.0, 3.0, 5.0, 3.0, 1.0, 4.0, 10.0, 12.0, 11.0, 21.0, 17.0, 28.0, 23.0, 23.0, 34.0, 32.0, 33.0, 33.0, 42.0, 46.0, 59.0, 49.0, 57.0, 45.0, 47.0, 46.0, 36.0, 45.0, 31.0, 42.0, 27.0, 29.0, 23.0, 20.0, 11.0, 14.0, 12.0, 6.0, 6.0, 6.0, 5.0, 7.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.129243850708008e-05, -3.0213966965675354e-05, -2.913549542427063e-05, -2.8057023882865906e-05, -2.697855234146118e-05, -2.5900080800056458e-05, -2.4821609258651733e-05, -2.374313771724701e-05, -2.2664666175842285e-05, -2.158619463443756e-05, -2.0507723093032837e-05, -1.9429251551628113e-05, -1.835078001022339e-05, -1.7272308468818665e-05, -1.619383692741394e-05, -1.5115365386009216e-05, -1.4036893844604492e-05, -1.2958422303199768e-05, -1.1879950761795044e-05, -1.080147922039032e-05, -9.723007678985596e-06, -8.644536137580872e-06, -7.5660645961761475e-06, -6.487593054771423e-06, -5.409121513366699e-06, -4.330649971961975e-06, -3.252178430557251e-06, -2.173706889152527e-06, -1.0952353477478027e-06, -1.6763806343078613e-08, 1.0617077350616455e-06, 2.1401792764663696e-06, 3.2186508178710938e-06, 4.297122359275818e-06, 5.375593900680542e-06, 6.454065442085266e-06, 7.53253698348999e-06, 8.611008524894714e-06, 9.689480066299438e-06, 1.0767951607704163e-05, 1.1846423149108887e-05, 1.292489469051361e-05, 1.4003366231918335e-05, 1.5081837773323059e-05, 1.6160309314727783e-05, 1.7238780856132507e-05, 1.831725239753723e-05, 1.9395723938941956e-05, 2.047419548034668e-05, 2.1552667021751404e-05, 2.2631138563156128e-05, 2.3709610104560852e-05, 2.4788081645965576e-05, 2.58665531873703e-05, 2.6945024728775024e-05, 2.802349627017975e-05, 2.9101967811584473e-05, 3.0180439352989197e-05, 
3.125891089439392e-05, 3.2337382435798645e-05, 3.341585397720337e-05, 3.449432551860809e-05, 3.557279706001282e-05, 3.665126860141754e-05, 3.7729740142822266e-05]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 2.0, 4.0, 5.0, 5.0, 5.0, 9.0, 14.0, 19.0, 12.0, 26.0, 50.0, 34.0, 79.0, 107.0, 69.0, 202.0, 274.0, 407.0, 288.0, 828.0, 1332.0, 965.0, 3025.0, 5521.0, 4479.0, 15846.0, 39120.0, 122566.0, 175922.0, 481566.0, 123049.0, 24142.0, 24549.0, 10804.0, 5560.0, 1717.0, 2328.0, 1295.0, 445.0, 654.0, 401.0, 133.0, 208.0, 149.0, 114.0, 34.0, 67.0, 39.0, 15.0, 22.0, 18.0, 10.0, 11.0, 7.0, 6.0, 5.0, 2.0, 0.0, 1.0, 5.0, 0.0, 1.0], "bins": [-3.159046173095703e-06, -3.057532012462616e-06, -2.956017851829529e-06, -2.8545036911964417e-06, -2.7529895305633545e-06, -2.6514753699302673e-06, -2.54996120929718e-06, -2.448447048664093e-06, -2.346932888031006e-06, -2.2454187273979187e-06, -2.1439045667648315e-06, -2.0423904061317444e-06, -1.9408762454986572e-06, -1.83936208486557e-06, -1.737847924232483e-06, -1.6363337635993958e-06, -1.5348196029663086e-06, -1.4333054423332214e-06, -1.3317912817001343e-06, -1.2302771210670471e-06, -1.12876296043396e-06, -1.0272487998008728e-06, -9.257346391677856e-07, -8.242204785346985e-07, -7.227063179016113e-07, -6.211921572685242e-07, -5.19677996635437e-07, -4.1816383600234985e-07, -3.166496753692627e-07, -2.1513551473617554e-07, -1.1362135410308838e-07, -1.210719347000122e-08, 8.940696716308594e-08, 1.909211277961731e-07, 2.9243528842926025e-07, 3.939494490623474e-07, 4.954636096954346e-07, 5.969777703285217e-07, 6.984919309616089e-07, 8.00006091594696e-07, 9.015202522277832e-07, 1.0030344128608704e-06, 1.1045485734939575e-06, 1.2060627341270447e-06, 1.3075768947601318e-06, 1.409091055393219e-06, 1.5106052160263062e-06, 1.6121193766593933e-06, 1.7136335372924805e-06, 1.8151476979255676e-06, 1.9166618585586548e-06, 2.018176019191742e-06, 2.119690179824829e-06, 2.2212043404579163e-06, 2.3227185010910034e-06, 2.4242326617240906e-06, 2.5257468223571777e-06, 2.627260982990265e-06, 2.728775143623352e-06, 2.830289304256439e-06, 2.9318034648895264e-06, 3.0333176255226135e-06, 3.1348317861557007e-06, 3.236345946788788e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 5.0, 2.0, 7.0, 15.0, 0.0, 12.0, 24.0, 20.0, 44.0, 55.0, 76.0, 0.0, 84.0, 108.0, 119.0, 112.0, 89.0, 65.0, 0.0, 58.0, 25.0, 24.0, 20.0, 10.0, 11.0, 0.0, 8.0, 7.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6093254089355469e-06, -1.5581026673316956e-06, -1.5068799257278442e-06, -1.455657184123993e-06, -1.4044344425201416e-06, -1.3532117009162903e-06, -1.301988959312439e-06, -1.2507662177085876e-06, -1.1995434761047363e-06, -1.148320734500885e-06, -1.0970979928970337e-06, -1.0458752512931824e-06, -9.94652509689331e-07, -9.434297680854797e-07, -8.922070264816284e-07, -8.409842848777771e-07, -7.897615432739258e-07, -7.385388016700745e-07, -6.873160600662231e-07, -6.360933184623718e-07, -5.848705768585205e-07, -5.336478352546692e-07, -4.824250936508179e-07, -4.3120235204696655e-07, -3.7997961044311523e-07, -3.287568688392639e-07, -2.775341272354126e-07, -2.2631138563156128e-07, -1.7508864402770996e-07, -1.2386590242385864e-07, -7.264316082000732e-08, -2.1420419216156006e-08, 2.9802322387695312e-08, 8.102506399154663e-08, 1.3224780559539795e-07, 
1.8347054719924927e-07, 2.3469328880310059e-07, 2.859160304069519e-07, 3.371387720108032e-07, 3.8836151361465454e-07, 4.3958425521850586e-07, 4.908069968223572e-07, 5.420297384262085e-07, 5.932524800300598e-07, 6.444752216339111e-07, 6.956979632377625e-07, 7.469207048416138e-07, 7.981434464454651e-07, 8.493661880493164e-07, 9.005889296531677e-07, 9.51811671257019e-07, 1.0030344128608704e-06, 1.0542571544647217e-06, 1.105479896068573e-06, 1.1567026376724243e-06, 1.2079253792762756e-06, 1.259148120880127e-06, 1.3103708624839783e-06, 1.3615936040878296e-06, 1.412816345691681e-06, 1.4640390872955322e-06, 1.5152618288993835e-06, 1.5664845705032349e-06, 1.6177073121070862e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 4.0, 10.0, 13.0, 17.0, 47.0, 41.0, 61.0, 103.0, 224.0, 229.0, 387.0, 632.0, 1726.0, 2266.0, 3857.0, 8024.0, 36597.0, 106715.0, 547537.0, 285793.0, 29694.0, 11567.0, 5607.0, 3826.0, 1282.0, 819.0, 510.0, 446.0, 171.0, 120.0, 67.0, 70.0, 38.0, 19.0, 17.0, 5.0, 8.0, 1.0, 2.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.410743713378906e-06, -4.275701940059662e-06, -4.1406601667404175e-06, -4.005618393421173e-06, -3.870576620101929e-06, -3.7355348467826843e-06, -3.60049307346344e-06, -3.4654513001441956e-06, -3.330409526824951e-06, -3.1953677535057068e-06, -3.0603259801864624e-06, -2.925284206867218e-06, -2.7902424335479736e-06, -2.6552006602287292e-06, -2.520158886909485e-06, -2.3851171135902405e-06, -2.250075340270996e-06, -2.1150335669517517e-06, -1.9799917936325073e-06, -1.844950020313263e-06, -1.7099082469940186e-06, -1.5748664736747742e-06, -1.4398247003555298e-06, -1.3047829270362854e-06, -1.169741153717041e-06, -1.0346993803977966e-06, -8.996576070785522e-07, -7.646158337593079e-07, -6.295740604400635e-07, -4.945322871208191e-07, -3.594905138015747e-07, -2.2444874048233032e-07, -8.940696716308594e-08, 4.563480615615845e-08, 1.8067657947540283e-07, 3.157183527946472e-07, 4.507601261138916e-07, 5.85801899433136e-07, 7.208436727523804e-07, 8.558854460716248e-07, 9.909272193908691e-07, 1.1259689927101135e-06, 1.261010766029358e-06, 1.3960525393486023e-06, 1.5310943126678467e-06, 1.666136085987091e-06, 1.8011778593063354e-06, 1.93621963262558e-06, 2.0712614059448242e-06, 2.2063031792640686e-06, 2.341344952583313e-06, 2.4763867259025574e-06, 2.6114284992218018e-06, 2.746470272541046e-06, 2.8815120458602905e-06, 3.016553819179535e-06, 3.1515955924987793e-06, 3.2866373658180237e-06, 3.421679139137268e-06, 3.5567209124565125e-06, 3.691762685775757e-06, 3.826804459095001e-06, 3.961846232414246e-06, 4.09688800573349e-06, 4.231929779052734e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 5.0, 3.0, 11.0, 6.0, 7.0, 6.0, 19.0, 20.0, 39.0, 22.0, 52.0, 62.0, 125.0, 166.0, 62.0, 110.0, 56.0, 70.0, 30.0, 37.0, 12.0, 19.0, 8.0, 16.0, 5.0, 6.0, 5.0, 8.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.9802322387695312e-06, -2.88989394903183e-06, -2.7995556592941284e-06, -2.709217369556427e-06, -2.6188790798187256e-06, -2.528540790081024e-06, -2.4382025003433228e-06, -2.3478642106056213e-06, -2.25752592086792e-06, -2.1671876311302185e-06, -2.076849341392517e-06, -1.9865110516548157e-06, -1.8961727619171143e-06, -1.8058344721794128e-06, 
-1.7154961824417114e-06, -1.62515789270401e-06, -1.5348196029663086e-06, -1.4444813132286072e-06, -1.3541430234909058e-06, -1.2638047337532043e-06, -1.173466444015503e-06, -1.0831281542778015e-06, -9.927898645401e-07, -9.024515748023987e-07, -8.121132850646973e-07, -7.217749953269958e-07, -6.314367055892944e-07, -5.41098415851593e-07, -4.507601261138916e-07, -3.604218363761902e-07, -2.7008354663848877e-07, -1.7974525690078735e-07, -8.940696716308594e-08, 9.313225746154785e-10, 9.12696123123169e-08, 1.816079020500183e-07, 2.7194619178771973e-07, 3.6228448152542114e-07, 4.5262277126312256e-07, 5.42961061000824e-07, 6.332993507385254e-07, 7.236376404762268e-07, 8.139759302139282e-07, 9.043142199516296e-07, 9.94652509689331e-07, 1.0849907994270325e-06, 1.1753290891647339e-06, 1.2656673789024353e-06, 1.3560056686401367e-06, 1.4463439583778381e-06, 1.5366822481155396e-06, 1.627020537853241e-06, 1.7173588275909424e-06, 1.8076971173286438e-06, 1.8980354070663452e-06, 1.9883736968040466e-06, 2.078711986541748e-06, 2.1690502762794495e-06, 2.259388566017151e-06, 2.3497268557548523e-06, 2.4400651454925537e-06, 2.530403435230255e-06, 2.6207417249679565e-06, 2.711080014705658e-06, 2.8014183044433594e-06]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 6.0, 5.0, 5.0, 10.0, 21.0, 19.0, 23.0, 34.0, 51.0, 102.0, 121.0, 178.0, 85.0, 61.0, 56.0, 37.0, 37.0, 29.0, 20.0, 20.0, 14.0, 9.0, 12.0, 7.0, 3.0, 7.0, 5.0, 8.0, 4.0, 0.0, 3.0, 0.0, 2.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.611773566575721e-05, -9.229796705767512e-05, -8.847819844959304e-05, -8.465842984151095e-05, -8.083865395747125e-05, -7.701888534938917e-05, -7.319911674130708e-05, -6.9379348133225e-05, -6.555957952514291e-05, -6.173981091706082e-05, -5.792003867099993e-05, -5.4100270062917843e-05, -5.028049781685695e-05, -4.6460729208774865e-05, -4.264096060069278e-05, -3.882119199261069e-05, -3.500141610857099e-05, -3.118164750048891e-05, -2.7361875254428014e-05, -2.354210664634593e-05, -1.972233621927444e-05, -1.590256579220295e-05, -1.2082797184120864e-05, -8.263026757049374e-06, -4.443256329977885e-06, -6.234863576537464e-07, 3.196283614670392e-06, 7.01605313224718e-06, 1.0835823559318669e-05, 1.4655593986390159e-05, 1.8475362594472244e-05, 2.2295133021543734e-05, 2.611489617265761e-05, 2.99346665997291e-05, 3.375443702680059e-05, 3.7574205634882674e-05, 4.1393977880943567e-05, 4.521374648902565e-05, 4.903351509710774e-05, 5.2853283705189824e-05, 5.667305595125072e-05, 6.04928245593328e-05, 6.43125968053937e-05, 6.813236541347578e-05, 7.195213402155787e-05, 7.577190990559757e-05, 7.959167123772204e-05, 8.341144712176174e-05, 8.723121572984383e-05, 9.105098433792591e-05, 9.4870752946008e-05, 9.869052155409008e-05, 0.00010251029743812978, 0.00010633006604621187, 0.00011014983465429395, 0.00011396960326237604, 0.00011778937187045813, 0.00012160914047854021, 0.0001254289090866223, 0.000129248684970662, 0.00013306844630278647, 0.00013688822218682617, 0.00014070799807086587, 0.00014452775940299034, 0.00014834753528703004]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 7.0, 6.0, 6.0, 9.0, 16.0, 13.0, 13.0, 13.0, 19.0, 24.0, 23.0, 20.0, 38.0, 34.0, 36.0, 51.0, 36.0, 50.0, 47.0, 53.0, 51.0, 33.0, 55.0, 46.0, 25.0, 45.0, 43.0, 25.0, 28.0, 23.0, 19.0, 22.0, 13.0, 13.0, 11.0, 13.0, 5.0, 7.0, 4.0, 2.0, 
6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.285045623779297e-05, -7.992982864379883e-05, -7.700920104980469e-05, -7.408857345581055e-05, -7.11679458618164e-05, -6.824731826782227e-05, -6.532669067382812e-05, -6.240606307983398e-05, -5.9485435485839844e-05, -5.65648078918457e-05, -5.364418029785156e-05, -5.072355270385742e-05, -4.780292510986328e-05, -4.488229751586914e-05, -4.1961669921875e-05, -3.904104232788086e-05, -3.612041473388672e-05, -3.319978713989258e-05, -3.0279159545898438e-05, -2.7358531951904297e-05, -2.4437904357910156e-05, -2.1517276763916016e-05, -1.8596649169921875e-05, -1.5676021575927734e-05, -1.2755393981933594e-05, -9.834766387939453e-06, -6.9141387939453125e-06, -3.993511199951172e-06, -1.0728836059570312e-06, 1.8477439880371094e-06, 4.76837158203125e-06, 7.68899917602539e-06, 1.0609626770019531e-05, 1.3530254364013672e-05, 1.6450881958007812e-05, 1.9371509552001953e-05, 2.2292137145996094e-05, 2.5212764739990234e-05, 2.8133392333984375e-05, 3.1054019927978516e-05, 3.3974647521972656e-05, 3.68952751159668e-05, 3.981590270996094e-05, 4.273653030395508e-05, 4.565715789794922e-05, 4.857778549194336e-05, 5.14984130859375e-05, 5.441904067993164e-05, 5.733966827392578e-05, 6.026029586791992e-05, 6.318092346191406e-05, 6.61015510559082e-05, 6.902217864990234e-05, 7.194280624389648e-05, 7.486343383789062e-05, 7.778406143188477e-05, 8.07046890258789e-05, 8.362531661987305e-05, 8.654594421386719e-05, 8.946657180786133e-05, 9.238719940185547e-05, 9.530782699584961e-05, 9.822845458984375e-05, 0.00010114908218383789, 0.00010406970977783203]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 2.0, 3.0, 7.0, 10.0, 14.0, 16.0, 27.0, 38.0, 45.0, 71.0, 95.0, 124.0, 220.0, 308.0, 494.0, 815.0, 1304.0, 2318.0, 4123.0, 8353.0, 19141.0, 62385.0, 3325997.0, 700502.0, 38749.0, 13890.0, 6420.0, 3407.0, 1962.0, 1065.0, 724.0, 498.0, 331.0, 220.0, 198.0, 117.0, 86.0, 61.0, 37.0, 25.0, 20.0, 18.0, 13.0, 10.0, 4.0, 12.0, 2.0, 3.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-6.0617923736572266e-05, -5.871523171663284e-05, -5.681253969669342e-05, -5.4909847676754e-05, -5.3007155656814575e-05, -5.110446363687515e-05, -4.920177161693573e-05, -4.729907959699631e-05, -4.5396387577056885e-05, -4.349369555711746e-05, -4.159100353717804e-05, -3.968831151723862e-05, -3.7785619497299194e-05, -3.588292747735977e-05, -3.398023545742035e-05, -3.2077543437480927e-05, -3.0174851417541504e-05, -2.827215939760208e-05, -2.636946737766266e-05, -2.4466775357723236e-05, -2.2564083337783813e-05, -2.066139131784439e-05, -1.8758699297904968e-05, -1.6856007277965546e-05, -1.4953315258026123e-05, -1.30506232380867e-05, -1.1147931218147278e-05, -9.245239198207855e-06, -7.342547178268433e-06, -5.43985515832901e-06, -3.5371631383895874e-06, -1.6344711184501648e-06, 2.682209014892578e-07, 2.1709129214286804e-06, 4.073604941368103e-06, 5.976296961307526e-06, 7.878988981246948e-06, 9.781681001186371e-06, 1.1684373021125793e-05, 1.3587065041065216e-05, 1.548975706100464e-05, 1.739244908094406e-05, 1.9295141100883484e-05, 2.1197833120822906e-05, 2.310052514076233e-05, 2.5003217160701752e-05, 2.6905909180641174e-05, 2.8808601200580597e-05, 3.071129322052002e-05, 3.261398524045944e-05, 3.4516677260398865e-05, 3.641936928033829e-05, 3.832206130027771e-05, 4.022475332021713e-05, 4.2127445340156555e-05, 4.403013736009598e-05, 4.59328293800354e-05, 4.783552139997482e-05, 
4.9738213419914246e-05, 5.164090543985367e-05, 5.354359745979309e-05, 5.5446289479732513e-05, 5.7348981499671936e-05, 5.925167351961136e-05, 6.115436553955078e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 2.0, 12.0, 10.0, 12.0, 21.0, 21.0, 16.0, 36.0, 33.0, 64.0, 75.0, 80.0, 78.0, 81.0, 96.0, 63.0, 67.0, 46.0, 36.0, 40.0, 20.0, 23.0, 14.0, 10.0, 4.0, 6.0, 4.0, 6.0, 2.0, 3.0, 6.0, 2.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.52587890625e-05, -1.4740973711013794e-05, -1.4223158359527588e-05, -1.3705343008041382e-05, -1.3187527656555176e-05, -1.266971230506897e-05, -1.2151896953582764e-05, -1.1634081602096558e-05, -1.1116266250610352e-05, -1.0598450899124146e-05, -1.008063554763794e-05, -9.562820196151733e-06, -9.045004844665527e-06, -8.527189493179321e-06, -8.009374141693115e-06, -7.491558790206909e-06, -6.973743438720703e-06, -6.455928087234497e-06, -5.938112735748291e-06, -5.420297384262085e-06, -4.902482032775879e-06, -4.384666681289673e-06, -3.866851329803467e-06, -3.3490359783172607e-06, -2.8312206268310547e-06, -2.3134052753448486e-06, -1.7955899238586426e-06, -1.2777745723724365e-06, -7.599592208862305e-07, -2.421438694000244e-07, 2.7567148208618164e-07, 7.934868335723877e-07, 1.3113021850585938e-06, 1.8291175365447998e-06, 2.346932888031006e-06, 2.864748239517212e-06, 3.382563591003418e-06, 3.900378942489624e-06, 4.41819429397583e-06, 4.936009645462036e-06, 5.453824996948242e-06, 5.971640348434448e-06, 6.489455699920654e-06, 7.00727105140686e-06, 7.525086402893066e-06, 8.042901754379272e-06, 8.560717105865479e-06, 9.078532457351685e-06, 9.59634780883789e-06, 1.0114163160324097e-05, 1.0631978511810303e-05, 1.1149793863296509e-05, 1.1667609214782715e-05, 1.2185424566268921e-05, 1.2703239917755127e-05, 1.3221055269241333e-05, 1.3738870620727539e-05, 1.4256685972213745e-05, 1.4774501323699951e-05, 1.5292316675186157e-05, 1.5810132026672363e-05, 1.632794737815857e-05, 1.6845762729644775e-05, 1.736357808113098e-05, 1.7881393432617188e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 4.0, 2.0, 2.0, 6.0, 5.0, 16.0, 23.0, 28.0, 40.0, 63.0, 117.0, 185.0, 361.0, 683.0, 1316.0, 2751.0, 6010.0, 14023.0, 34822.0, 120865.0, 3437661.0, 471676.0, 61740.0, 23331.0, 9787.0, 4351.0, 2118.0, 1070.0, 515.0, 296.0, 161.0, 88.0, 61.0, 36.0, 21.0, 21.0, 17.0, 6.0, 5.0, 6.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.2557716369628906e-05, -4.112906754016876e-05, -3.970041871070862e-05, -3.8271769881248474e-05, -3.684312105178833e-05, -3.5414472222328186e-05, -3.398582339286804e-05, -3.25571745634079e-05, -3.1128525733947754e-05, -2.969987690448761e-05, -2.8271228075027466e-05, -2.6842579245567322e-05, -2.5413930416107178e-05, -2.3985281586647034e-05, -2.255663275718689e-05, -2.1127983927726746e-05, -1.96993350982666e-05, -1.8270686268806458e-05, -1.6842037439346313e-05, -1.541338860988617e-05, -1.3984739780426025e-05, -1.2556090950965881e-05, -1.1127442121505737e-05, -9.698793292045593e-06, -8.27014446258545e-06, -6.841495633125305e-06, -5.412846803665161e-06, -3.984197974205017e-06, -2.555549144744873e-06, -1.126900315284729e-06, 3.0174851417541504e-07, 1.730397343635559e-06, 3.159046173095703e-06, 4.587695002555847e-06, 6.016343832015991e-06, 
7.444992661476135e-06, 8.87364149093628e-06, 1.0302290320396423e-05, 1.1730939149856567e-05, 1.3159587979316711e-05, 1.4588236808776855e-05, 1.6016885638237e-05, 1.7445534467697144e-05, 1.8874183297157288e-05, 2.030283212661743e-05, 2.1731480956077576e-05, 2.316012978553772e-05, 2.4588778614997864e-05, 2.6017427444458008e-05, 2.7446076273918152e-05, 2.8874725103378296e-05, 3.030337393283844e-05, 3.1732022762298584e-05, 3.316067159175873e-05, 3.458932042121887e-05, 3.6017969250679016e-05, 3.744661808013916e-05, 3.8875266909599304e-05, 4.030391573905945e-05, 4.173256456851959e-05, 4.3161213397979736e-05, 4.458986222743988e-05, 4.6018511056900024e-05, 4.744715988636017e-05, 4.887580871582031e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 5.0, 7.0, 4.0, 4.0, 11.0, 15.0, 14.0, 19.0, 20.0, 23.0, 27.0, 36.0, 57.0, 104.0, 233.0, 640.0, 1523.0, 744.0, 246.0, 86.0, 55.0, 48.0, 32.0, 21.0, 18.0, 9.0, 20.0, 16.0, 4.0, 9.0, 7.0, 5.0, 3.0, 9.0, 6.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4259090423583984e-05, -2.3310072720050812e-05, -2.236105501651764e-05, -2.1412037312984467e-05, -2.0463019609451294e-05, -1.951400190591812e-05, -1.856498420238495e-05, -1.7615966498851776e-05, -1.6666948795318604e-05, -1.571793109178543e-05, -1.4768913388252258e-05, -1.3819895684719086e-05, -1.2870877981185913e-05, -1.192186027765274e-05, -1.0972842574119568e-05, -1.0023824870586395e-05, -9.074807167053223e-06, -8.12578946352005e-06, -7.1767717599868774e-06, -6.227754056453705e-06, -5.278736352920532e-06, -4.32971864938736e-06, -3.380700945854187e-06, -2.4316832423210144e-06, -1.4826655387878418e-06, -5.336478352546692e-07, 4.153698682785034e-07, 1.364387571811676e-06, 2.3134052753448486e-06, 3.2624229788780212e-06, 4.211440682411194e-06, 5.1604583859443665e-06, 6.109476089477539e-06, 7.058493793010712e-06, 8.007511496543884e-06, 8.956529200077057e-06, 9.90554690361023e-06, 1.0854564607143402e-05, 1.1803582310676575e-05, 1.2752600014209747e-05, 1.370161771774292e-05, 1.4650635421276093e-05, 1.5599653124809265e-05, 1.6548670828342438e-05, 1.749768853187561e-05, 1.8446706235408783e-05, 1.9395723938941956e-05, 2.0344741642475128e-05, 2.12937593460083e-05, 2.2242777049541473e-05, 2.3191794753074646e-05, 2.414081245660782e-05, 2.508983016014099e-05, 2.6038847863674164e-05, 2.6987865567207336e-05, 2.793688327074051e-05, 2.888590097427368e-05, 2.9834918677806854e-05, 3.078393638134003e-05, 3.17329540848732e-05, 3.268197178840637e-05, 3.3630989491939545e-05, 3.458000719547272e-05, 3.552902489900589e-05, 3.647804260253906e-05]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 4.0, 5.0, 8.0, 10.0, 8.0, 12.0, 12.0, 23.0, 19.0, 17.0, 34.0, 60.0, 57.0, 62.0, 85.0, 109.0, 83.0, 89.0, 51.0, 38.0, 37.0, 24.0, 26.0, 26.0, 12.0, 20.0, 14.0, 14.0, 5.0, 10.0, 3.0, 5.0, 2.0, 5.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-9.355880320072174e-05, -9.024501196108758e-05, -8.693122799741104e-05, -8.361743675777689e-05, -8.030364551814273e-05, -7.698986155446619e-05, -7.367607031483203e-05, -7.036227907519788e-05, -6.704848783556372e-05, -6.373469659592956e-05, -6.0420908994274214e-05, -5.7107121392618865e-05, -5.379333015298471e-05, -5.047954255132936e-05, -4.716575494967401e-05, 
-4.3851963710039854e-05, -4.053817974636331e-05, -3.722439214470796e-05, -3.391060090507381e-05, -3.059681330341846e-05, -2.7283023882773705e-05, -2.3969234462128952e-05, -2.0655446860473603e-05, -1.734165743982885e-05, -1.4027868019184098e-05, -1.0714078598539345e-05, -7.400290087389294e-06, -4.086501576239243e-06, -7.727121555944905e-07, 2.5410772650502622e-06, 5.854864866705611e-06, 9.168654287350364e-06, 1.2482450983952731e-05, 1.5796240404597484e-05, 1.9110029825242236e-05, 2.2423817426897585e-05, 2.5737606847542338e-05, 2.905139626818709e-05, 3.236518386984244e-05, 3.5678975109476596e-05, 3.8992762711131945e-05, 4.2306550312787294e-05, 4.562034155242145e-05, 4.89341291540768e-05, 5.224791675573215e-05, 5.5561707995366305e-05, 5.8875495597021654e-05, 6.2189283198677e-05, 6.550307443831116e-05, 6.881686567794532e-05, 7.213064964162186e-05, 7.544444088125601e-05, 7.875823212089017e-05, 8.207201608456671e-05, 8.538580732420087e-05, 8.869959856383502e-05, 9.201338980346918e-05, 9.532718104310334e-05, 9.864096500677988e-05, 0.00010195475624641404, 0.00010526854748604819, 0.00010858233144972473, 0.00011189612268935889, 0.00011520991392899305, 0.00011852369789266959]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 11.0, 3.0, 11.0, 3.0, 8.0, 7.0, 8.0, 10.0, 10.0, 15.0, 13.0, 26.0, 17.0, 27.0, 23.0, 24.0, 27.0, 40.0, 43.0, 37.0, 38.0, 36.0, 48.0, 42.0, 41.0, 29.0, 46.0, 34.0, 33.0, 40.0, 32.0, 29.0, 26.0, 20.0, 23.0, 18.0, 14.0, 25.0, 16.0, 9.0, 16.0, 10.0, 4.0, 3.0, 3.0, 2.0, 4.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.341934204101562e-05, -6.139371544122696e-05, -5.9368088841438293e-05, -5.734246224164963e-05, -5.531683564186096e-05, -5.3291209042072296e-05, -5.126558244228363e-05, -4.9239955842494965e-05, -4.72143292427063e-05, -4.518870264291763e-05, -4.316307604312897e-05, -4.11374494433403e-05, -3.9111822843551636e-05, -3.708619624376297e-05, -3.5060569643974304e-05, -3.303494304418564e-05, -3.100931644439697e-05, -2.8983689844608307e-05, -2.695806324481964e-05, -2.4932436645030975e-05, -2.290681004524231e-05, -2.0881183445453644e-05, -1.8855556845664978e-05, -1.6829930245876312e-05, -1.4804303646087646e-05, -1.277867704629898e-05, -1.0753050446510315e-05, -8.72742384672165e-06, -6.701797246932983e-06, -4.676170647144318e-06, -2.650544047355652e-06, -6.249174475669861e-07, 1.4007091522216797e-06, 3.4263357520103455e-06, 5.451962351799011e-06, 7.477588951587677e-06, 9.503215551376343e-06, 1.1528842151165009e-05, 1.3554468750953674e-05, 1.558009535074234e-05, 1.7605721950531006e-05, 1.963134855031967e-05, 2.1656975150108337e-05, 2.3682601749897003e-05, 2.570822834968567e-05, 2.7733854949474335e-05, 2.9759481549263e-05, 3.1785108149051666e-05, 3.381073474884033e-05, 3.5836361348629e-05, 3.7861987948417664e-05, 3.988761454820633e-05, 4.1913241147994995e-05, 4.393886774778366e-05, 4.596449434757233e-05, 4.799012094736099e-05, 5.001574754714966e-05, 5.2041374146938324e-05, 5.406700074672699e-05, 5.6092627346515656e-05, 5.811825394630432e-05, 6.014388054609299e-05, 6.216950714588165e-05, 6.419513374567032e-05, 6.622076034545898e-05]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 9.0, 5.0, 6.0, 12.0, 11.0, 13.0, 18.0, 24.0, 28.0, 29.0, 53.0, 76.0, 145.0, 263.0, 654.0, 1438.0, 3395.0, 8550.0, 25979.0, 107971.0, 652776.0, 187824.0, 39481.0, 11651.0, 4544.0, 1884.0, 839.0, 375.0, 187.0, 96.0, 
50.0, 37.0, 24.0, 20.0, 18.0, 22.0, 7.0, 10.0, 6.0, 7.0, 9.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00013744831085205078, -0.00013296492397785187, -0.00012848153710365295, -0.00012399815022945404, -0.00011951476335525513, -0.00011503137648105621, -0.0001105479896068573, -0.00010606460273265839, -0.00010158121585845947, -9.709782898426056e-05, -9.261444211006165e-05, -8.813105523586273e-05, -8.364766836166382e-05, -7.91642814874649e-05, -7.468089461326599e-05, -7.019750773906708e-05, -6.571412086486816e-05, -6.123073399066925e-05, -5.674734711647034e-05, -5.226396024227142e-05, -4.778057336807251e-05, -4.3297186493873596e-05, -3.881379961967468e-05, -3.433041274547577e-05, -2.9847025871276855e-05, -2.5363638997077942e-05, -2.088025212287903e-05, -1.6396865248680115e-05, -1.1913478374481201e-05, -7.430091500282288e-06, -2.946704626083374e-06, 1.5366822481155396e-06, 6.020069122314453e-06, 1.0503455996513367e-05, 1.498684287071228e-05, 1.9470229744911194e-05, 2.3953616619110107e-05, 2.843700349330902e-05, 3.2920390367507935e-05, 3.740377724170685e-05, 4.188716411590576e-05, 4.6370550990104675e-05, 5.085393786430359e-05, 5.53373247385025e-05, 5.9820711612701416e-05, 6.430409848690033e-05, 6.878748536109924e-05, 7.327087223529816e-05, 7.775425910949707e-05, 8.223764598369598e-05, 8.67210328578949e-05, 9.120441973209381e-05, 9.568780660629272e-05, 0.00010017119348049164, 0.00010465458035469055, 0.00010913796722888947, 0.00011362135410308838, 0.00011810474097728729, 0.0001225881278514862, 0.00012707151472568512, 0.00013155490159988403, 0.00013603828847408295, 0.00014052167534828186, 0.00014500506222248077, 0.0001494884490966797]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 5.0, 14.0, 8.0, 10.0, 18.0, 58.0, 44.0, 92.0, 109.0, 129.0, 109.0, 110.0, 78.0, 60.0, 46.0, 41.0, 19.0, 14.0, 12.0, 7.0, 5.0, 6.0, 4.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5795230865478516e-05, -1.5014782547950745e-05, -1.4234334230422974e-05, -1.3453885912895203e-05, -1.2673437595367432e-05, -1.189298927783966e-05, -1.111254096031189e-05, -1.0332092642784119e-05, -9.551644325256348e-06, -8.771196007728577e-06, -7.990747690200806e-06, -7.210299372673035e-06, -6.429851055145264e-06, -5.649402737617493e-06, -4.868954420089722e-06, -4.088506102561951e-06, -3.3080577850341797e-06, -2.5276094675064087e-06, -1.7471611499786377e-06, -9.667128324508667e-07, -1.862645149230957e-07, 5.941838026046753e-07, 1.3746321201324463e-06, 2.1550804376602173e-06, 2.9355287551879883e-06, 3.7159770727157593e-06, 4.49642539024353e-06, 5.276873707771301e-06, 6.057322025299072e-06, 6.837770342826843e-06, 7.618218660354614e-06, 8.398666977882385e-06, 9.179115295410156e-06, 9.959563612937927e-06, 1.0740011930465698e-05, 1.152046024799347e-05, 1.230090856552124e-05, 1.3081356883049011e-05, 1.3861805200576782e-05, 1.4642253518104553e-05, 1.5422701835632324e-05, 1.6203150153160095e-05, 1.6983598470687866e-05, 1.7764046788215637e-05, 1.8544495105743408e-05, 1.932494342327118e-05, 2.010539174079895e-05, 2.088584005832672e-05, 2.1666288375854492e-05, 2.2446736693382263e-05, 2.3227185010910034e-05, 2.4007633328437805e-05, 2.4788081645965576e-05, 2.5568529963493347e-05, 2.6348978281021118e-05, 2.712942659854889e-05, 2.790987491607666e-05, 2.869032323360443e-05, 2.9470771551132202e-05, 
3.0251219868659973e-05, 3.1031668186187744e-05, 3.1812116503715515e-05, 3.2592564821243286e-05, 3.337301313877106e-05, 3.415346145629883e-05]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 1.0, 6.0, 18.0, 27.0, 27.0, 43.0, 60.0, 105.0, 187.0, 281.0, 462.0, 936.0, 1477.0, 2780.0, 4718.0, 8757.0, 16687.0, 29837.0, 64536.0, 149439.0, 451320.0, 177516.0, 66801.0, 33984.0, 16925.0, 9496.0, 5417.0, 2842.0, 1651.0, 909.0, 482.0, 298.0, 183.0, 124.0, 72.0, 39.0, 45.0, 31.0, 11.0, 9.0, 3.0, 3.0, 1.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.3451786041259766e-05, -4.2117200791835785e-05, -4.0782615542411804e-05, -3.9448030292987823e-05, -3.811344504356384e-05, -3.677885979413986e-05, -3.544427454471588e-05, -3.41096892952919e-05, -3.277510404586792e-05, -3.144051879644394e-05, -3.010593354701996e-05, -2.8771348297595978e-05, -2.7436763048171997e-05, -2.6102177798748016e-05, -2.4767592549324036e-05, -2.3433007299900055e-05, -2.2098422050476074e-05, -2.0763836801052094e-05, -1.9429251551628113e-05, -1.8094666302204132e-05, -1.676008105278015e-05, -1.542549580335617e-05, -1.409091055393219e-05, -1.275632530450821e-05, -1.1421740055084229e-05, -1.0087154805660248e-05, -8.752569556236267e-06, -7.417984306812286e-06, -6.083399057388306e-06, -4.748813807964325e-06, -3.4142285585403442e-06, -2.0796433091163635e-06, -7.450580596923828e-07, 5.895271897315979e-07, 1.9241124391555786e-06, 3.2586976885795593e-06, 4.59328293800354e-06, 5.927868187427521e-06, 7.2624534368515015e-06, 8.597038686275482e-06, 9.931623935699463e-06, 1.1266209185123444e-05, 1.2600794434547424e-05, 1.3935379683971405e-05, 1.5269964933395386e-05, 1.6604550182819366e-05, 1.7939135432243347e-05, 1.9273720681667328e-05, 2.060830593109131e-05, 2.194289118051529e-05, 2.327747642993927e-05, 2.461206167936325e-05, 2.594664692878723e-05, 2.7281232178211212e-05, 2.8615817427635193e-05, 2.9950402677059174e-05, 3.1284987926483154e-05, 3.2619573175907135e-05, 3.3954158425331116e-05, 3.5288743674755096e-05, 3.662332892417908e-05, 3.795791417360306e-05, 3.929249942302704e-05, 4.062708467245102e-05, 4.1961669921875e-05]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 1.0, 11.0, 13.0, 4.0, 10.0, 10.0, 14.0, 13.0, 24.0, 22.0, 14.0, 35.0, 22.0, 31.0, 44.0, 49.0, 41.0, 42.0, 37.0, 61.0, 43.0, 46.0, 35.0, 44.0, 38.0, 38.0, 32.0, 32.0, 32.0, 22.0, 31.0, 18.0, 14.0, 11.0, 16.0, 17.0, 11.0, 14.0, 5.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.1113624572753906e-05, -3.016740083694458e-05, -2.9221177101135254e-05, -2.8274953365325928e-05, -2.73287296295166e-05, -2.6382505893707275e-05, -2.543628215789795e-05, -2.4490058422088623e-05, -2.3543834686279297e-05, -2.259761095046997e-05, -2.1651387214660645e-05, -2.070516347885132e-05, -1.9758939743041992e-05, -1.8812716007232666e-05, -1.786649227142334e-05, -1.6920268535614014e-05, -1.5974044799804688e-05, -1.5027821063995361e-05, -1.4081597328186035e-05, -1.3135373592376709e-05, -1.2189149856567383e-05, -1.1242926120758057e-05, -1.029670238494873e-05, -9.350478649139404e-06, -8.404254913330078e-06, -7.458031177520752e-06, -6.511807441711426e-06, -5.5655837059021e-06, -4.6193599700927734e-06, -3.6731362342834473e-06, -2.726912498474121e-06, -1.780688762664795e-06, -8.344650268554688e-07, 1.1175870895385742e-07, 1.0579824447631836e-06, 
2.0042061805725098e-06, 2.950429916381836e-06, 3.896653652191162e-06, 4.842877388000488e-06, 5.7891011238098145e-06, 6.735324859619141e-06, 7.681548595428467e-06, 8.627772331237793e-06, 9.573996067047119e-06, 1.0520219802856445e-05, 1.1466443538665771e-05, 1.2412667274475098e-05, 1.3358891010284424e-05, 1.430511474609375e-05, 1.5251338481903076e-05, 1.6197562217712402e-05, 1.714378595352173e-05, 1.8090009689331055e-05, 1.903623342514038e-05, 1.9982457160949707e-05, 2.0928680896759033e-05, 2.187490463256836e-05, 2.2821128368377686e-05, 2.3767352104187012e-05, 2.4713575839996338e-05, 2.5659799575805664e-05, 2.660602331161499e-05, 2.7552247047424316e-05, 2.8498470783233643e-05, 2.944469451904297e-05]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 5.0, 4.0, 6.0, 10.0, 21.0, 20.0, 26.0, 43.0, 101.0, 113.0, 193.0, 333.0, 485.0, 882.0, 1539.0, 2758.0, 5418.0, 11117.0, 26009.0, 71120.0, 258290.0, 464315.0, 130407.0, 41791.0, 16816.0, 7850.0, 3907.0, 2121.0, 1180.0, 882.0, 285.0, 183.0, 110.0, 63.0, 62.0, 28.0, 14.0, 20.0, 8.0, 5.0, 5.0, 4.0, 2.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-4.172325134277344e-06, -4.050321877002716e-06, -3.928318619728088e-06, -3.8063153624534607e-06, -3.684312105178833e-06, -3.5623088479042053e-06, -3.4403055906295776e-06, -3.31830233335495e-06, -3.1962990760803223e-06, -3.0742958188056946e-06, -2.952292561531067e-06, -2.830289304256439e-06, -2.7082860469818115e-06, -2.586282789707184e-06, -2.464279532432556e-06, -2.3422762751579285e-06, -2.2202730178833008e-06, -2.098269760608673e-06, -1.9762665033340454e-06, -1.8542632460594177e-06, -1.73225998878479e-06, -1.6102567315101624e-06, -1.4882534742355347e-06, -1.366250216960907e-06, -1.2442469596862793e-06, -1.1222437024116516e-06, -1.000240445137024e-06, -8.782371878623962e-07, -7.562339305877686e-07, -6.342306733131409e-07, -5.122274160385132e-07, -3.902241587638855e-07, -2.682209014892578e-07, -1.4621764421463013e-07, -2.421438694000244e-08, 9.778887033462524e-08, 2.1979212760925293e-07, 3.417953848838806e-07, 4.637986421585083e-07, 5.85801899433136e-07, 7.078051567077637e-07, 8.298084139823914e-07, 9.51811671257019e-07, 1.0738149285316467e-06, 1.1958181858062744e-06, 1.317821443080902e-06, 1.4398247003555298e-06, 1.5618279576301575e-06, 1.6838312149047852e-06, 1.8058344721794128e-06, 1.9278377294540405e-06, 2.0498409867286682e-06, 2.171844244003296e-06, 2.2938475012779236e-06, 2.4158507585525513e-06, 2.537854015827179e-06, 2.6598572731018066e-06, 2.7818605303764343e-06, 2.903863787651062e-06, 3.0258670449256897e-06, 3.1478703022003174e-06, 3.269873559474945e-06, 3.3918768167495728e-06, 3.5138800740242004e-06, 3.635883331298828e-06]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 5.0, 0.0, 8.0, 0.0, 15.0, 0.0, 15.0, 0.0, 24.0, 0.0, 47.0, 0.0, 59.0, 0.0, 78.0, 0.0, 82.0, 0.0, 118.0, 0.0, 123.0, 0.0, 92.0, 0.0, 96.0, 0.0, 80.0, 0.0, 57.0, 0.0, 39.0, 0.0, 27.0, 0.0, 10.0, 0.0, 12.0, 0.0, 12.0, 0.0, 4.0, 0.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.0132789611816406e-06, -9.834766387939453e-07, -9.5367431640625e-07, -9.238719940185547e-07, -8.940696716308594e-07, -8.642673492431641e-07, -8.344650268554688e-07, -8.046627044677734e-07, -7.748603820800781e-07, -7.450580596923828e-07, -7.152557373046875e-07, -6.854534149169922e-07, -6.556510925292969e-07, 
-6.258487701416016e-07, -5.960464477539062e-07, -5.662441253662109e-07, -5.364418029785156e-07, -5.066394805908203e-07, -4.76837158203125e-07, -4.470348358154297e-07, -4.172325134277344e-07, -3.8743019104003906e-07, -3.5762786865234375e-07, -3.2782554626464844e-07, -2.980232238769531e-07, -2.682209014892578e-07, -2.384185791015625e-07, -2.086162567138672e-07, -1.7881393432617188e-07, -1.4901161193847656e-07, -1.1920928955078125e-07, -8.940696716308594e-08, -5.960464477539063e-08, -2.9802322387695312e-08, 0.0, 2.9802322387695312e-08, 5.960464477539063e-08, 8.940696716308594e-08, 1.1920928955078125e-07, 1.4901161193847656e-07, 1.7881393432617188e-07, 2.086162567138672e-07, 2.384185791015625e-07, 2.682209014892578e-07, 2.980232238769531e-07, 3.2782554626464844e-07, 3.5762786865234375e-07, 3.8743019104003906e-07, 4.172325134277344e-07, 4.470348358154297e-07, 4.76837158203125e-07, 5.066394805908203e-07, 5.364418029785156e-07, 5.662441253662109e-07, 5.960464477539062e-07, 6.258487701416016e-07, 6.556510925292969e-07, 6.854534149169922e-07, 7.152557373046875e-07, 7.450580596923828e-07, 7.748603820800781e-07, 8.046627044677734e-07, 8.344650268554688e-07, 8.642673492431641e-07, 8.940696716308594e-07]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 1.0, 8.0, 4.0, 10.0, 9.0, 23.0, 59.0, 57.0, 74.0, 120.0, 168.0, 258.0, 320.0, 1344.0, 1107.0, 1784.0, 2736.0, 4326.0, 7036.0, 11972.0, 58502.0, 74928.0, 179003.0, 362521.0, 177679.0, 74801.0, 37719.0, 32277.0, 7066.0, 4325.0, 2788.0, 1759.0, 1228.0, 826.0, 913.0, 269.0, 172.0, 130.0, 71.0, 60.0, 31.0, 29.0, 14.0, 12.0, 6.0, 5.0, 1.0, 3.0, 7.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.1457672119140625e-06, -2.0777806639671326e-06, -2.0097941160202026e-06, -1.9418075680732727e-06, -1.8738210201263428e-06, -1.8058344721794128e-06, -1.737847924232483e-06, -1.669861376285553e-06, -1.601874828338623e-06, -1.5338882803916931e-06, -1.4659017324447632e-06, -1.3979151844978333e-06, -1.3299286365509033e-06, -1.2619420886039734e-06, -1.1939555406570435e-06, -1.1259689927101135e-06, -1.0579824447631836e-06, -9.899958968162537e-07, -9.220093488693237e-07, -8.540228009223938e-07, -7.860362529754639e-07, -7.180497050285339e-07, -6.50063157081604e-07, -5.820766091346741e-07, -5.140900611877441e-07, -4.461035132408142e-07, -3.781169652938843e-07, -3.1013041734695435e-07, -2.421438694000244e-07, -1.7415732145309448e-07, -1.0617077350616455e-07, -3.818422555923462e-08, 2.9802322387695312e-08, 9.778887033462524e-08, 1.6577541828155518e-07, 2.337619662284851e-07, 3.0174851417541504e-07, 3.6973506212234497e-07, 4.377216100692749e-07, 5.057081580162048e-07, 5.736947059631348e-07, 6.416812539100647e-07, 7.096678018569946e-07, 7.776543498039246e-07, 8.456408977508545e-07, 9.136274456977844e-07, 9.816139936447144e-07, 1.0496005415916443e-06, 1.1175870895385742e-06, 1.1855736374855042e-06, 1.253560185432434e-06, 1.321546733379364e-06, 1.389533281326294e-06, 1.4575198292732239e-06, 1.5255063772201538e-06, 1.5934929251670837e-06, 1.6614794731140137e-06, 1.7294660210609436e-06, 1.7974525690078735e-06, 1.8654391169548035e-06, 1.9334256649017334e-06, 2.0014122128486633e-06, 2.0693987607955933e-06, 2.137385308742523e-06, 2.205371856689453e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 3.0, 5.0, 6.0, 0.0, 4.0, 4.0, 15.0, 10.0, 18.0, 20.0, 0.0, 34.0, 29.0, 31.0, 36.0, 43.0, 57.0, 0.0, 
65.0, 67.0, 68.0, 74.0, 61.0, 61.0, 0.0, 47.0, 38.0, 46.0, 29.0, 35.0, 21.0, 0.0, 15.0, 9.0, 14.0, 7.0, 3.0, 8.0, 0.0, 7.0, 5.0, 2.0, 2.0, 4.0, 2.0, 0.0, 3.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.6093254089355469e-06, -1.5581026673316956e-06, -1.5068799257278442e-06, -1.455657184123993e-06, -1.4044344425201416e-06, -1.3532117009162903e-06, -1.301988959312439e-06, -1.2507662177085876e-06, -1.1995434761047363e-06, -1.148320734500885e-06, -1.0970979928970337e-06, -1.0458752512931824e-06, -9.94652509689331e-07, -9.434297680854797e-07, -8.922070264816284e-07, -8.409842848777771e-07, -7.897615432739258e-07, -7.385388016700745e-07, -6.873160600662231e-07, -6.360933184623718e-07, -5.848705768585205e-07, -5.336478352546692e-07, -4.824250936508179e-07, -4.3120235204696655e-07, -3.7997961044311523e-07, -3.287568688392639e-07, -2.775341272354126e-07, -2.2631138563156128e-07, -1.7508864402770996e-07, -1.2386590242385864e-07, -7.264316082000732e-08, -2.1420419216156006e-08, 2.9802322387695312e-08, 8.102506399154663e-08, 1.3224780559539795e-07, 1.8347054719924927e-07, 2.3469328880310059e-07, 2.859160304069519e-07, 3.371387720108032e-07, 3.8836151361465454e-07, 4.3958425521850586e-07, 4.908069968223572e-07, 5.420297384262085e-07, 5.932524800300598e-07, 6.444752216339111e-07, 6.956979632377625e-07, 7.469207048416138e-07, 7.981434464454651e-07, 8.493661880493164e-07, 9.005889296531677e-07, 9.51811671257019e-07, 1.0030344128608704e-06, 1.0542571544647217e-06, 1.105479896068573e-06, 1.1567026376724243e-06, 1.2079253792762756e-06, 1.259148120880127e-06, 1.3103708624839783e-06, 1.3615936040878296e-06, 1.412816345691681e-06, 1.4640390872955322e-06, 1.5152618288993835e-06, 1.5664845705032349e-06, 1.6177073121070862e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 4.0, 5.0, 5.0, 4.0, 8.0, 12.0, 16.0, 17.0, 34.0, 65.0, 120.0, 212.0, 139.0, 93.0, 68.0, 44.0, 29.0, 31.0, 26.0, 15.0, 17.0, 6.0, 12.0, 6.0, 8.0, 8.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0002032840420724824, -0.00019788120698649436, -0.0001924783573485911, -0.00018707552226260304, -0.000181672687176615, -0.00017626983753871173, -0.00017086700245272368, -0.0001654641528148204, -0.00016006131772883236, -0.00015465848264284432, -0.00014925563300494105, -0.000143852797918953, -0.00013844994828104973, -0.00013304711319506168, -0.00012764427810907364, -0.0001222414430230856, -0.00011683859338518232, -0.00011143575102323666, -0.000106032908661291, -0.00010063007357530296, -9.52272312133573e-05, -8.982438885141164e-05, -8.44215537654236e-05, -7.901871140347794e-05, -7.361586904153228e-05, -6.821302667958662e-05, -6.281018431764096e-05, -5.7407349231652915e-05, -5.2004506869707257e-05, -4.66016645077616e-05, -4.1198825783794746e-05, -3.5795987059827894e-05, -3.0393159249797463e-05, -2.4990318706841208e-05, -1.9587478163884953e-05, -1.4184637620928697e-05, -8.781797077972442e-06, -3.3789565350161865e-06, 2.023884007940069e-06, 7.426722731906921e-06, 1.282956509385258e-05, 1.8232405636808835e-05, 2.363524617976509e-05, 2.9038086722721346e-05, 3.44409272656776e-05, 3.984376962762326e-05, 4.524660835159011e-05, 5.0649447075556964e-05, 5.605228943750262e-05, 6.145513179944828e-05, 6.685797416139394e-05, 7.226080924738199e-05, 7.766365160932764e-05, 8.30664939712733e-05, 8.846932905726135e-05, 9.3872171419207e-05, 
9.927501378115267e-05, 0.00010467785614309832, 0.00011008069850504398, 0.00011548353359103203, 0.00012088637595297769, 0.00012628921831492335, 0.0001316920534009114, 0.00013709490303881466, 0.0001424977381248027]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 5.0, 0.0, 4.0, 3.0, 5.0, 10.0, 6.0, 12.0, 8.0, 16.0, 13.0, 15.0, 14.0, 21.0, 24.0, 13.0, 25.0, 18.0, 35.0, 32.0, 26.0, 25.0, 16.0, 47.0, 39.0, 33.0, 38.0, 39.0, 30.0, 38.0, 39.0, 44.0, 35.0, 27.0, 30.0, 25.0, 32.0, 20.0, 14.0, 21.0, 23.0, 19.0, 17.0, 7.0, 10.0, 8.0, 8.0, 6.0, 5.0, 7.0, 1.0, 2.0, 0.0, 0.0, 3.0, 1.0, 1.0, 2.0], "bins": [-8.112192153930664e-05, -7.863808423280716e-05, -7.615424692630768e-05, -7.36704096198082e-05, -7.118657231330872e-05, -6.870273500680923e-05, -6.621889770030975e-05, -6.373506039381027e-05, -6.125122308731079e-05, -5.876738578081131e-05, -5.628354847431183e-05, -5.379971116781235e-05, -5.1315873861312866e-05, -4.8832036554813385e-05, -4.6348199248313904e-05, -4.386436194181442e-05, -4.138052463531494e-05, -3.889668732881546e-05, -3.641285002231598e-05, -3.39290127158165e-05, -3.1445175409317017e-05, -2.8961338102817535e-05, -2.6477500796318054e-05, -2.3993663489818573e-05, -2.1509826183319092e-05, -1.902598887681961e-05, -1.654215157032013e-05, -1.4058314263820648e-05, -1.1574476957321167e-05, -9.090639650821686e-06, -6.606802344322205e-06, -4.122965037822723e-06, -1.6391277313232422e-06, 8.44709575176239e-07, 3.3285468816757202e-06, 5.812384188175201e-06, 8.296221494674683e-06, 1.0780058801174164e-05, 1.3263896107673645e-05, 1.5747733414173126e-05, 1.8231570720672607e-05, 2.071540802717209e-05, 2.319924533367157e-05, 2.568308264017105e-05, 2.8166919946670532e-05, 3.0650757253170013e-05, 3.3134594559669495e-05, 3.5618431866168976e-05, 3.810226917266846e-05, 4.058610647916794e-05, 4.306994378566742e-05, 4.55537810921669e-05, 4.803761839866638e-05, 5.052145570516586e-05, 5.3005293011665344e-05, 5.5489130318164825e-05, 5.797296762466431e-05, 6.045680493116379e-05, 6.294064223766327e-05, 6.542447954416275e-05, 6.790831685066223e-05, 7.039215415716171e-05, 7.28759914636612e-05, 7.535982877016068e-05, 7.784366607666016e-05]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 4.0, 0.0, 1.0, 4.0, 5.0, 4.0, 13.0, 18.0, 12.0, 19.0, 39.0, 65.0, 99.0, 144.0, 264.0, 462.0, 832.0, 1691.0, 3721.0, 9007.0, 33051.0, 2839354.0, 1260741.0, 29025.0, 8755.0, 3460.0, 1644.0, 812.0, 407.0, 240.0, 130.0, 72.0, 49.0, 32.0, 13.0, 14.0, 14.0, 10.0, 8.0, 10.0, 10.0, 8.0, 4.0, 4.0, 3.0, 3.0, 5.0, 2.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-8.153915405273438e-05, -7.827393710613251e-05, -7.500872015953064e-05, -7.174350321292877e-05, -6.84782862663269e-05, -6.521306931972504e-05, -6.194785237312317e-05, -5.86826354265213e-05, -5.5417418479919434e-05, -5.2152201533317566e-05, -4.88869845867157e-05, -4.562176764011383e-05, -4.235655069351196e-05, -3.9091333746910095e-05, -3.582611680030823e-05, -3.256089985370636e-05, -2.9295682907104492e-05, -2.6030465960502625e-05, -2.2765249013900757e-05, -1.950003206729889e-05, -1.623481512069702e-05, -1.2969598174095154e-05, -9.704381227493286e-06, -6.4391642808914185e-06, -3.1739473342895508e-06, 9.12696123123169e-08, 3.3564865589141846e-06, 6.621703505516052e-06, 9.88692045211792e-06, 1.3152137398719788e-05, 1.6417354345321655e-05, 1.9682571291923523e-05, 2.294778823852539e-05, 2.6213005185127258e-05, 
2.9478222131729126e-05, 3.2743439078330994e-05, 3.600865602493286e-05, 3.927387297153473e-05, 4.25390899181366e-05, 4.5804306864738464e-05, 4.906952381134033e-05, 5.23347407579422e-05, 5.559995770454407e-05, 5.8865174651145935e-05, 6.21303915977478e-05, 6.539560854434967e-05, 6.866082549095154e-05, 7.19260424375534e-05, 7.519125938415527e-05, 7.845647633075714e-05, 8.172169327735901e-05, 8.498691022396088e-05, 8.825212717056274e-05, 9.151734411716461e-05, 9.478256106376648e-05, 9.804777801036835e-05, 0.00010131299495697021, 0.00010457821190357208, 0.00010784342885017395, 0.00011110864579677582, 0.00011437386274337769, 0.00011763907968997955, 0.00012090429663658142, 0.0001241695135831833, 0.00012743473052978516]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 8.0, 5.0, 12.0, 14.0, 20.0, 28.0, 40.0, 77.0, 121.0, 133.0, 129.0, 125.0, 99.0, 70.0, 45.0, 26.0, 18.0, 15.0, 8.0, 4.0, 5.0, 6.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.09808349609375e-05, -2.0121224224567413e-05, -1.9261613488197327e-05, -1.840200275182724e-05, -1.7542392015457153e-05, -1.6682781279087067e-05, -1.582317054271698e-05, -1.4963559806346893e-05, -1.4103949069976807e-05, -1.324433833360672e-05, -1.2384727597236633e-05, -1.1525116860866547e-05, -1.066550612449646e-05, -9.805895388126373e-06, -8.946284651756287e-06, -8.0866739153862e-06, -7.227063179016113e-06, -6.367452442646027e-06, -5.50784170627594e-06, -4.648230969905853e-06, -3.7886202335357666e-06, -2.92900949716568e-06, -2.0693987607955933e-06, -1.2097880244255066e-06, -3.501772880554199e-07, 5.094334483146667e-07, 1.3690441846847534e-06, 2.22865492105484e-06, 3.0882656574249268e-06, 3.9478763937950134e-06, 4.8074871301651e-06, 5.667097866535187e-06, 6.5267086029052734e-06, 7.38631933927536e-06, 8.245930075645447e-06, 9.105540812015533e-06, 9.96515154838562e-06, 1.0824762284755707e-05, 1.1684373021125793e-05, 1.254398375749588e-05, 1.3403594493865967e-05, 1.4263205230236053e-05, 1.512281596660614e-05, 1.5982426702976227e-05, 1.6842037439346313e-05, 1.77016481757164e-05, 1.8561258912086487e-05, 1.9420869648456573e-05, 2.028048038482666e-05, 2.1140091121196747e-05, 2.1999701857566833e-05, 2.285931259393692e-05, 2.3718923330307007e-05, 2.4578534066677094e-05, 2.543814480304718e-05, 2.6297755539417267e-05, 2.7157366275787354e-05, 2.801697701215744e-05, 2.8876587748527527e-05, 2.9736198484897614e-05, 3.05958092212677e-05, 3.145541995763779e-05, 3.2315030694007874e-05, 3.317464143037796e-05, 3.403425216674805e-05]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 3.0, 1.0, 5.0, 8.0, 8.0, 25.0, 22.0, 34.0, 45.0, 65.0, 130.0, 166.0, 265.0, 402.0, 621.0, 969.0, 1524.0, 2481.0, 4291.0, 6914.0, 12182.0, 22979.0, 48154.0, 142496.0, 3195585.0, 594878.0, 86593.0, 33017.0, 16839.0, 9123.0, 5500.0, 3282.0, 2039.0, 1296.0, 842.0, 485.0, 365.0, 223.0, 140.0, 92.0, 61.0, 53.0, 34.0, 15.0, 7.0, 6.0, 7.0, 7.0, 5.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-3.141164779663086e-05, -3.0390918254852295e-05, -2.937018871307373e-05, -2.8349459171295166e-05, -2.73287296295166e-05, -2.6308000087738037e-05, -2.5287270545959473e-05, -2.4266541004180908e-05, -2.3245811462402344e-05, -2.222508192062378e-05, -2.1204352378845215e-05, -2.018362283706665e-05, 
-1.9162893295288086e-05, -1.814216375350952e-05, -1.7121434211730957e-05, -1.6100704669952393e-05, -1.5079975128173828e-05, -1.4059245586395264e-05, -1.30385160446167e-05, -1.2017786502838135e-05, -1.099705696105957e-05, -9.976327419281006e-06, -8.955597877502441e-06, -7.934868335723877e-06, -6.9141387939453125e-06, -5.893409252166748e-06, -4.872679710388184e-06, -3.851950168609619e-06, -2.8312206268310547e-06, -1.8104910850524902e-06, -7.897615432739258e-07, 2.3096799850463867e-07, 1.2516975402832031e-06, 2.2724270820617676e-06, 3.293156623840332e-06, 4.3138861656188965e-06, 5.334615707397461e-06, 6.355345249176025e-06, 7.37607479095459e-06, 8.396804332733154e-06, 9.417533874511719e-06, 1.0438263416290283e-05, 1.1458992958068848e-05, 1.2479722499847412e-05, 1.3500452041625977e-05, 1.4521181583404541e-05, 1.5541911125183105e-05, 1.656264066696167e-05, 1.7583370208740234e-05, 1.86040997505188e-05, 1.9624829292297363e-05, 2.0645558834075928e-05, 2.1666288375854492e-05, 2.2687017917633057e-05, 2.370774745941162e-05, 2.4728477001190186e-05, 2.574920654296875e-05, 2.6769936084747314e-05, 2.779066562652588e-05, 2.8811395168304443e-05, 2.9832124710083008e-05, 3.085285425186157e-05, 3.187358379364014e-05, 3.28943133354187e-05, 3.3915042877197266e-05]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 3.0, 2.0, 5.0, 6.0, 10.0, 4.0, 10.0, 11.0, 6.0, 21.0, 20.0, 22.0, 27.0, 34.0, 42.0, 71.0, 108.0, 355.0, 1038.0, 1406.0, 441.0, 123.0, 69.0, 52.0, 22.0, 25.0, 35.0, 20.0, 13.0, 12.0, 12.0, 12.0, 7.0, 5.0, 9.0, 5.0, 4.0, 2.0, 2.0, 3.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5451183319091797e-05, -2.4577602744102478e-05, -2.370402216911316e-05, -2.283044159412384e-05, -2.195686101913452e-05, -2.1083280444145203e-05, -2.0209699869155884e-05, -1.9336119294166565e-05, -1.8462538719177246e-05, -1.7588958144187927e-05, -1.671537756919861e-05, -1.584179699420929e-05, -1.496821641921997e-05, -1.4094635844230652e-05, -1.3221055269241333e-05, -1.2347474694252014e-05, -1.1473894119262695e-05, -1.0600313544273376e-05, -9.726732969284058e-06, -8.853152394294739e-06, -7.97957181930542e-06, -7.105991244316101e-06, -6.232410669326782e-06, -5.358830094337463e-06, -4.4852495193481445e-06, -3.6116689443588257e-06, -2.738088369369507e-06, -1.864507794380188e-06, -9.909272193908691e-07, -1.1734664440155029e-07, 7.562339305877686e-07, 1.6298145055770874e-06, 2.5033950805664062e-06, 3.376975655555725e-06, 4.250556230545044e-06, 5.124136805534363e-06, 5.997717380523682e-06, 6.8712979555130005e-06, 7.74487853050232e-06, 8.618459105491638e-06, 9.492039680480957e-06, 1.0365620255470276e-05, 1.1239200830459595e-05, 1.2112781405448914e-05, 1.2986361980438232e-05, 1.3859942555427551e-05, 1.473352313041687e-05, 1.560710370540619e-05, 1.6480684280395508e-05, 1.7354264855384827e-05, 1.8227845430374146e-05, 1.9101426005363464e-05, 1.9975006580352783e-05, 2.0848587155342102e-05, 2.172216773033142e-05, 2.259574830532074e-05, 2.346932888031006e-05, 2.4342909455299377e-05, 2.5216490030288696e-05, 2.6090070605278015e-05, 2.6963651180267334e-05, 2.7837231755256653e-05, 2.871081233024597e-05, 2.958439290523529e-05, 3.045797348022461e-05]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 6.0, 4.0, 7.0, 5.0, 5.0, 12.0, 12.0, 12.0, 27.0, 25.0, 44.0, 46.0, 101.0, 112.0, 
135.0, 111.0, 55.0, 64.0, 35.0, 43.0, 37.0, 25.0, 19.0, 21.0, 7.0, 7.0, 5.0, 3.0, 4.0, 2.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00012105625501135364, -0.0001169121460407041, -0.00011276803707005456, -0.0001086239208234474, -0.00010447981185279787, -0.00010033570288214833, -9.619159391149879e-05, -9.204748494084924e-05, -8.79033759701997e-05, -8.375926699955016e-05, -7.961515802890062e-05, -7.547104905825108e-05, -7.132693281164393e-05, -6.718282384099439e-05, -6.303871487034485e-05, -5.889460589969531e-05, -5.475048965308815e-05, -5.060638068243861e-05, -4.6462268073810264e-05, -4.2318159103160724e-05, -3.8174046494532377e-05, -3.4029937523882836e-05, -2.9885828553233296e-05, -2.5741717763594352e-05, -2.1597606973955408e-05, -1.7453496184316464e-05, -1.3309386304172222e-05, -9.16527642402798e-06, -5.021165634389035e-06, -8.770548447500914e-07, 3.267054125899449e-06, 7.411164915538393e-06, 1.1555268429219723e-05, 1.5699379218858667e-05, 1.984349000849761e-05, 2.398759897914715e-05, 2.8131709768786095e-05, 3.227582055842504e-05, 3.641992952907458e-05, 4.056403849972412e-05, 4.470815110835247e-05, 4.885226007900201e-05, 5.2996372687630355e-05, 5.7140481658279896e-05, 6.128459062892944e-05, 6.542869959957898e-05, 6.957280857022852e-05, 7.371692481683567e-05, 7.786103378748521e-05, 8.200514275813475e-05, 8.614925172878429e-05, 9.029336797539145e-05, 9.443747694604099e-05, 9.858158591669053e-05, 0.00010272569488734007, 0.00010686980385798961, 0.00011101391282863915, 0.00011515802179928869, 0.00011930213076993823, 0.00012344623974058777, 0.0001275903487112373, 0.00013173447223380208, 0.0001358785666525364, 0.00014002269017510116, 0.0001441667991457507]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 2.0, 5.0, 4.0, 6.0, 8.0, 8.0, 13.0, 11.0, 13.0, 16.0, 23.0, 15.0, 14.0, 23.0, 30.0, 23.0, 29.0, 41.0, 27.0, 37.0, 43.0, 36.0, 36.0, 41.0, 33.0, 32.0, 42.0, 42.0, 39.0, 46.0, 35.0, 31.0, 27.0, 25.0, 26.0, 15.0, 16.0, 15.0, 14.0, 20.0, 9.0, 8.0, 6.0, 8.0, 5.0, 1.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-6.896257400512695e-05, -6.688293069601059e-05, -6.480328738689423e-05, -6.272364407777786e-05, -6.06440007686615e-05, -5.8564357459545135e-05, -5.648471415042877e-05, -5.440507084131241e-05, -5.2325427532196045e-05, -5.024578422307968e-05, -4.816614091396332e-05, -4.6086497604846954e-05, -4.400685429573059e-05, -4.192721098661423e-05, -3.9847567677497864e-05, -3.77679243683815e-05, -3.568828105926514e-05, -3.360863775014877e-05, -3.152899444103241e-05, -2.9449351131916046e-05, -2.7369707822799683e-05, -2.529006451368332e-05, -2.3210421204566956e-05, -2.1130777895450592e-05, -1.905113458633423e-05, -1.6971491277217865e-05, -1.4891847968101501e-05, -1.2812204658985138e-05, -1.0732561349868774e-05, -8.652918040752411e-06, -6.573274731636047e-06, -4.493631422519684e-06, -2.4139881134033203e-06, -3.343448042869568e-07, 1.7452985048294067e-06, 3.82494181394577e-06, 5.904585123062134e-06, 7.984228432178497e-06, 1.006387174129486e-05, 1.2143515050411224e-05, 1.4223158359527588e-05, 1.630280166864395e-05, 1.8382444977760315e-05, 2.046208828687668e-05, 2.2541731595993042e-05, 2.4621374905109406e-05, 2.670101821422577e-05, 2.8780661523342133e-05, 3.0860304832458496e-05, 3.293994814157486e-05, 3.501959145069122e-05, 3.709923475980759e-05, 3.917887806892395e-05, 4.1258521378040314e-05, 4.333816468715668e-05, 4.541780799627304e-05, 
4.7497451305389404e-05, 4.957709461450577e-05, 5.165673792362213e-05, 5.3736381232738495e-05, 5.581602454185486e-05, 5.789566785097122e-05, 5.9975311160087585e-05, 6.205495446920395e-05, 6.413459777832031e-05]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 2.0, 1.0, 4.0, 2.0, 8.0, 8.0, 6.0, 10.0, 8.0, 10.0, 13.0, 19.0, 36.0, 47.0, 61.0, 70.0, 123.0, 170.0, 333.0, 523.0, 1045.0, 2031.0, 4372.0, 10691.0, 31776.0, 128043.0, 667676.0, 145715.0, 34934.0, 11249.0, 4833.0, 2100.0, 1159.0, 540.0, 343.0, 161.0, 127.0, 85.0, 53.0, 47.0, 32.0, 19.0, 15.0, 17.0, 11.0, 5.0, 9.0, 6.0, 3.0, 4.0, 2.0, 2.0, 3.0, 2.0, 3.0], "bins": [-0.0001430511474609375, -0.0001390259712934494, -0.0001350007951259613, -0.0001309756189584732, -0.0001269504427909851, -0.000122925266623497, -0.00011890009045600891, -0.00011487491428852081, -0.00011084973812103271, -0.00010682456195354462, -0.00010279938578605652, -9.877420961856842e-05, -9.474903345108032e-05, -9.072385728359222e-05, -8.669868111610413e-05, -8.267350494861603e-05, -7.864832878112793e-05, -7.462315261363983e-05, -7.059797644615173e-05, -6.657280027866364e-05, -6.254762411117554e-05, -5.852244794368744e-05, -5.449727177619934e-05, -5.047209560871124e-05, -4.6446919441223145e-05, -4.2421743273735046e-05, -3.839656710624695e-05, -3.437139093875885e-05, -3.0346214771270752e-05, -2.6321038603782654e-05, -2.2295862436294556e-05, -1.8270686268806458e-05, -1.424551010131836e-05, -1.0220333933830261e-05, -6.195157766342163e-06, -2.169981598854065e-06, 1.8551945686340332e-06, 5.880370736122131e-06, 9.90554690361023e-06, 1.3930723071098328e-05, 1.7955899238586426e-05, 2.1981075406074524e-05, 2.6006251573562622e-05, 3.003142774105072e-05, 3.405660390853882e-05, 3.8081780076026917e-05, 4.2106956243515015e-05, 4.613213241100311e-05, 5.015730857849121e-05, 5.418248474597931e-05, 5.820766091346741e-05, 6.22328370809555e-05, 6.62580132484436e-05, 7.02831894159317e-05, 7.43083655834198e-05, 7.83335417509079e-05, 8.2358717918396e-05, 8.63838940858841e-05, 9.040907025337219e-05, 9.443424642086029e-05, 9.845942258834839e-05, 0.00010248459875583649, 0.00010650977492332458, 0.00011053495109081268, 0.00011456012725830078]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 2.0, 2.0, 7.0, 9.0, 11.0, 25.0, 19.0, 23.0, 74.0, 85.0, 111.0, 108.0, 130.0, 120.0, 67.0, 72.0, 40.0, 35.0, 18.0, 14.0, 10.0, 5.0, 9.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.633167266845703e-05, -1.5566125512123108e-05, -1.4800578355789185e-05, -1.4035031199455261e-05, -1.3269484043121338e-05, -1.2503936886787415e-05, -1.1738389730453491e-05, -1.0972842574119568e-05, -1.0207295417785645e-05, -9.441748261451721e-06, -8.676201105117798e-06, -7.910653948783875e-06, -7.145106792449951e-06, -6.379559636116028e-06, -5.6140124797821045e-06, -4.848465323448181e-06, -4.082918167114258e-06, -3.3173710107803345e-06, -2.551823854446411e-06, -1.7862766981124878e-06, -1.0207295417785645e-06, -2.551823854446411e-07, 5.103647708892822e-07, 1.2759119272232056e-06, 2.041459083557129e-06, 2.8070062398910522e-06, 3.5725533962249756e-06, 4.338100552558899e-06, 5.103647708892822e-06, 5.869194865226746e-06, 6.634742021560669e-06, 7.400289177894592e-06, 8.165836334228516e-06, 8.931383490562439e-06, 
9.696930646896362e-06, 1.0462477803230286e-05, 1.1228024959564209e-05, 1.1993572115898132e-05, 1.2759119272232056e-05, 1.3524666428565979e-05, 1.4290213584899902e-05, 1.5055760741233826e-05, 1.582130789756775e-05, 1.6586855053901672e-05, 1.7352402210235596e-05, 1.811794936656952e-05, 1.8883496522903442e-05, 1.9649043679237366e-05, 2.041459083557129e-05, 2.1180137991905212e-05, 2.1945685148239136e-05, 2.271123230457306e-05, 2.3476779460906982e-05, 2.4242326617240906e-05, 2.500787377357483e-05, 2.5773420929908752e-05, 2.6538968086242676e-05, 2.73045152425766e-05, 2.8070062398910522e-05, 2.8835609555244446e-05, 2.960115671157837e-05, 3.0366703867912292e-05, 3.1132251024246216e-05, 3.189779818058014e-05, 3.266334533691406e-05]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 3.0, 4.0, 6.0, 8.0, 15.0, 20.0, 21.0, 40.0, 55.0, 91.0, 148.0, 176.0, 279.0, 432.0, 694.0, 949.0, 1380.0, 1936.0, 2943.0, 4404.0, 6711.0, 10339.0, 16007.0, 26615.0, 44060.0, 78566.0, 163558.0, 373119.0, 138750.0, 70074.0, 40007.0, 23936.0, 14867.0, 9575.0, 6195.0, 4098.0, 2687.0, 1799.0, 1275.0, 933.0, 583.0, 403.0, 254.0, 159.0, 150.0, 80.0, 61.0, 37.0, 15.0, 24.0, 12.0, 8.0, 3.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.0934810638427734e-05, -2.998020499944687e-05, -2.9025599360466003e-05, -2.8070993721485138e-05, -2.7116388082504272e-05, -2.6161782443523407e-05, -2.520717680454254e-05, -2.4252571165561676e-05, -2.329796552658081e-05, -2.2343359887599945e-05, -2.138875424861908e-05, -2.0434148609638214e-05, -1.947954297065735e-05, -1.8524937331676483e-05, -1.7570331692695618e-05, -1.6615726053714752e-05, -1.5661120414733887e-05, -1.4706514775753021e-05, -1.3751909136772156e-05, -1.279730349779129e-05, -1.1842697858810425e-05, -1.088809221982956e-05, -9.933486580848694e-06, -8.978880941867828e-06, -8.024275302886963e-06, -7.069669663906097e-06, -6.115064024925232e-06, -5.1604583859443665e-06, -4.205852746963501e-06, -3.2512471079826355e-06, -2.29664146900177e-06, -1.3420358300209045e-06, -3.8743019104003906e-07, 5.671754479408264e-07, 1.521781086921692e-06, 2.4763867259025574e-06, 3.430992364883423e-06, 4.385598003864288e-06, 5.340203642845154e-06, 6.294809281826019e-06, 7.249414920806885e-06, 8.20402055978775e-06, 9.158626198768616e-06, 1.0113231837749481e-05, 1.1067837476730347e-05, 1.2022443115711212e-05, 1.2977048754692078e-05, 1.3931654393672943e-05, 1.4886260032653809e-05, 1.5840865671634674e-05, 1.679547131061554e-05, 1.7750076949596405e-05, 1.870468258857727e-05, 1.9659288227558136e-05, 2.0613893866539e-05, 2.1568499505519867e-05, 2.2523105144500732e-05, 2.3477710783481598e-05, 2.4432316422462463e-05, 2.538692206144333e-05, 2.6341527700424194e-05, 2.729613333940506e-05, 2.8250738978385925e-05, 2.920534461736679e-05, 3.0159950256347656e-05]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 5.0, 3.0, 7.0, 3.0, 6.0, 3.0, 10.0, 10.0, 9.0, 12.0, 14.0, 20.0, 22.0, 22.0, 19.0, 27.0, 33.0, 28.0, 37.0, 27.0, 29.0, 45.0, 35.0, 44.0, 48.0, 43.0, 46.0, 30.0, 29.0, 44.0, 32.0, 33.0, 36.0, 21.0, 24.0, 24.0, 27.0, 18.0, 11.0, 8.0, 8.0, 12.0, 7.0, 7.0, 6.0, 2.0, 3.0, 7.0, 9.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.9265880584716797e-05, -2.8370879590511322e-05, -2.7475878596305847e-05, -2.6580877602100372e-05, -2.5685876607894897e-05, -2.4790875613689423e-05, -2.3895874619483948e-05, -2.3000873625278473e-05, -2.2105872631072998e-05, -2.1210871636867523e-05, 
-2.031587064266205e-05, -1.9420869648456573e-05, -1.85258686542511e-05, -1.7630867660045624e-05, -1.673586666584015e-05, -1.5840865671634674e-05, -1.49458646774292e-05, -1.4050863683223724e-05, -1.315586268901825e-05, -1.2260861694812775e-05, -1.13658607006073e-05, -1.0470859706401825e-05, -9.57585871219635e-06, -8.680857717990875e-06, -7.7858567237854e-06, -6.8908557295799255e-06, -5.995854735374451e-06, -5.100853741168976e-06, -4.205852746963501e-06, -3.310851752758026e-06, -2.4158507585525513e-06, -1.5208497643470764e-06, -6.258487701416016e-07, 2.691522240638733e-07, 1.1641532182693481e-06, 2.059154212474823e-06, 2.954155206680298e-06, 3.849156200885773e-06, 4.7441571950912476e-06, 5.639158189296722e-06, 6.534159183502197e-06, 7.429160177707672e-06, 8.324161171913147e-06, 9.219162166118622e-06, 1.0114163160324097e-05, 1.1009164154529572e-05, 1.1904165148735046e-05, 1.2799166142940521e-05, 1.3694167137145996e-05, 1.4589168131351471e-05, 1.5484169125556946e-05, 1.637917011976242e-05, 1.7274171113967896e-05, 1.816917210817337e-05, 1.9064173102378845e-05, 1.995917409658432e-05, 2.0854175090789795e-05, 2.174917608499527e-05, 2.2644177079200745e-05, 2.353917807340622e-05, 2.4434179067611694e-05, 2.532918006181717e-05, 2.6224181056022644e-05, 2.711918205022812e-05, 2.8014183044433594e-05]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 7.0, 2.0, 7.0, 3.0, 10.0, 11.0, 25.0, 22.0, 34.0, 43.0, 102.0, 91.0, 206.0, 224.0, 504.0, 543.0, 1256.0, 1570.0, 4014.0, 4996.0, 15350.0, 22524.0, 96887.0, 243548.0, 426237.0, 159677.0, 32346.0, 21011.0, 6412.0, 5077.0, 1900.0, 1665.0, 652.0, 661.0, 268.0, 252.0, 109.0, 122.0, 43.0, 52.0, 30.0, 24.0, 7.0, 13.0, 7.0, 11.0, 2.0, 2.0, 4.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.76837158203125e-06, -4.620291292667389e-06, -4.472211003303528e-06, -4.324130713939667e-06, -4.176050424575806e-06, -4.027970135211945e-06, -3.8798898458480835e-06, -3.7318095564842224e-06, -3.5837292671203613e-06, -3.4356489777565002e-06, -3.287568688392639e-06, -3.139488399028778e-06, -2.991408109664917e-06, -2.843327820301056e-06, -2.695247530937195e-06, -2.5471672415733337e-06, -2.3990869522094727e-06, -2.2510066628456116e-06, -2.1029263734817505e-06, -1.9548460841178894e-06, -1.8067657947540283e-06, -1.6586855053901672e-06, -1.5106052160263062e-06, -1.362524926662445e-06, -1.214444637298584e-06, -1.066364347934723e-06, -9.182840585708618e-07, -7.702037692070007e-07, -6.221234798431396e-07, -4.7404319047927856e-07, -3.259629011154175e-07, -1.778826117515564e-07, -2.9802322387695312e-08, 1.1827796697616577e-07, 2.6635825634002686e-07, 4.1443854570388794e-07, 5.62518835067749e-07, 7.105991244316101e-07, 8.586794137954712e-07, 1.0067597031593323e-06, 1.1548399925231934e-06, 1.3029202818870544e-06, 1.4510005712509155e-06, 1.5990808606147766e-06, 1.7471611499786377e-06, 1.8952414393424988e-06, 2.04332172870636e-06, 2.191402018070221e-06, 2.339482307434082e-06, 2.487562596797943e-06, 2.635642886161804e-06, 2.7837231755256653e-06, 2.9318034648895264e-06, 3.0798837542533875e-06, 3.2279640436172485e-06, 3.3760443329811096e-06, 3.5241246223449707e-06, 3.6722049117088318e-06, 3.820285201072693e-06, 3.968365490436554e-06, 4.116445779800415e-06, 4.264526069164276e-06, 4.412606358528137e-06, 4.560686647891998e-06, 4.708766937255859e-06]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 2.0, 1.0, 7.0, 7.0, 4.0, 11.0, 6.0, 0.0, 12.0, 20.0, 36.0, 31.0, 46.0, 60.0, 87.0, 110.0, 116.0, 111.0, 92.0, 66.0, 47.0, 45.0, 22.0, 0.0, 17.0, 13.0, 12.0, 5.0, 5.0, 8.0, 2.0, 1.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-2.2649765014648438e-06, -2.209097146987915e-06, -2.1532177925109863e-06, -2.0973384380340576e-06, -2.041459083557129e-06, -1.9855797290802e-06, -1.9297003746032715e-06, -1.8738210201263428e-06, -1.817941665649414e-06, -1.7620623111724854e-06, -1.7061829566955566e-06, -1.650303602218628e-06, -1.5944242477416992e-06, -1.5385448932647705e-06, -1.4826655387878418e-06, -1.426786184310913e-06, -1.3709068298339844e-06, -1.3150274753570557e-06, -1.259148120880127e-06, -1.2032687664031982e-06, -1.1473894119262695e-06, -1.0915100574493408e-06, -1.0356307029724121e-06, -9.797513484954834e-07, -9.238719940185547e-07, -8.67992639541626e-07, -8.121132850646973e-07, -7.562339305877686e-07, -7.003545761108398e-07, -6.444752216339111e-07, -5.885958671569824e-07, -5.327165126800537e-07, -4.76837158203125e-07, -4.209578037261963e-07, -3.650784492492676e-07, -3.0919909477233887e-07, -2.5331974029541016e-07, -1.9744038581848145e-07, -1.4156103134155273e-07, -8.568167686462402e-08, -2.9802322387695312e-08, 2.60770320892334e-08, 8.195638656616211e-08, 1.3783574104309082e-07, 1.9371509552001953e-07, 2.4959444999694824e-07, 3.0547380447387695e-07, 3.6135315895080566e-07, 4.172325134277344e-07, 4.731118679046631e-07, 5.289912223815918e-07, 5.848705768585205e-07, 6.407499313354492e-07, 6.966292858123779e-07, 7.525086402893066e-07, 8.083879947662354e-07, 8.642673492431641e-07, 9.201467037200928e-07, 9.760260581970215e-07, 1.0319054126739502e-06, 1.087784767150879e-06, 1.1436641216278076e-06, 1.1995434761047363e-06, 1.255422830581665e-06, 1.3113021850585938e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 7.0, 3.0, 11.0, 17.0, 36.0, 30.0, 60.0, 97.0, 57.0, 200.0, 323.0, 546.0, 405.0, 1166.0, 1910.0, 3470.0, 2545.0, 8339.0, 16867.0, 38607.0, 39291.0, 243373.0, 502802.0, 73833.0, 63434.0, 25124.0, 11673.0, 3424.0, 4443.0, 2578.0, 1509.0, 530.0, 730.0, 401.0, 279.0, 83.0, 120.0, 72.0, 28.0, 53.0, 30.0, 22.0, 6.0, 13.0, 11.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.337860107421875e-06, -3.234483301639557e-06, -3.1311064958572388e-06, -3.0277296900749207e-06, -2.9243528842926025e-06, -2.8209760785102844e-06, -2.7175992727279663e-06, -2.614222466945648e-06, -2.51084566116333e-06, -2.407468855381012e-06, -2.304092049598694e-06, -2.2007152438163757e-06, -2.0973384380340576e-06, -1.9939616322517395e-06, -1.8905848264694214e-06, -1.7872080206871033e-06, -1.6838312149047852e-06, -1.580454409122467e-06, -1.477077603340149e-06, -1.3737007975578308e-06, -1.2703239917755127e-06, -1.1669471859931946e-06, -1.0635703802108765e-06, -9.601935744285583e-07, -8.568167686462402e-07, -7.534399628639221e-07, -6.50063157081604e-07, -5.466863512992859e-07, -4.4330954551696777e-07, -3.3993273973464966e-07, -2.3655593395233154e-07, -1.3317912817001343e-07, -2.9802322387695312e-08, 7.35744833946228e-08, 1.7695128917694092e-07, 2.8032809495925903e-07, 3.8370490074157715e-07, 4.870817065238953e-07, 5.904585123062134e-07, 6.938353180885315e-07, 7.972121238708496e-07, 9.005889296531677e-07, 1.0039657354354858e-06, 1.107342541217804e-06, 1.210719347000122e-06, 1.3140961527824402e-06, 1.4174729585647583e-06, 1.5208497643470764e-06, 1.6242265701293945e-06, 
1.7276033759117126e-06, 1.8309801816940308e-06, 1.934356987476349e-06, 2.037733793258667e-06, 2.141110599040985e-06, 2.2444874048233032e-06, 2.3478642106056213e-06, 2.4512410163879395e-06, 2.5546178221702576e-06, 2.6579946279525757e-06, 2.761371433734894e-06, 2.864748239517212e-06, 2.96812504529953e-06, 3.071501851081848e-06, 3.1748786568641663e-06, 3.2782554626464844e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 3.0, 4.0, 6.0, 2.0, 2.0, 4.0, 10.0, 5.0, 5.0, 4.0, 11.0, 30.0, 21.0, 13.0, 18.0, 41.0, 23.0, 30.0, 34.0, 104.0, 64.0, 62.0, 74.0, 107.0, 37.0, 47.0, 35.0, 28.0, 56.0, 26.0, 12.0, 12.0, 15.0, 7.0, 6.0, 6.0, 9.0, 5.0, 3.0, 2.0, 4.0, 1.0, 6.0, 4.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0265579223632812e-06, -1.9529834389686584e-06, -1.8794089555740356e-06, -1.8058344721794128e-06, -1.73225998878479e-06, -1.6586855053901672e-06, -1.5851110219955444e-06, -1.5115365386009216e-06, -1.4379620552062988e-06, -1.364387571811676e-06, -1.2908130884170532e-06, -1.2172386050224304e-06, -1.1436641216278076e-06, -1.0700896382331848e-06, -9.96515154838562e-07, -9.229406714439392e-07, -8.493661880493164e-07, -7.757917046546936e-07, -7.022172212600708e-07, -6.28642737865448e-07, -5.550682544708252e-07, -4.814937710762024e-07, -4.079192876815796e-07, -3.343448042869568e-07, -2.60770320892334e-07, -1.8719583749771118e-07, -1.1362135410308838e-07, -4.0046870708465576e-08, 3.3527612686157227e-08, 1.0710209608078003e-07, 1.8067657947540283e-07, 2.5425106287002563e-07, 3.2782554626464844e-07, 4.0140002965927124e-07, 4.7497451305389404e-07, 5.485489964485168e-07, 6.221234798431396e-07, 6.956979632377625e-07, 7.692724466323853e-07, 8.428469300270081e-07, 9.164214134216309e-07, 9.899958968162537e-07, 1.0635703802108765e-06, 1.1371448636054993e-06, 1.210719347000122e-06, 1.2842938303947449e-06, 1.3578683137893677e-06, 1.4314427971839905e-06, 1.5050172805786133e-06, 1.578591763973236e-06, 1.6521662473678589e-06, 1.7257407307624817e-06, 1.7993152141571045e-06, 1.8728896975517273e-06, 1.94646418094635e-06, 2.020038664340973e-06, 2.0936131477355957e-06, 2.1671876311302185e-06, 2.2407621145248413e-06, 2.314336597919464e-06, 2.387911081314087e-06, 2.4614855647087097e-06, 2.5350600481033325e-06, 2.6086345314979553e-06, 2.682209014892578e-06]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 3.0, 3.0, 1.0, 9.0, 17.0, 14.0, 26.0, 49.0, 115.0, 206.0, 153.0, 106.0, 62.0, 62.0, 47.0, 26.0, 23.0, 14.0, 13.0, 10.0, 7.0, 7.0, 11.0, 4.0, 2.0, 5.0, 5.0, 0.0, 4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.647011756896973e-05, -9.062661411007866e-05, -8.47831106511876e-05, -7.893961446825415e-05, -7.309611100936309e-05, -6.725260755047202e-05, -6.140911136753857e-05, -5.556560790864751e-05, -4.9722104449756444e-05, -4.387860099086538e-05, -3.803510116995312e-05, -3.2191601349040866e-05, -2.6348097890149802e-05, -2.050459625024814e-05, -1.4661094610346481e-05, -8.817594789434224e-06, -2.9740913305431604e-06, 2.8694103093585e-06, 8.71291194926016e-06, 1.455641358916182e-05, 2.039991522906348e-05, 2.624341686896514e-05, 3.20869185088668e-05, 3.793041832977906e-05, 4.377392178867012e-05, 4.9617425247561187e-05, 5.5460925068473443e-05, 6.13044248893857e-05, 6.714792834827676e-05, 7.299143180716783e-05, 
7.883492799010128e-05, 8.467843144899234e-05, 9.05219349078834e-05, 9.636543836677447e-05, 0.00010220894182566553, 0.00010805243800859898, 0.00011389594146749005, 0.00011973944492638111, 0.00012558294110931456, 0.000131426437292248, 0.0001372699480270967, 0.00014311344421003014, 0.00014895695494487882, 0.00015480045112781227, 0.00016064394731074572, 0.0001664874580455944, 0.00017233095422852784, 0.00017817446496337652, 0.00018401796114630997, 0.00018986145732924342, 0.0001957049680640921, 0.00020154846424702555, 0.00020739197498187423, 0.00021323547116480768, 0.00021907896734774113, 0.00022492246353067458, 0.00023076597426552325, 0.0002366094704484567, 0.00024245298118330538, 0.00024829647736623883, 0.0002541399735491723, 0.00025998346973210573, 0.00026582699501886964, 0.0002716704912018031, 0.00027751398738473654]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 7.0, 9.0, 8.0, 6.0, 8.0, 7.0, 17.0, 10.0, 16.0, 22.0, 19.0, 30.0, 17.0, 31.0, 29.0, 34.0, 30.0, 35.0, 43.0, 40.0, 47.0, 42.0, 31.0, 40.0, 52.0, 30.0, 41.0, 31.0, 37.0, 25.0, 31.0, 28.0, 27.0, 23.0, 15.0, 11.0, 18.0, 12.0, 4.0, 12.0, 4.0, 6.0, 6.0, 1.0, 3.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 0.0, 2.0], "bins": [-9.578466415405273e-05, -9.285099804401398e-05, -8.991733193397522e-05, -8.698366582393646e-05, -8.40499997138977e-05, -8.111633360385895e-05, -7.818266749382019e-05, -7.524900138378143e-05, -7.231533527374268e-05, -6.938166916370392e-05, -6.644800305366516e-05, -6.35143369436264e-05, -6.0580670833587646e-05, -5.764700472354889e-05, -5.471333861351013e-05, -5.1779672503471375e-05, -4.884600639343262e-05, -4.591234028339386e-05, -4.29786741733551e-05, -4.0045008063316345e-05, -3.711134195327759e-05, -3.417767584323883e-05, -3.124400973320007e-05, -2.8310343623161316e-05, -2.537667751312256e-05, -2.24430114030838e-05, -1.9509345293045044e-05, -1.6575679183006287e-05, -1.364201307296753e-05, -1.0708346962928772e-05, -7.774680852890015e-06, -4.841014742851257e-06, -1.9073486328125e-06, 1.0263174772262573e-06, 3.959983587265015e-06, 6.893649697303772e-06, 9.82731580734253e-06, 1.2760981917381287e-05, 1.5694648027420044e-05, 1.86283141374588e-05, 2.156198024749756e-05, 2.4495646357536316e-05, 2.7429312467575073e-05, 3.036297857761383e-05, 3.329664468765259e-05, 3.6230310797691345e-05, 3.91639769077301e-05, 4.209764301776886e-05, 4.503130912780762e-05, 4.7964975237846375e-05, 5.089864134788513e-05, 5.383230745792389e-05, 5.6765973567962646e-05, 5.9699639678001404e-05, 6.263330578804016e-05, 6.556697189807892e-05, 6.850063800811768e-05, 7.143430411815643e-05, 7.436797022819519e-05, 7.730163633823395e-05, 8.02353024482727e-05, 8.316896855831146e-05, 8.610263466835022e-05, 8.903630077838898e-05, 9.196996688842773e-05]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 5.0, 3.0, 6.0, 13.0, 23.0, 45.0, 66.0, 144.0, 255.0, 549.0, 966.0, 2006.0, 4693.0, 13169.0, 59508.0, 3919744.0, 161384.0, 18738.0, 6540.0, 2825.0, 1491.0, 773.0, 472.0, 266.0, 201.0, 122.0, 82.0, 47.0, 36.0, 25.0, 19.0, 18.0, 9.0, 6.0, 7.0, 8.0, 4.0, 7.0, 8.0, 0.0, 4.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.793571472167969e-05, -5.513150244951248e-05, -5.2327290177345276e-05, -4.952307790517807e-05, -4.6718865633010864e-05, -4.391465336084366e-05, -4.111044108867645e-05, -3.830622881650925e-05, -3.550201654434204e-05, 
-3.2697804272174835e-05, -2.989359200000763e-05, -2.7089379727840424e-05, -2.4285167455673218e-05, -2.1480955183506012e-05, -1.8676742911338806e-05, -1.58725306391716e-05, -1.3068318367004395e-05, -1.0264106094837189e-05, -7.459893822669983e-06, -4.655681550502777e-06, -1.8514692783355713e-06, 9.527429938316345e-07, 3.7569552659988403e-06, 6.561167538166046e-06, 9.365379810333252e-06, 1.2169592082500458e-05, 1.4973804354667664e-05, 1.777801662683487e-05, 2.0582228899002075e-05, 2.338644117116928e-05, 2.6190653443336487e-05, 2.8994865715503693e-05, 3.17990779876709e-05, 3.4603290259838104e-05, 3.740750253200531e-05, 4.0211714804172516e-05, 4.301592707633972e-05, 4.582013934850693e-05, 4.862435162067413e-05, 5.142856389284134e-05, 5.4232776165008545e-05, 5.703698843717575e-05, 5.9841200709342957e-05, 6.264541298151016e-05, 6.544962525367737e-05, 6.825383752584457e-05, 7.105804979801178e-05, 7.386226207017899e-05, 7.666647434234619e-05, 7.94706866145134e-05, 8.22748988866806e-05, 8.507911115884781e-05, 8.788332343101501e-05, 9.068753570318222e-05, 9.349174797534943e-05, 9.629596024751663e-05, 9.910017251968384e-05, 0.00010190438479185104, 0.00010470859706401825, 0.00010751280933618546, 0.00011031702160835266, 0.00011312123388051987, 0.00011592544615268707, 0.00011872965842485428, 0.00012153387069702148]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [4.0, 1.0, 1.0, 3.0, 8.0, 4.0, 8.0, 13.0, 10.0, 17.0, 22.0, 26.0, 38.0, 63.0, 76.0, 90.0, 101.0, 111.0, 90.0, 79.0, 59.0, 50.0, 38.0, 28.0, 19.0, 9.0, 9.0, 8.0, 7.0, 4.0, 4.0, 8.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0967254638671875e-05, -1.0334886610507965e-05, -9.702518582344055e-06, -9.070150554180145e-06, -8.437782526016235e-06, -7.805414497852325e-06, -7.1730464696884155e-06, -6.540678441524506e-06, -5.908310413360596e-06, -5.275942385196686e-06, -4.643574357032776e-06, -4.011206328868866e-06, -3.378838300704956e-06, -2.746470272541046e-06, -2.1141022443771362e-06, -1.4817342162132263e-06, -8.493661880493164e-07, -2.169981598854065e-07, 4.153698682785034e-07, 1.0477378964424133e-06, 1.6801059246063232e-06, 2.312473952770233e-06, 2.944841980934143e-06, 3.577210009098053e-06, 4.209578037261963e-06, 4.841946065425873e-06, 5.474314093589783e-06, 6.106682121753693e-06, 6.7390501499176025e-06, 7.3714181780815125e-06, 8.003786206245422e-06, 8.636154234409332e-06, 9.268522262573242e-06, 9.900890290737152e-06, 1.0533258318901062e-05, 1.1165626347064972e-05, 1.1797994375228882e-05, 1.2430362403392792e-05, 1.3062730431556702e-05, 1.3695098459720612e-05, 1.4327466487884521e-05, 1.4959834516048431e-05, 1.559220254421234e-05, 1.622457057237625e-05, 1.685693860054016e-05, 1.748930662870407e-05, 1.812167465686798e-05, 1.875404268503189e-05, 1.93864107131958e-05, 2.001877874135971e-05, 2.065114676952362e-05, 2.128351479768753e-05, 2.191588282585144e-05, 2.254825085401535e-05, 2.318061888217926e-05, 2.381298691034317e-05, 2.444535493850708e-05, 2.507772296667099e-05, 2.57100909948349e-05, 2.634245902299881e-05, 2.697482705116272e-05, 2.760719507932663e-05, 2.823956310749054e-05, 2.887193113565445e-05, 2.950429916381836e-05]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 5.0, 5.0, 9.0, 8.0, 17.0, 19.0, 34.0, 47.0, 63.0, 77.0, 149.0, 238.0, 380.0, 563.0, 936.0, 
1396.0, 2370.0, 4163.0, 7748.0, 14833.0, 32386.0, 91755.0, 1090632.0, 2759018.0, 116649.0, 35596.0, 16121.0, 7995.0, 4498.0, 2489.0, 1452.0, 958.0, 626.0, 342.0, 242.0, 159.0, 107.0, 56.0, 45.0, 37.0, 17.0, 13.0, 9.0, 10.0, 7.0, 5.0, 6.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.522634506225586e-05, -3.4091994166374207e-05, -3.2957643270492554e-05, -3.18232923746109e-05, -3.068894147872925e-05, -2.9554590582847595e-05, -2.8420239686965942e-05, -2.728588879108429e-05, -2.6151537895202637e-05, -2.5017186999320984e-05, -2.388283610343933e-05, -2.2748485207557678e-05, -2.1614134311676025e-05, -2.0479783415794373e-05, -1.934543251991272e-05, -1.8211081624031067e-05, -1.7076730728149414e-05, -1.594237983226776e-05, -1.4808028936386108e-05, -1.3673678040504456e-05, -1.2539327144622803e-05, -1.140497624874115e-05, -1.0270625352859497e-05, -9.136274456977844e-06, -8.001923561096191e-06, -6.8675726652145386e-06, -5.733221769332886e-06, -4.598870873451233e-06, -3.46451997756958e-06, -2.3301690816879272e-06, -1.1958181858062744e-06, -6.146728992462158e-08, 1.0728836059570312e-06, 2.207234501838684e-06, 3.341585397720337e-06, 4.47593629360199e-06, 5.610287189483643e-06, 6.744638085365295e-06, 7.878988981246948e-06, 9.013339877128601e-06, 1.0147690773010254e-05, 1.1282041668891907e-05, 1.241639256477356e-05, 1.3550743460655212e-05, 1.4685094356536865e-05, 1.5819445252418518e-05, 1.695379614830017e-05, 1.8088147044181824e-05, 1.9222497940063477e-05, 2.035684883594513e-05, 2.1491199731826782e-05, 2.2625550627708435e-05, 2.3759901523590088e-05, 2.489425241947174e-05, 2.6028603315353394e-05, 2.7162954211235046e-05, 2.82973051071167e-05, 2.9431656002998352e-05, 3.0566006898880005e-05, 3.170035779476166e-05, 3.283470869064331e-05, 3.396905958652496e-05, 3.5103410482406616e-05, 3.623776137828827e-05, 3.737211227416992e-05]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 10.0, 4.0, 6.0, 7.0, 7.0, 18.0, 12.0, 17.0, 26.0, 28.0, 38.0, 54.0, 85.0, 146.0, 547.0, 1405.0, 955.0, 290.0, 133.0, 62.0, 48.0, 45.0, 25.0, 16.0, 15.0, 11.0, 16.0, 9.0, 8.0, 13.0, 1.0, 5.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7358531951904297e-05, -2.644117921590805e-05, -2.5523826479911804e-05, -2.4606473743915558e-05, -2.368912100791931e-05, -2.2771768271923065e-05, -2.185441553592682e-05, -2.0937062799930573e-05, -2.0019710063934326e-05, -1.910235732793808e-05, -1.8185004591941833e-05, -1.7267651855945587e-05, -1.635029911994934e-05, -1.5432946383953094e-05, -1.4515593647956848e-05, -1.3598240911960602e-05, -1.2680888175964355e-05, -1.1763535439968109e-05, -1.0846182703971863e-05, -9.928829967975616e-06, -9.01147723197937e-06, -8.094124495983124e-06, -7.1767717599868774e-06, -6.259419023990631e-06, -5.342066287994385e-06, -4.4247135519981384e-06, -3.507360816001892e-06, -2.5900080800056458e-06, -1.6726553440093994e-06, -7.553026080131531e-07, 1.6205012798309326e-07, 1.0794028639793396e-06, 1.996755599975586e-06, 2.9141083359718323e-06, 3.831461071968079e-06, 4.748813807964325e-06, 5.666166543960571e-06, 6.583519279956818e-06, 7.500872015953064e-06, 8.41822475194931e-06, 9.335577487945557e-06, 1.0252930223941803e-05, 1.117028295993805e-05, 1.2087635695934296e-05, 1.3004988431930542e-05, 1.3922341167926788e-05, 1.4839693903923035e-05, 1.575704663991928e-05, 1.6674399375915527e-05, 1.7591752111911774e-05, 1.850910484790802e-05, 
1.9426457583904266e-05, 2.0343810319900513e-05, 2.126116305589676e-05, 2.2178515791893005e-05, 2.3095868527889252e-05, 2.4013221263885498e-05, 2.4930573999881744e-05, 2.584792673587799e-05, 2.6765279471874237e-05, 2.7682632207870483e-05, 2.859998494386673e-05, 2.9517337679862976e-05, 3.0434690415859222e-05, 3.135204315185547e-05]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 1.0, 5.0, 2.0, 9.0, 8.0, 10.0, 18.0, 21.0, 22.0, 42.0, 43.0, 78.0, 102.0, 128.0, 132.0, 85.0, 67.0, 57.0, 33.0, 32.0, 24.0, 14.0, 10.0, 10.0, 12.0, 9.0, 10.0, 6.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00012851697101723403, -0.0001244941959157586, -0.00012047141353832558, -0.00011644863843685016, -0.00011242585605941713, -0.00010840308095794171, -0.0001043803058564663, -0.00010035753075499088, -9.633474837755784e-05, -9.231197327608243e-05, -8.82891908986494e-05, -8.426641579717398e-05, -8.024364069569856e-05, -7.622085831826553e-05, -7.219808321679011e-05, -6.817530083935708e-05, -6.415252573788166e-05, -6.0129746998427436e-05, -5.610696825897321e-05, -5.2084193157497793e-05, -4.806141441804357e-05, -4.4038635678589344e-05, -4.0015860577113926e-05, -3.59930818376597e-05, -3.197030309820548e-05, -2.7947524358751252e-05, -2.392474743828643e-05, -1.990197051782161e-05, -1.5879191778367385e-05, -1.185641303891316e-05, -7.83363611844834e-06, -3.8108591979835182e-06, 2.1191954147070646e-07, 4.234697371430229e-06, 8.257475201389752e-06, 1.2280253031349275e-05, 1.6303030861308798e-05, 2.0325809600763023e-05, 2.4348586521227844e-05, 2.8371363441692665e-05, 3.239414218114689e-05, 3.6416920920601115e-05, 4.043969966005534e-05, 4.446247476153076e-05, 4.848525350098498e-05, 5.2508032240439206e-05, 5.6530807341914624e-05, 6.055358608136885e-05, 6.457636482082307e-05, 6.859913992229849e-05, 7.262192229973152e-05, 7.664469740120694e-05, 8.066747977863997e-05, 8.469025488011539e-05, 8.871302998159081e-05, 9.273580508306623e-05, 9.675858746049926e-05, 0.00010078136256197467, 0.0001048041449394077, 0.00010882692004088312, 0.00011284969514235854, 0.00011687247751979157, 0.00012089525262126699, 0.00012491803499870002, 0.00012894081010017544]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 4.0, 4.0, 1.0, 1.0, 2.0, 8.0, 5.0, 5.0, 4.0, 9.0, 16.0, 19.0, 26.0, 20.0, 21.0, 29.0, 33.0, 40.0, 48.0, 39.0, 44.0, 52.0, 59.0, 45.0, 45.0, 35.0, 51.0, 39.0, 49.0, 42.0, 32.0, 32.0, 25.0, 25.0, 27.0, 13.0, 14.0, 17.0, 6.0, 8.0, 7.0, 7.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.586313247680664e-05, -6.340816617012024e-05, -6.095319986343384e-05, -5.8498233556747437e-05, -5.6043267250061035e-05, -5.3588300943374634e-05, -5.113333463668823e-05, -4.867836833000183e-05, -4.622340202331543e-05, -4.376843571662903e-05, -4.131346940994263e-05, -3.8858503103256226e-05, -3.6403536796569824e-05, -3.394857048988342e-05, -3.149360418319702e-05, -2.903863787651062e-05, -2.658367156982422e-05, -2.4128705263137817e-05, -2.1673738956451416e-05, -1.9218772649765015e-05, -1.6763806343078613e-05, -1.4308840036392212e-05, -1.185387372970581e-05, -9.39890742301941e-06, -6.943941116333008e-06, -4.4889748096466064e-06, -2.034008502960205e-06, 4.209578037261963e-07, 2.8759241104125977e-06, 5.330890417098999e-06, 7.7858567237854e-06, 
1.0240823030471802e-05, 1.2695789337158203e-05, 1.5150755643844604e-05, 1.7605721950531006e-05, 2.0060688257217407e-05, 2.251565456390381e-05, 2.497062087059021e-05, 2.742558717727661e-05, 2.9880553483963013e-05, 3.2335519790649414e-05, 3.4790486097335815e-05, 3.724545240402222e-05, 3.970041871070862e-05, 4.215538501739502e-05, 4.461035132408142e-05, 4.706531763076782e-05, 4.9520283937454224e-05, 5.1975250244140625e-05, 5.4430216550827026e-05, 5.688518285751343e-05, 5.934014916419983e-05, 6.179511547088623e-05, 6.425008177757263e-05, 6.670504808425903e-05, 6.916001439094543e-05, 7.161498069763184e-05, 7.406994700431824e-05, 7.652491331100464e-05, 7.897987961769104e-05, 8.143484592437744e-05, 8.388981223106384e-05, 8.634477853775024e-05, 8.879974484443665e-05, 9.125471115112305e-05]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 5.0, 10.0, 9.0, 12.0, 22.0, 28.0, 44.0, 50.0, 78.0, 130.0, 230.0, 452.0, 885.0, 1863.0, 4218.0, 10506.0, 31957.0, 142292.0, 690704.0, 119787.0, 28229.0, 9609.0, 3906.0, 1707.0, 791.0, 418.0, 223.0, 140.0, 77.0, 46.0, 27.0, 29.0, 16.0, 12.0, 14.0, 7.0, 4.0, 2.0, 1.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00012254714965820312, -0.00011838600039482117, -0.00011422485113143921, -0.00011006370186805725, -0.00010590255260467529, -0.00010174140334129333, -9.758025407791138e-05, -9.341910481452942e-05, -8.925795555114746e-05, -8.50968062877655e-05, -8.093565702438354e-05, -7.677450776100159e-05, -7.261335849761963e-05, -6.845220923423767e-05, -6.429105997085571e-05, -6.0129910707473755e-05, -5.59687614440918e-05, -5.180761218070984e-05, -4.764646291732788e-05, -4.348531365394592e-05, -3.9324164390563965e-05, -3.516301512718201e-05, -3.100186586380005e-05, -2.684071660041809e-05, -2.2679567337036133e-05, -1.8518418073654175e-05, -1.4357268810272217e-05, -1.0196119546890259e-05, -6.034970283508301e-06, -1.8738210201263428e-06, 2.2873282432556152e-06, 6.448477506637573e-06, 1.0609626770019531e-05, 1.477077603340149e-05, 1.8931925296783447e-05, 2.3093074560165405e-05, 2.7254223823547363e-05, 3.141537308692932e-05, 3.557652235031128e-05, 3.973767161369324e-05, 4.3898820877075195e-05, 4.805997014045715e-05, 5.222111940383911e-05, 5.638226866722107e-05, 6.054341793060303e-05, 6.470456719398499e-05, 6.886571645736694e-05, 7.30268657207489e-05, 7.718801498413086e-05, 8.134916424751282e-05, 8.551031351089478e-05, 8.967146277427673e-05, 9.383261203765869e-05, 9.799376130104065e-05, 0.00010215491056442261, 0.00010631605982780457, 0.00011047720909118652, 0.00011463835835456848, 0.00011879950761795044, 0.0001229606568813324, 0.00012712180614471436, 0.0001312829554080963, 0.00013544410467147827, 0.00013960525393486023, 0.0001437664031982422]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 3.0, 9.0, 4.0, 10.0, 16.0, 17.0, 18.0, 39.0, 47.0, 72.0, 110.0, 95.0, 126.0, 112.0, 89.0, 63.0, 41.0, 46.0, 20.0, 26.0, 9.0, 8.0, 8.0, 5.0, 8.0, 4.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5437602996826172e-05, -1.4727003872394562e-05, -1.4016404747962952e-05, -1.3305805623531342e-05, -1.2595206499099731e-05, -1.1884607374668121e-05, -1.1174008250236511e-05, -1.0463409125804901e-05, -9.752810001373291e-06, -9.042210876941681e-06, 
-8.33161175251007e-06, -7.621012628078461e-06, -6.910413503646851e-06, -6.1998143792152405e-06, -5.48921525478363e-06, -4.77861613035202e-06, -4.06801700592041e-06, -3.3574178814888e-06, -2.64681875705719e-06, -1.93621963262558e-06, -1.2256205081939697e-06, -5.150213837623596e-07, 1.955777406692505e-07, 9.061768651008606e-07, 1.6167759895324707e-06, 2.327375113964081e-06, 3.037974238395691e-06, 3.748573362827301e-06, 4.459172487258911e-06, 5.169771611690521e-06, 5.880370736122131e-06, 6.5909698605537415e-06, 7.3015689849853516e-06, 8.012168109416962e-06, 8.722767233848572e-06, 9.433366358280182e-06, 1.0143965482711792e-05, 1.0854564607143402e-05, 1.1565163731575012e-05, 1.2275762856006622e-05, 1.2986361980438232e-05, 1.3696961104869843e-05, 1.4407560229301453e-05, 1.5118159353733063e-05, 1.5828758478164673e-05, 1.6539357602596283e-05, 1.7249956727027893e-05, 1.7960555851459503e-05, 1.8671154975891113e-05, 1.9381754100322723e-05, 2.0092353224754333e-05, 2.0802952349185944e-05, 2.1513551473617554e-05, 2.2224150598049164e-05, 2.2934749722480774e-05, 2.3645348846912384e-05, 2.4355947971343994e-05, 2.5066547095775604e-05, 2.5777146220207214e-05, 2.6487745344638824e-05, 2.7198344469070435e-05, 2.7908943593502045e-05, 2.8619542717933655e-05, 2.9330141842365265e-05, 3.0040740966796875e-05]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 2.0, 4.0, 8.0, 7.0, 16.0, 16.0, 27.0, 52.0, 85.0, 116.0, 205.0, 337.0, 470.0, 788.0, 1137.0, 1855.0, 3065.0, 5435.0, 8669.0, 14569.0, 25141.0, 45503.0, 88560.0, 220963.0, 386990.0, 114375.0, 55948.0, 30815.0, 17247.0, 9999.0, 6420.0, 3679.0, 2223.0, 1423.0, 865.0, 558.0, 364.0, 228.0, 147.0, 89.0, 49.0, 42.0, 21.0, 20.0, 12.0, 11.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4809112548828125e-05, -3.372691571712494e-05, -3.264471888542175e-05, -3.156252205371857e-05, -3.048032522201538e-05, -2.9398128390312195e-05, -2.831593155860901e-05, -2.7233734726905823e-05, -2.6151537895202637e-05, -2.506934106349945e-05, -2.3987144231796265e-05, -2.290494740009308e-05, -2.1822750568389893e-05, -2.0740553736686707e-05, -1.965835690498352e-05, -1.8576160073280334e-05, -1.749396324157715e-05, -1.6411766409873962e-05, -1.5329569578170776e-05, -1.424737274646759e-05, -1.3165175914764404e-05, -1.2082979083061218e-05, -1.1000782251358032e-05, -9.918585419654846e-06, -8.83638858795166e-06, -7.754191756248474e-06, -6.671994924545288e-06, -5.589798092842102e-06, -4.507601261138916e-06, -3.42540442943573e-06, -2.343207597732544e-06, -1.261010766029358e-06, -1.7881393432617188e-07, 9.033828973770142e-07, 1.9855797290802e-06, 3.0677765607833862e-06, 4.149973392486572e-06, 5.232170224189758e-06, 6.314367055892944e-06, 7.39656388759613e-06, 8.478760719299316e-06, 9.560957551002502e-06, 1.0643154382705688e-05, 1.1725351214408875e-05, 1.280754804611206e-05, 1.3889744877815247e-05, 1.4971941709518433e-05, 1.605413854122162e-05, 1.7136335372924805e-05, 1.821853220462799e-05, 1.9300729036331177e-05, 2.0382925868034363e-05, 2.146512269973755e-05, 2.2547319531440735e-05, 2.362951636314392e-05, 2.4711713194847107e-05, 2.5793910026550293e-05, 2.687610685825348e-05, 2.7958303689956665e-05, 2.904050052165985e-05, 3.0122697353363037e-05, 3.120489418506622e-05, 3.228709101676941e-05, 3.3369287848472595e-05, 3.445148468017578e-05]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 
2.0, 3.0, 2.0, 10.0, 3.0, 5.0, 12.0, 8.0, 18.0, 19.0, 19.0, 24.0, 26.0, 25.0, 28.0, 44.0, 40.0, 47.0, 40.0, 52.0, 55.0, 41.0, 65.0, 52.0, 45.0, 43.0, 32.0, 29.0, 34.0, 30.0, 21.0, 23.0, 26.0, 24.0, 22.0, 7.0, 6.0, 4.0, 7.0, 6.0, 4.0, 6.0, 1.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.7610530853271484e-05, -3.655627369880676e-05, -3.550201654434204e-05, -3.444775938987732e-05, -3.33935022354126e-05, -3.2339245080947876e-05, -3.1284987926483154e-05, -3.0230730772018433e-05, -2.917647361755371e-05, -2.812221646308899e-05, -2.7067959308624268e-05, -2.6013702154159546e-05, -2.4959444999694824e-05, -2.3905187845230103e-05, -2.285093069076538e-05, -2.179667353630066e-05, -2.0742416381835938e-05, -1.9688159227371216e-05, -1.8633902072906494e-05, -1.7579644918441772e-05, -1.652538776397705e-05, -1.547113060951233e-05, -1.4416873455047607e-05, -1.3362616300582886e-05, -1.2308359146118164e-05, -1.1254101991653442e-05, -1.019984483718872e-05, -9.145587682723999e-06, -8.091330528259277e-06, -7.037073373794556e-06, -5.982816219329834e-06, -4.928559064865112e-06, -3.874301910400391e-06, -2.820044755935669e-06, -1.7657876014709473e-06, -7.115304470062256e-07, 3.427267074584961e-07, 1.3969838619232178e-06, 2.4512410163879395e-06, 3.505498170852661e-06, 4.559755325317383e-06, 5.6140124797821045e-06, 6.668269634246826e-06, 7.722526788711548e-06, 8.77678394317627e-06, 9.831041097640991e-06, 1.0885298252105713e-05, 1.1939555406570435e-05, 1.2993812561035156e-05, 1.4048069715499878e-05, 1.51023268699646e-05, 1.615658402442932e-05, 1.7210841178894043e-05, 1.8265098333358765e-05, 1.9319355487823486e-05, 2.0373612642288208e-05, 2.142786979675293e-05, 2.248212695121765e-05, 2.3536384105682373e-05, 2.4590641260147095e-05, 2.5644898414611816e-05, 2.6699155569076538e-05, 2.775341272354126e-05, 2.880766987800598e-05, 2.9861927032470703e-05]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 4.0, 6.0, 6.0, 8.0, 12.0, 22.0, 30.0, 36.0, 69.0, 86.0, 182.0, 223.0, 396.0, 950.0, 1347.0, 4034.0, 7387.0, 18827.0, 110404.0, 508337.0, 331497.0, 39828.0, 15837.0, 4297.0, 2004.0, 1308.0, 518.0, 383.0, 159.0, 112.0, 104.0, 42.0, 30.0, 18.0, 21.0, 12.0, 13.0, 2.0, 1.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-6.854534149169922e-06, -6.6515058279037476e-06, -6.448477506637573e-06, -6.245449185371399e-06, -6.042420864105225e-06, -5.83939254283905e-06, -5.636364221572876e-06, -5.433335900306702e-06, -5.230307579040527e-06, -5.027279257774353e-06, -4.824250936508179e-06, -4.621222615242004e-06, -4.41819429397583e-06, -4.215165972709656e-06, -4.0121376514434814e-06, -3.809109330177307e-06, -3.606081008911133e-06, -3.4030526876449585e-06, -3.200024366378784e-06, -2.99699604511261e-06, -2.7939677238464355e-06, -2.5909394025802612e-06, -2.387911081314087e-06, -2.1848827600479126e-06, -1.9818544387817383e-06, -1.778826117515564e-06, -1.5757977962493896e-06, -1.3727694749832153e-06, -1.169741153717041e-06, -9.667128324508667e-07, -7.636845111846924e-07, -5.606561899185181e-07, -3.5762786865234375e-07, -1.5459954738616943e-07, 4.842877388000488e-08, 2.514570951461792e-07, 4.544854164123535e-07, 6.575137376785278e-07, 8.605420589447021e-07, 1.0635703802108765e-06, 1.2665987014770508e-06, 1.469627022743225e-06, 1.6726553440093994e-06, 1.8756836652755737e-06, 2.078711986541748e-06, 2.2817403078079224e-06, 2.4847686290740967e-06, 2.687796950340271e-06, 2.8908252716064453e-06, 3.0938535928726196e-06, 
3.296881914138794e-06, 3.4999102354049683e-06, 3.7029385566711426e-06, 3.905966877937317e-06, 4.108995199203491e-06, 4.3120235204696655e-06, 4.51505184173584e-06, 4.718080163002014e-06, 4.9211084842681885e-06, 5.124136805534363e-06, 5.327165126800537e-06, 5.5301934480667114e-06, 5.733221769332886e-06, 5.93625009059906e-06, 6.139278411865234e-06]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 3.0, 0.0, 9.0, 0.0, 19.0, 0.0, 27.0, 32.0, 0.0, 46.0, 0.0, 47.0, 0.0, 66.0, 0.0, 95.0, 117.0, 0.0, 103.0, 0.0, 96.0, 0.0, 94.0, 66.0, 0.0, 56.0, 0.0, 39.0, 0.0, 24.0, 0.0, 20.0, 20.0, 0.0, 10.0, 0.0, 8.0, 0.0, 3.0, 4.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1920928955078125e-06, -1.1585652828216553e-06, -1.125037670135498e-06, -1.0915100574493408e-06, -1.0579824447631836e-06, -1.0244548320770264e-06, -9.909272193908691e-07, -9.57399606704712e-07, -9.238719940185547e-07, -8.903443813323975e-07, -8.568167686462402e-07, -8.23289155960083e-07, -7.897615432739258e-07, -7.562339305877686e-07, -7.227063179016113e-07, -6.891787052154541e-07, -6.556510925292969e-07, -6.221234798431396e-07, -5.885958671569824e-07, -5.550682544708252e-07, -5.21540641784668e-07, -4.880130290985107e-07, -4.544854164123535e-07, -4.209578037261963e-07, -3.8743019104003906e-07, -3.5390257835388184e-07, -3.203749656677246e-07, -2.868473529815674e-07, -2.5331974029541016e-07, -2.1979212760925293e-07, -1.862645149230957e-07, -1.5273690223693848e-07, -1.1920928955078125e-07, -8.568167686462402e-08, -5.21540641784668e-08, -1.862645149230957e-08, 1.4901161193847656e-08, 4.842877388000488e-08, 8.195638656616211e-08, 1.1548399925231934e-07, 1.4901161193847656e-07, 1.825392246246338e-07, 2.1606683731079102e-07, 2.4959444999694824e-07, 2.8312206268310547e-07, 3.166496753692627e-07, 3.501772880554199e-07, 3.8370490074157715e-07, 4.172325134277344e-07, 4.507601261138916e-07, 4.842877388000488e-07, 5.178153514862061e-07, 5.513429641723633e-07, 5.848705768585205e-07, 6.183981895446777e-07, 6.51925802230835e-07, 6.854534149169922e-07, 7.189810276031494e-07, 7.525086402893066e-07, 7.860362529754639e-07, 8.195638656616211e-07, 8.530914783477783e-07, 8.866190910339355e-07, 9.201467037200928e-07, 9.5367431640625e-07]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 6.0, 11.0, 7.0, 25.0, 10.0, 36.0, 55.0, 39.0, 105.0, 67.0, 178.0, 139.0, 426.0, 673.0, 579.0, 1767.0, 1363.0, 4743.0, 10648.0, 9654.0, 41389.0, 45141.0, 257208.0, 299216.0, 257941.0, 71023.0, 15827.0, 15867.0, 4191.0, 4809.0, 1443.0, 1674.0, 943.0, 314.0, 391.0, 128.0, 175.0, 111.0, 47.0, 65.0, 28.0, 37.0, 11.0, 22.0, 16.0, 0.0, 5.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.86102294921875e-06, -2.7669593691825867e-06, -2.6728957891464233e-06, -2.57883220911026e-06, -2.4847686290740967e-06, -2.3907050490379333e-06, -2.29664146900177e-06, -2.2025778889656067e-06, -2.1085143089294434e-06, -2.01445072889328e-06, -1.9203871488571167e-06, -1.8263235688209534e-06, -1.73225998878479e-06, -1.6381964087486267e-06, -1.5441328287124634e-06, -1.4500692486763e-06, -1.3560056686401367e-06, -1.2619420886039734e-06, -1.16787850856781e-06, -1.0738149285316467e-06, -9.797513484954834e-07, -8.856877684593201e-07, -7.916241884231567e-07, -6.975606083869934e-07, -6.034970283508301e-07, -5.094334483146667e-07, -4.153698682785034e-07, -3.213062882423401e-07, 
-2.2724270820617676e-07, -1.3317912817001343e-07, -3.91155481338501e-08, 5.494803190231323e-08, 1.4901161193847656e-07, 2.430751919746399e-07, 3.371387720108032e-07, 4.3120235204696655e-07, 5.252659320831299e-07, 6.193295121192932e-07, 7.133930921554565e-07, 8.074566721916199e-07, 9.015202522277832e-07, 9.955838322639465e-07, 1.0896474123001099e-06, 1.1837109923362732e-06, 1.2777745723724365e-06, 1.3718381524085999e-06, 1.4659017324447632e-06, 1.5599653124809265e-06, 1.6540288925170898e-06, 1.7480924725532532e-06, 1.8421560525894165e-06, 1.93621963262558e-06, 2.030283212661743e-06, 2.1243467926979065e-06, 2.21841037273407e-06, 2.312473952770233e-06, 2.4065375328063965e-06, 2.50060111284256e-06, 2.594664692878723e-06, 2.6887282729148865e-06, 2.78279185295105e-06, 2.876855432987213e-06, 2.9709190130233765e-06, 3.06498259305954e-06, 3.159046173095703e-06]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 1.0, 6.0, 5.0, 11.0, 5.0, 9.0, 15.0, 3.0, 37.0, 13.0, 38.0, 54.0, 36.0, 88.0, 46.0, 43.0, 113.0, 42.0, 136.0, 52.0, 57.0, 36.0, 26.0, 42.0, 15.0, 27.0, 9.0, 7.0, 9.0, 7.0, 5.0, 3.0, 3.0, 2.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7418136596679688e-06, -2.6579946279525757e-06, -2.5741755962371826e-06, -2.4903565645217896e-06, -2.4065375328063965e-06, -2.3227185010910034e-06, -2.2388994693756104e-06, -2.1550804376602173e-06, -2.0712614059448242e-06, -1.987442374229431e-06, -1.903623342514038e-06, -1.819804310798645e-06, -1.735985279083252e-06, -1.6521662473678589e-06, -1.5683472156524658e-06, -1.4845281839370728e-06, -1.4007091522216797e-06, -1.3168901205062866e-06, -1.2330710887908936e-06, -1.1492520570755005e-06, -1.0654330253601074e-06, -9.816139936447144e-07, -8.977949619293213e-07, -8.139759302139282e-07, -7.301568984985352e-07, -6.463378667831421e-07, -5.62518835067749e-07, -4.78699803352356e-07, -3.948807716369629e-07, -3.110617399215698e-07, -2.2724270820617676e-07, -1.434236764907837e-07, -5.960464477539063e-08, 2.421438694000244e-08, 1.0803341865539551e-07, 1.9185245037078857e-07, 2.7567148208618164e-07, 3.594905138015747e-07, 4.4330954551696777e-07, 5.271285772323608e-07, 6.109476089477539e-07, 6.94766640663147e-07, 7.7858567237854e-07, 8.624047040939331e-07, 9.462237358093262e-07, 1.0300427675247192e-06, 1.1138617992401123e-06, 1.1976808309555054e-06, 1.2814998626708984e-06, 1.3653188943862915e-06, 1.4491379261016846e-06, 1.5329569578170776e-06, 1.6167759895324707e-06, 1.7005950212478638e-06, 1.7844140529632568e-06, 1.86823308467865e-06, 1.952052116394043e-06, 2.035871148109436e-06, 2.119690179824829e-06, 2.203509211540222e-06, 2.2873282432556152e-06, 2.3711472749710083e-06, 2.4549663066864014e-06, 2.5387853384017944e-06, 2.6226043701171875e-06]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 1.0, 1.0, 3.0, 5.0, 0.0, 2.0, 8.0, 4.0, 9.0, 7.0, 4.0, 10.0, 5.0, 29.0, 26.0, 34.0, 56.0, 65.0, 117.0, 131.0, 124.0, 75.0, 65.0, 36.0, 32.0, 27.0, 25.0, 18.0, 16.0, 13.0, 13.0, 8.0, 6.0, 11.0, 5.0, 4.0, 3.0, 4.0, 0.0, 0.0, 0.0, 2.0, 5.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.812255353201181e-05, -7.48077072785236e-05, -7.149286102503538e-05, -6.817802204750478e-05, -6.486317579401657e-05, -6.154832954052836e-05, -5.8233483287040144e-05, -5.491863703355193e-05, -5.160379078006372e-05, 
-4.8288944526575506e-05, -4.49741019110661e-05, -4.165925565757789e-05, -3.8344409404089674e-05, -3.502956678858027e-05, -3.1714720535092056e-05, -2.8399874281603843e-05, -2.5085031666094437e-05, -2.1770187231595628e-05, -1.8455340978107415e-05, -1.5140496543608606e-05, -1.1825651199615095e-05, -8.510805855621584e-06, -5.1959614211227745e-06, -1.8811151676345617e-06, 1.4337292668642476e-06, 4.748574610857759e-06, 8.06341995485127e-06, 1.1378264389350079e-05, 1.469310973334359e-05, 1.80079550773371e-05, 2.132279951183591e-05, 2.4637645765324123e-05, 2.7952490199822932e-05, 3.1267336453311145e-05, 3.458217906882055e-05, 3.7897025322308764e-05, 4.121187157579698e-05, 4.452671419130638e-05, 4.7841560444794595e-05, 5.115640669828281e-05, 5.447125295177102e-05, 5.7786099205259234e-05, 6.110094545874745e-05, 6.441578443627805e-05, 6.773063068976626e-05, 7.104547694325447e-05, 7.436032319674268e-05, 7.76751694502309e-05, 8.09900084277615e-05, 8.430485468124971e-05, 8.761970093473792e-05, 9.093453991226852e-05, 9.424938616575673e-05, 9.756423241924495e-05, 0.00010087907867273316, 0.00010419392492622137, 0.00010750877117970958, 0.0001108236174331978, 0.00011413846368668601, 0.00011745330994017422, 0.00012076814891770482, 0.00012408300244715065, 0.00012739784142468125, 0.00013071269495412707, 0.00013402753393165767]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 4.0, 4.0, 1.0, 9.0, 8.0, 14.0, 11.0, 9.0, 11.0, 16.0, 17.0, 17.0, 25.0, 31.0, 31.0, 27.0, 29.0, 35.0, 40.0, 33.0, 40.0, 44.0, 35.0, 44.0, 48.0, 41.0, 30.0, 34.0, 25.0, 37.0, 35.0, 36.0, 31.0, 21.0, 12.0, 20.0, 20.0, 11.0, 9.0, 18.0, 14.0, 4.0, 11.0, 7.0, 3.0, 1.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.699562072753906e-05, -6.470177322626114e-05, -6.240792572498322e-05, -6.011407822370529e-05, -5.782023072242737e-05, -5.5526383221149445e-05, -5.323253571987152e-05, -5.09386882185936e-05, -4.8644840717315674e-05, -4.635099321603775e-05, -4.405714571475983e-05, -4.17632982134819e-05, -3.946945071220398e-05, -3.7175603210926056e-05, -3.488175570964813e-05, -3.258790820837021e-05, -3.0294060707092285e-05, -2.800021320581436e-05, -2.5706365704536438e-05, -2.3412518203258514e-05, -2.111867070198059e-05, -1.8824823200702667e-05, -1.6530975699424744e-05, -1.423712819814682e-05, -1.1943280696868896e-05, -9.649433195590973e-06, -7.355585694313049e-06, -5.061738193035126e-06, -2.767890691757202e-06, -4.7404319047927856e-07, 1.819804310798645e-06, 4.113651812076569e-06, 6.407499313354492e-06, 8.701346814632416e-06, 1.099519431591034e-05, 1.3289041817188263e-05, 1.5582889318466187e-05, 1.787673681974411e-05, 2.0170584321022034e-05, 2.2464431822299957e-05, 2.475827932357788e-05, 2.7052126824855804e-05, 2.9345974326133728e-05, 3.163982182741165e-05, 3.3933669328689575e-05, 3.62275168299675e-05, 3.852136433124542e-05, 4.0815211832523346e-05, 4.310905933380127e-05, 4.540290683507919e-05, 4.769675433635712e-05, 4.999060183763504e-05, 5.2284449338912964e-05, 5.457829684019089e-05, 5.687214434146881e-05, 5.9165991842746735e-05, 6.145983934402466e-05, 6.375368684530258e-05, 6.60475343465805e-05, 6.834138184785843e-05, 7.063522934913635e-05, 7.292907685041428e-05, 7.52229243516922e-05, 7.751677185297012e-05, 7.981061935424805e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [6.0, 2.0, 1.0, 5.0, 9.0, 28.0, 24.0, 59.0, 108.0, 172.0, 311.0, 633.0, 1110.0, 2147.0, 4675.0, 13447.0, 48377.0, 3266163.0, 
803081.0, 34976.0, 10678.0, 4111.0, 1952.0, 986.0, 502.0, 264.0, 145.0, 91.0, 52.0, 28.0, 23.0, 19.0, 12.0, 14.0, 6.0, 7.0, 6.0, 6.0, 6.0, 5.0, 4.0, 7.0, 9.0, 3.0, 10.0, 8.0, 0.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.346536636352539e-05, -5.048047751188278e-05, -4.749558866024017e-05, -4.4510699808597565e-05, -4.1525810956954956e-05, -3.854092210531235e-05, -3.555603325366974e-05, -3.257114440202713e-05, -2.958625555038452e-05, -2.6601366698741913e-05, -2.3616477847099304e-05, -2.0631588995456696e-05, -1.7646700143814087e-05, -1.4661811292171478e-05, -1.167692244052887e-05, -8.692033588886261e-06, -5.707144737243652e-06, -2.7222558856010437e-06, 2.6263296604156494e-07, 3.2475218176841736e-06, 6.232410669326782e-06, 9.217299520969391e-06, 1.2202188372612e-05, 1.5187077224254608e-05, 1.8171966075897217e-05, 2.1156854927539825e-05, 2.4141743779182434e-05, 2.7126632630825043e-05, 3.011152148246765e-05, 3.309641033411026e-05, 3.608129918575287e-05, 3.906618803739548e-05, 4.2051076889038086e-05, 4.5035965740680695e-05, 4.80208545923233e-05, 5.100574344396591e-05, 5.399063229560852e-05, 5.697552114725113e-05, 5.996040999889374e-05, 6.294529885053635e-05, 6.593018770217896e-05, 6.891507655382156e-05, 7.189996540546417e-05, 7.488485425710678e-05, 7.786974310874939e-05, 8.0854631960392e-05, 8.383952081203461e-05, 8.682440966367722e-05, 8.980929851531982e-05, 9.279418736696243e-05, 9.577907621860504e-05, 9.876396507024765e-05, 0.00010174885392189026, 0.00010473374277353287, 0.00010771863162517548, 0.00011070352047681808, 0.0001136884093284607, 0.0001166732981801033, 0.00011965818703174591, 0.00012264307588338852, 0.00012562796473503113, 0.00012861285358667374, 0.00013159774243831635, 0.00013458263128995895, 0.00013756752014160156]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 6.0, 6.0, 5.0, 7.0, 9.0, 9.0, 11.0, 19.0, 49.0, 66.0, 95.0, 136.0, 134.0, 137.0, 103.0, 64.0, 59.0, 35.0, 19.0, 11.0, 7.0, 11.0, 11.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3887882232666016e-05, -1.3059936463832855e-05, -1.2231990694999695e-05, -1.1404044926166534e-05, -1.0576099157333374e-05, -9.748153388500214e-06, -8.920207619667053e-06, -8.092261850833893e-06, -7.264316082000732e-06, -6.436370313167572e-06, -5.608424544334412e-06, -4.780478775501251e-06, -3.952533006668091e-06, -3.1245872378349304e-06, -2.29664146900177e-06, -1.4686957001686096e-06, -6.407499313354492e-07, 1.8719583749771118e-07, 1.0151416063308716e-06, 1.843087375164032e-06, 2.6710331439971924e-06, 3.4989789128303528e-06, 4.326924681663513e-06, 5.154870450496674e-06, 5.982816219329834e-06, 6.810761988162994e-06, 7.638707756996155e-06, 8.466653525829315e-06, 9.294599294662476e-06, 1.0122545063495636e-05, 1.0950490832328796e-05, 1.1778436601161957e-05, 1.2606382369995117e-05, 1.3434328138828278e-05, 1.4262273907661438e-05, 1.5090219676494598e-05, 1.591816544532776e-05, 1.674611121416092e-05, 1.757405698299408e-05, 1.840200275182724e-05, 1.92299485206604e-05, 2.005789428949356e-05, 2.088584005832672e-05, 2.171378582715988e-05, 2.2541731595993042e-05, 2.3369677364826202e-05, 2.4197623133659363e-05, 2.5025568902492523e-05, 2.5853514671325684e-05, 2.6681460440158844e-05, 2.7509406208992004e-05, 2.8337351977825165e-05, 2.9165297746658325e-05, 2.9993243515491486e-05, 
3.0821189284324646e-05, 3.1649135053157806e-05, 3.247708082199097e-05, 3.330502659082413e-05, 3.413297235965729e-05, 3.496091812849045e-05, 3.578886389732361e-05, 3.661680966615677e-05, 3.744475543498993e-05, 3.827270120382309e-05, 3.910064697265625e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 7.0, 6.0, 11.0, 9.0, 17.0, 23.0, 25.0, 45.0, 65.0, 107.0, 144.0, 244.0, 356.0, 487.0, 794.0, 1226.0, 1967.0, 3122.0, 5056.0, 8806.0, 15515.0, 29625.0, 63471.0, 195738.0, 3260707.0, 431214.0, 90322.0, 38390.0, 19384.0, 10808.0, 6282.0, 3740.0, 2409.0, 1422.0, 909.0, 620.0, 392.0, 266.0, 176.0, 123.0, 99.0, 46.0, 33.0, 21.0, 11.0, 15.0, 7.0, 9.0, 6.0, 3.0, 5.0, 2.0, 1.0, 3.0, 2.0, 2.0], "bins": [-3.141164779663086e-05, -3.0450522899627686e-05, -2.9489398002624512e-05, -2.8528273105621338e-05, -2.7567148208618164e-05, -2.660602331161499e-05, -2.5644898414611816e-05, -2.4683773517608643e-05, -2.372264862060547e-05, -2.2761523723602295e-05, -2.180039882659912e-05, -2.0839273929595947e-05, -1.9878149032592773e-05, -1.89170241355896e-05, -1.7955899238586426e-05, -1.6994774341583252e-05, -1.6033649444580078e-05, -1.5072524547576904e-05, -1.411139965057373e-05, -1.3150274753570557e-05, -1.2189149856567383e-05, -1.1228024959564209e-05, -1.0266900062561035e-05, -9.305775165557861e-06, -8.344650268554688e-06, -7.383525371551514e-06, -6.42240047454834e-06, -5.461275577545166e-06, -4.500150680541992e-06, -3.5390257835388184e-06, -2.5779008865356445e-06, -1.6167759895324707e-06, -6.556510925292969e-07, 3.0547380447387695e-07, 1.2665987014770508e-06, 2.2277235984802246e-06, 3.1888484954833984e-06, 4.149973392486572e-06, 5.111098289489746e-06, 6.07222318649292e-06, 7.033348083496094e-06, 7.994472980499268e-06, 8.955597877502441e-06, 9.916722774505615e-06, 1.0877847671508789e-05, 1.1838972568511963e-05, 1.2800097465515137e-05, 1.376122236251831e-05, 1.4722347259521484e-05, 1.5683472156524658e-05, 1.6644597053527832e-05, 1.7605721950531006e-05, 1.856684684753418e-05, 1.9527971744537354e-05, 2.0489096641540527e-05, 2.14502215385437e-05, 2.2411346435546875e-05, 2.337247133255005e-05, 2.4333596229553223e-05, 2.5294721126556396e-05, 2.625584602355957e-05, 2.7216970920562744e-05, 2.8178095817565918e-05, 2.9139220714569092e-05, 3.0100345611572266e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 2.0, 7.0, 6.0, 8.0, 10.0, 19.0, 15.0, 20.0, 20.0, 22.0, 23.0, 36.0, 40.0, 63.0, 83.0, 227.0, 585.0, 1242.0, 792.0, 288.0, 138.0, 73.0, 75.0, 41.0, 33.0, 34.0, 28.0, 21.0, 23.0, 12.0, 8.0, 15.0, 13.0, 9.0, 6.0, 5.0, 3.0, 5.0, 4.0, 3.0, 4.0, 4.0, 4.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0], "bins": [-2.5033950805664062e-05, -2.4267472326755524e-05, -2.3500993847846985e-05, -2.2734515368938446e-05, -2.1968036890029907e-05, -2.120155841112137e-05, -2.043507993221283e-05, -1.966860145330429e-05, -1.8902122974395752e-05, -1.8135644495487213e-05, -1.7369166016578674e-05, -1.6602687537670135e-05, -1.5836209058761597e-05, -1.5069730579853058e-05, -1.4303252100944519e-05, -1.353677362203598e-05, -1.2770295143127441e-05, -1.2003816664218903e-05, -1.1237338185310364e-05, -1.0470859706401825e-05, -9.704381227493286e-06, -8.937902748584747e-06, -8.171424269676208e-06, -7.40494579076767e-06, -6.638467311859131e-06, -5.871988832950592e-06, -5.105510354042053e-06, -4.339031875133514e-06, 
-3.5725533962249756e-06, -2.8060749173164368e-06, -2.039596438407898e-06, -1.2731179594993591e-06, -5.066394805908203e-07, 2.598389983177185e-07, 1.0263174772262573e-06, 1.7927959561347961e-06, 2.559274435043335e-06, 3.3257529139518738e-06, 4.092231392860413e-06, 4.858709871768951e-06, 5.62518835067749e-06, 6.391666829586029e-06, 7.158145308494568e-06, 7.924623787403107e-06, 8.691102266311646e-06, 9.457580745220184e-06, 1.0224059224128723e-05, 1.0990537703037262e-05, 1.17570161819458e-05, 1.252349466085434e-05, 1.3289973139762878e-05, 1.4056451618671417e-05, 1.4822930097579956e-05, 1.5589408576488495e-05, 1.6355887055397034e-05, 1.7122365534305573e-05, 1.788884401321411e-05, 1.865532249212265e-05, 1.942180097103119e-05, 2.0188279449939728e-05, 2.0954757928848267e-05, 2.1721236407756805e-05, 2.2487714886665344e-05, 2.3254193365573883e-05, 2.4020671844482422e-05]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 5.0, 6.0, 5.0, 5.0, 8.0, 20.0, 18.0, 30.0, 51.0, 73.0, 92.0, 127.0, 127.0, 97.0, 84.0, 57.0, 36.0, 32.0, 23.0, 15.0, 18.0, 13.0, 12.0, 7.0, 10.0, 8.0, 5.0, 9.0, 4.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017429336730856448, -0.00016915309242904186, -0.00016401281754951924, -0.00015887254266999662, -0.000153732267790474, -0.00014859199291095138, -0.00014345171803142875, -0.00013831144315190613, -0.0001331711682723835, -0.0001280308933928609, -0.00012289061851333827, -0.00011775034363381565, -0.00011261006875429302, -0.0001074697938747704, -0.00010232951899524778, -9.718924411572516e-05, -9.204897651216015e-05, -8.690870163263753e-05, -8.176842675311491e-05, -7.662815187359229e-05, -7.148787699406967e-05, -6.634760211454704e-05, -6.120733451098204e-05, -5.606705599348061e-05, -5.0926781113957986e-05, -4.5786506234435365e-05, -4.064623135491274e-05, -3.550596011336893e-05, -3.0365683414856903e-05, -2.522540853533428e-05, -2.0085135474801064e-05, -1.4944860595278442e-05, -9.80458571575582e-06, -4.6643112909805495e-06, 4.759631337947212e-07, 5.616237103822641e-06, 1.0756511983345263e-05, 1.5896786862867884e-05, 2.1037059923401102e-05, 2.6177334802923724e-05, 3.1317609682446346e-05, 3.645788456196897e-05, 4.159815944149159e-05, 4.6738430683035403e-05, 5.1878705562558025e-05, 5.701898044208065e-05, 6.215925532160327e-05, 6.729953020112589e-05, 7.243980508064851e-05, 7.758007996017113e-05, 8.272035483969375e-05, 8.786062971921638e-05, 9.3000904598739e-05, 9.814117947826162e-05, 0.00010328144708182663, 0.00010842172196134925, 0.00011356199684087187, 0.00011870227172039449, 0.00012384254659991711, 0.00012898282147943974, 0.00013412309635896236, 0.00013926337123848498, 0.0001444036461180076, 0.00014954392099753022, 0.00015468419587705284]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 0.0, 6.0, 4.0, 7.0, 6.0, 10.0, 3.0, 16.0, 16.0, 18.0, 14.0, 14.0, 18.0, 25.0, 28.0, 31.0, 39.0, 37.0, 32.0, 35.0, 35.0, 44.0, 41.0, 36.0, 54.0, 38.0, 40.0, 32.0, 40.0, 32.0, 30.0, 37.0, 33.0, 23.0, 21.0, 18.0, 18.0, 8.0, 12.0, 10.0, 10.0, 5.0, 6.0, 5.0, 6.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 2.0], "bins": [-9.08970832824707e-05, -8.826423436403275e-05, -8.563138544559479e-05, -8.299853652715683e-05, -8.036568760871887e-05, -7.773283869028091e-05, -7.509998977184296e-05, -7.2467140853405e-05, -6.983429193496704e-05, 
-6.720144301652908e-05, -6.456859409809113e-05, -6.193574517965317e-05, -5.930289626121521e-05, -5.667004734277725e-05, -5.4037198424339294e-05, -5.140434950590134e-05, -4.877150058746338e-05, -4.613865166902542e-05, -4.350580275058746e-05, -4.0872953832149506e-05, -3.824010491371155e-05, -3.560725599527359e-05, -3.297440707683563e-05, -3.0341558158397675e-05, -2.7708709239959717e-05, -2.507586032152176e-05, -2.24430114030838e-05, -1.9810162484645844e-05, -1.7177313566207886e-05, -1.4544464647769928e-05, -1.191161572933197e-05, -9.278766810894012e-06, -6.645917892456055e-06, -4.013068974018097e-06, -1.3802200555801392e-06, 1.2526288628578186e-06, 3.885477781295776e-06, 6.518326699733734e-06, 9.151175618171692e-06, 1.178402453660965e-05, 1.4416873455047607e-05, 1.7049722373485565e-05, 1.9682571291923523e-05, 2.231542021036148e-05, 2.494826912879944e-05, 2.7581118047237396e-05, 3.0213966965675354e-05, 3.284681588411331e-05, 3.547966480255127e-05, 3.811251372098923e-05, 4.0745362639427185e-05, 4.337821155786514e-05, 4.60110604763031e-05, 4.864390939474106e-05, 5.1276758313179016e-05, 5.3909607231616974e-05, 5.654245615005493e-05, 5.917530506849289e-05, 6.180815398693085e-05, 6.44410029053688e-05, 6.707385182380676e-05, 6.970670074224472e-05, 7.233954966068268e-05, 7.497239857912064e-05, 7.76052474975586e-05]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 2.0, 1.0, 5.0, 4.0, 2.0, 8.0, 14.0, 10.0, 14.0, 16.0, 17.0, 21.0, 23.0, 40.0, 55.0, 115.0, 217.0, 527.0, 1280.0, 3407.0, 9713.0, 34200.0, 200724.0, 690355.0, 79738.0, 18421.0, 5910.0, 2121.0, 787.0, 362.0, 155.0, 73.0, 41.0, 42.0, 27.0, 21.0, 19.0, 13.0, 11.0, 16.0, 9.0, 6.0, 5.0, 4.0, 7.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0001342296600341797, -0.00012979470193386078, -0.00012535974383354187, -0.00012092478573322296, -0.00011648982763290405, -0.00011205486953258514, -0.00010761991143226624, -0.00010318495333194733, -9.874999523162842e-05, -9.431503713130951e-05, -8.98800790309906e-05, -8.544512093067169e-05, -8.101016283035278e-05, -7.657520473003387e-05, -7.214024662971497e-05, -6.770528852939606e-05, -6.327033042907715e-05, -5.883537232875824e-05, -5.440041422843933e-05, -4.996545612812042e-05, -4.5530498027801514e-05, -4.1095539927482605e-05, -3.6660581827163696e-05, -3.222562372684479e-05, -2.779066562652588e-05, -2.335570752620697e-05, -1.892074942588806e-05, -1.4485791325569153e-05, -1.0050833225250244e-05, -5.6158751249313354e-06, -1.1809170246124268e-06, 3.254041075706482e-06, 7.68899917602539e-06, 1.21239572763443e-05, 1.6558915376663208e-05, 2.0993873476982117e-05, 2.5428831577301025e-05, 2.9863789677619934e-05, 3.429874777793884e-05, 3.873370587825775e-05, 4.316866397857666e-05, 4.760362207889557e-05, 5.203858017921448e-05, 5.6473538279533386e-05, 6.0908496379852295e-05, 6.53434544801712e-05, 6.977841258049011e-05, 7.421337068080902e-05, 7.864832878112793e-05, 8.308328688144684e-05, 8.751824498176575e-05, 9.195320308208466e-05, 9.638816118240356e-05, 0.00010082311928272247, 0.00010525807738304138, 0.00010969303548336029, 0.0001141279935836792, 0.00011856295168399811, 0.00012299790978431702, 0.00012743286788463593, 0.00013186782598495483, 0.00013630278408527374, 0.00014073774218559265, 0.00014517270028591156, 0.00014960765838623047]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 5.0, 7.0, 7.0, 12.0, 12.0, 17.0, 40.0, 
57.0, 84.0, 91.0, 143.0, 143.0, 113.0, 75.0, 64.0, 52.0, 23.0, 19.0, 12.0, 15.0, 8.0, 7.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6033649444580078e-05, -1.5201978385448456e-05, -1.4370307326316833e-05, -1.3538636267185211e-05, -1.2706965208053589e-05, -1.1875294148921967e-05, -1.1043623089790344e-05, -1.0211952030658722e-05, -9.3802809715271e-06, -8.548609912395477e-06, -7.716938853263855e-06, -6.885267794132233e-06, -6.05359673500061e-06, -5.221925675868988e-06, -4.390254616737366e-06, -3.5585835576057434e-06, -2.726912498474121e-06, -1.8952414393424988e-06, -1.0635703802108765e-06, -2.3189932107925415e-07, 5.997717380523682e-07, 1.4314427971839905e-06, 2.263113856315613e-06, 3.094784915447235e-06, 3.926455974578857e-06, 4.75812703371048e-06, 5.589798092842102e-06, 6.421469151973724e-06, 7.253140211105347e-06, 8.084811270236969e-06, 8.916482329368591e-06, 9.748153388500214e-06, 1.0579824447631836e-05, 1.1411495506763458e-05, 1.224316656589508e-05, 1.3074837625026703e-05, 1.3906508684158325e-05, 1.4738179743289948e-05, 1.556985080242157e-05, 1.6401521861553192e-05, 1.7233192920684814e-05, 1.8064863979816437e-05, 1.889653503894806e-05, 1.972820609807968e-05, 2.0559877157211304e-05, 2.1391548216342926e-05, 2.222321927547455e-05, 2.305489033460617e-05, 2.3886561393737793e-05, 2.4718232452869415e-05, 2.5549903512001038e-05, 2.638157457113266e-05, 2.7213245630264282e-05, 2.8044916689395905e-05, 2.8876587748527527e-05, 2.970825880765915e-05, 3.053992986679077e-05, 3.1371600925922394e-05, 3.2203271985054016e-05, 3.303494304418564e-05, 3.386661410331726e-05, 3.469828516244888e-05, 3.5529956221580505e-05, 3.636162728071213e-05, 3.719329833984375e-05]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 3.0, 3.0, 9.0, 11.0, 22.0, 28.0, 39.0, 54.0, 78.0, 112.0, 143.0, 228.0, 386.0, 516.0, 811.0, 1148.0, 1891.0, 2849.0, 4621.0, 7406.0, 12195.0, 18571.0, 34623.0, 64559.0, 135647.0, 384554.0, 183379.0, 86543.0, 44589.0, 24960.0, 14509.0, 8822.0, 5087.0, 3555.0, 2290.0, 1446.0, 958.0, 603.0, 384.0, 317.0, 183.0, 148.0, 86.0, 54.0, 46.0, 29.0, 16.0, 16.0, 8.0, 16.0, 4.0, 6.0, 0.0, 3.0, 3.0, 3.0], "bins": [-2.777576446533203e-05, -2.6951543986797333e-05, -2.6127323508262634e-05, -2.5303103029727936e-05, -2.4478882551193237e-05, -2.365466207265854e-05, -2.283044159412384e-05, -2.2006221115589142e-05, -2.1182000637054443e-05, -2.0357780158519745e-05, -1.9533559679985046e-05, -1.8709339201450348e-05, -1.788511872291565e-05, -1.706089824438095e-05, -1.6236677765846252e-05, -1.5412457287311554e-05, -1.4588236808776855e-05, -1.3764016330242157e-05, -1.2939795851707458e-05, -1.211557537317276e-05, -1.1291354894638062e-05, -1.0467134416103363e-05, -9.642913937568665e-06, -8.818693459033966e-06, -7.994472980499268e-06, -7.170252501964569e-06, -6.346032023429871e-06, -5.521811544895172e-06, -4.697591066360474e-06, -3.873370587825775e-06, -3.0491501092910767e-06, -2.224929630756378e-06, -1.4007091522216797e-06, -5.764886736869812e-07, 2.477318048477173e-07, 1.0719522833824158e-06, 1.8961727619171143e-06, 2.7203932404518127e-06, 3.5446137189865112e-06, 4.36883419752121e-06, 5.193054676055908e-06, 6.017275154590607e-06, 6.841495633125305e-06, 7.665716111660004e-06, 8.489936590194702e-06, 9.3141570687294e-06, 1.0138377547264099e-05, 1.0962598025798798e-05, 1.1786818504333496e-05, 
1.2611038982868195e-05, 1.3435259461402893e-05, 1.4259479939937592e-05, 1.508370041847229e-05, 1.590792089700699e-05, 1.6732141375541687e-05, 1.7556361854076385e-05, 1.8380582332611084e-05, 1.9204802811145782e-05, 2.002902328968048e-05, 2.085324376821518e-05, 2.1677464246749878e-05, 2.2501684725284576e-05, 2.3325905203819275e-05, 2.4150125682353973e-05, 2.4974346160888672e-05]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 3.0, 3.0, 7.0, 4.0, 4.0, 9.0, 15.0, 13.0, 13.0, 18.0, 24.0, 17.0, 22.0, 32.0, 29.0, 42.0, 33.0, 44.0, 43.0, 44.0, 35.0, 37.0, 37.0, 50.0, 48.0, 33.0, 43.0, 42.0, 40.0, 33.0, 21.0, 24.0, 25.0, 29.0, 11.0, 10.0, 10.0, 12.0, 14.0, 8.0, 6.0, 4.0, 7.0, 4.0, 0.0, 4.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.5987625122070312e-05, -2.511683851480484e-05, -2.4246051907539368e-05, -2.3375265300273895e-05, -2.2504478693008423e-05, -2.163369208574295e-05, -2.0762905478477478e-05, -1.9892118871212006e-05, -1.9021332263946533e-05, -1.815054565668106e-05, -1.727975904941559e-05, -1.6408972442150116e-05, -1.5538185834884644e-05, -1.4667399227619171e-05, -1.3796612620353699e-05, -1.2925826013088226e-05, -1.2055039405822754e-05, -1.1184252798557281e-05, -1.0313466191291809e-05, -9.442679584026337e-06, -8.571892976760864e-06, -7.701106369495392e-06, -6.8303197622299194e-06, -5.959533154964447e-06, -5.088746547698975e-06, -4.217959940433502e-06, -3.3471733331680298e-06, -2.4763867259025574e-06, -1.605600118637085e-06, -7.348135113716125e-07, 1.3597309589385986e-07, 1.0067597031593323e-06, 1.8775463104248047e-06, 2.748332917690277e-06, 3.6191195249557495e-06, 4.489906132221222e-06, 5.360692739486694e-06, 6.231479346752167e-06, 7.102265954017639e-06, 7.973052561283112e-06, 8.843839168548584e-06, 9.714625775814056e-06, 1.0585412383079529e-05, 1.1456198990345001e-05, 1.2326985597610474e-05, 1.3197772204875946e-05, 1.4068558812141418e-05, 1.4939345419406891e-05, 1.5810132026672363e-05, 1.6680918633937836e-05, 1.7551705241203308e-05, 1.842249184846878e-05, 1.9293278455734253e-05, 2.0164065062999725e-05, 2.1034851670265198e-05, 2.190563827753067e-05, 2.2776424884796143e-05, 2.3647211492061615e-05, 2.4517998099327087e-05, 2.538878470659256e-05, 2.6259571313858032e-05, 2.7130357921123505e-05, 2.8001144528388977e-05, 2.887193113565445e-05, 2.9742717742919922e-05]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 1.0, 5.0, 5.0, 4.0, 11.0, 12.0, 19.0, 20.0, 41.0, 60.0, 73.0, 123.0, 186.0, 343.0, 573.0, 1033.0, 2002.0, 5491.0, 21881.0, 342889.0, 633471.0, 28989.0, 6247.0, 2410.0, 1023.0, 634.0, 359.0, 214.0, 145.0, 103.0, 56.0, 51.0, 25.0, 27.0, 10.0, 12.0, 3.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.1265277862548828e-05, -1.0849907994270325e-05, -1.0434538125991821e-05, -1.0019168257713318e-05, -9.603798389434814e-06, -9.188428521156311e-06, -8.773058652877808e-06, -8.357688784599304e-06, -7.9423189163208e-06, -7.526949048042297e-06, -7.111579179763794e-06, -6.6962093114852905e-06, -6.280839443206787e-06, -5.865469574928284e-06, -5.45009970664978e-06, -5.034729838371277e-06, -4.6193599700927734e-06, -4.20399010181427e-06, -3.7886202335357666e-06, -3.373250365257263e-06, -2.9578804969787598e-06, -2.5425106287002563e-06, -2.127140760421753e-06, -1.7117708921432495e-06, -1.296401023864746e-06, -8.810311555862427e-07, -4.6566128730773926e-07, 
-5.029141902923584e-08, 3.650784492492676e-07, 7.80448317527771e-07, 1.1958181858062744e-06, 1.6111880540847778e-06, 2.0265579223632812e-06, 2.4419277906417847e-06, 2.857297658920288e-06, 3.2726675271987915e-06, 3.688037395477295e-06, 4.103407263755798e-06, 4.518777132034302e-06, 4.934147000312805e-06, 5.349516868591309e-06, 5.764886736869812e-06, 6.1802566051483154e-06, 6.595626473426819e-06, 7.010996341705322e-06, 7.426366209983826e-06, 7.841736078262329e-06, 8.257105946540833e-06, 8.672475814819336e-06, 9.08784568309784e-06, 9.503215551376343e-06, 9.918585419654846e-06, 1.033395528793335e-05, 1.0749325156211853e-05, 1.1164695024490356e-05, 1.158006489276886e-05, 1.1995434761047363e-05, 1.2410804629325867e-05, 1.282617449760437e-05, 1.3241544365882874e-05, 1.3656914234161377e-05, 1.407228410243988e-05, 1.4487653970718384e-05, 1.4903023838996887e-05, 1.531839370727539e-05]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 7.0, 18.0, 0.0, 11.0, 17.0, 0.0, 30.0, 40.0, 0.0, 50.0, 70.0, 0.0, 88.0, 108.0, 0.0, 105.0, 90.0, 0.0, 91.0, 74.0, 0.0, 52.0, 46.0, 0.0, 31.0, 22.0, 0.0, 25.0, 8.0, 0.0, 10.0, 4.0, 0.0, 6.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3709068298339844e-06, -1.3308599591255188e-06, -1.2908130884170532e-06, -1.2507662177085876e-06, -1.210719347000122e-06, -1.1706724762916565e-06, -1.130625605583191e-06, -1.0905787348747253e-06, -1.0505318641662598e-06, -1.0104849934577942e-06, -9.704381227493286e-07, -9.30391252040863e-07, -8.903443813323975e-07, -8.502975106239319e-07, -8.102506399154663e-07, -7.702037692070007e-07, -7.301568984985352e-07, -6.901100277900696e-07, -6.50063157081604e-07, -6.100162863731384e-07, -5.699694156646729e-07, -5.299225449562073e-07, -4.898756742477417e-07, -4.498288035392761e-07, -4.0978193283081055e-07, -3.6973506212234497e-07, -3.296881914138794e-07, -2.896413207054138e-07, -2.4959444999694824e-07, -2.0954757928848267e-07, -1.695007085800171e-07, -1.2945383787155151e-07, -8.940696716308594e-08, -4.936009645462036e-08, -9.313225746154785e-09, 3.073364496231079e-08, 7.078051567077637e-08, 1.1082738637924194e-07, 1.5087425708770752e-07, 1.909211277961731e-07, 2.3096799850463867e-07, 2.7101486921310425e-07, 3.110617399215698e-07, 3.511086106300354e-07, 3.91155481338501e-07, 4.3120235204696655e-07, 4.7124922275543213e-07, 5.112960934638977e-07, 5.513429641723633e-07, 5.913898348808289e-07, 6.314367055892944e-07, 6.7148357629776e-07, 7.115304470062256e-07, 7.515773177146912e-07, 7.916241884231567e-07, 8.316710591316223e-07, 8.717179298400879e-07, 9.117648005485535e-07, 9.51811671257019e-07, 9.918585419654846e-07, 1.0319054126739502e-06, 1.0719522833824158e-06, 1.1119991540908813e-06, 1.152046024799347e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 4.0, 2.0, 2.0, 7.0, 3.0, 8.0, 19.0, 11.0, 29.0, 15.0, 58.0, 61.0, 40.0, 142.0, 195.0, 129.0, 379.0, 545.0, 394.0, 1208.0, 2084.0, 1613.0, 5185.0, 4327.0, 15285.0, 37371.0, 39393.0, 240778.0, 517278.0, 73595.0, 62168.0, 23567.0, 6336.0, 7276.0, 2165.0, 2760.0, 1541.0, 503.0, 729.0, 475.0, 143.0, 260.0, 168.0, 49.0, 85.0, 52.0, 25.0, 31.0, 6.0, 17.0, 15.0, 8.0, 14.0, 4.0, 3.0, 3.0, 3.0, 0.0, 1.0, 3.0], "bins": [-3.159046173095703e-06, -3.061257302761078e-06, -2.9634684324264526e-06, -2.8656795620918274e-06, -2.767890691757202e-06, 
-2.670101821422577e-06, -2.5723129510879517e-06, -2.4745240807533264e-06, -2.376735210418701e-06, -2.278946340084076e-06, -2.1811574697494507e-06, -2.0833685994148254e-06, -1.9855797290802e-06, -1.887790858745575e-06, -1.7900019884109497e-06, -1.6922131180763245e-06, -1.5944242477416992e-06, -1.496635377407074e-06, -1.3988465070724487e-06, -1.3010576367378235e-06, -1.2032687664031982e-06, -1.105479896068573e-06, -1.0076910257339478e-06, -9.099021553993225e-07, -8.121132850646973e-07, -7.14324414730072e-07, -6.165355443954468e-07, -5.187466740608215e-07, -4.209578037261963e-07, -3.2316893339157104e-07, -2.253800630569458e-07, -1.2759119272232056e-07, -2.9802322387695312e-08, 6.798654794692993e-08, 1.6577541828155518e-07, 2.635642886161804e-07, 3.6135315895080566e-07, 4.591420292854309e-07, 5.569308996200562e-07, 6.547197699546814e-07, 7.525086402893066e-07, 8.502975106239319e-07, 9.480863809585571e-07, 1.0458752512931824e-06, 1.1436641216278076e-06, 1.2414529919624329e-06, 1.339241862297058e-06, 1.4370307326316833e-06, 1.5348196029663086e-06, 1.6326084733009338e-06, 1.730397343635559e-06, 1.8281862139701843e-06, 1.9259750843048096e-06, 2.023763954639435e-06, 2.12155282497406e-06, 2.2193416953086853e-06, 2.3171305656433105e-06, 2.414919435977936e-06, 2.512708306312561e-06, 2.6104971766471863e-06, 2.7082860469818115e-06, 2.8060749173164368e-06, 2.903863787651062e-06, 3.0016526579856873e-06, 3.0994415283203125e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 2.0, 7.0, 4.0, 3.0, 13.0, 12.0, 11.0, 23.0, 26.0, 14.0, 42.0, 51.0, 34.0, 76.0, 97.0, 60.0, 113.0, 57.0, 72.0, 72.0, 28.0, 52.0, 34.0, 11.0, 22.0, 22.0, 4.0, 10.0, 8.0, 2.0, 5.0, 6.0, 2.0, 5.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.9802322387695312e-06, -2.8815120458602905e-06, -2.78279185295105e-06, -2.684071660041809e-06, -2.5853514671325684e-06, -2.4866312742233276e-06, -2.387911081314087e-06, -2.289190888404846e-06, -2.1904706954956055e-06, -2.0917505025863647e-06, -1.993030309677124e-06, -1.8943101167678833e-06, -1.7955899238586426e-06, -1.6968697309494019e-06, -1.5981495380401611e-06, -1.4994293451309204e-06, -1.4007091522216797e-06, -1.301988959312439e-06, -1.2032687664031982e-06, -1.1045485734939575e-06, -1.0058283805847168e-06, -9.071081876754761e-07, -8.083879947662354e-07, -7.096678018569946e-07, -6.109476089477539e-07, -5.122274160385132e-07, -4.1350722312927246e-07, -3.1478703022003174e-07, -2.1606683731079102e-07, -1.1734664440155029e-07, -1.862645149230957e-08, 8.009374141693115e-08, 1.7881393432617188e-07, 2.775341272354126e-07, 3.762543201446533e-07, 4.7497451305389404e-07, 5.736947059631348e-07, 6.724148988723755e-07, 7.711350917816162e-07, 8.698552846908569e-07, 9.685754776000977e-07, 1.0672956705093384e-06, 1.166015863418579e-06, 1.2647360563278198e-06, 1.3634562492370605e-06, 1.4621764421463013e-06, 1.560896635055542e-06, 1.6596168279647827e-06, 1.7583370208740234e-06, 1.8570572137832642e-06, 1.955777406692505e-06, 2.0544975996017456e-06, 2.1532177925109863e-06, 2.251937985420227e-06, 2.3506581783294678e-06, 2.4493783712387085e-06, 2.5480985641479492e-06, 2.64681875705719e-06, 2.7455389499664307e-06, 2.8442591428756714e-06, 2.942979335784912e-06, 3.041699528694153e-06, 3.1404197216033936e-06, 3.2391399145126343e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 0.0, 3.0, 3.0, 6.0, 4.0, 6.0, 4.0, 9.0, 8.0, 10.0, 14.0, 21.0, 24.0, 31.0, 50.0, 42.0, 109.0, 119.0, 156.0, 97.0, 69.0, 57.0, 42.0, 34.0, 23.0, 13.0, 12.0, 13.0, 7.0, 5.0, 3.0, 2.0, 2.0, 6.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011754751903936267, -0.00011379676288925111, -0.00011004601401509717, -0.00010629525786498562, -0.00010254450899083167, -9.879375284072012e-05, -9.504299669060856e-05, -9.1292240540497e-05, -8.754149166634306e-05, -8.379073551623151e-05, -8.003998664207757e-05, -7.628923049196601e-05, -7.253847434185445e-05, -6.878772546770051e-05, -6.503696931758896e-05, -6.128622044343501e-05, -5.753546429332346e-05, -5.378471178119071e-05, -5.003395926905796e-05, -4.62832031189464e-05, -4.2532450606813654e-05, -3.8781698094680905e-05, -3.503094194456935e-05, -3.12801894324366e-05, -2.752943692030385e-05, -2.3778684408171102e-05, -2.002793007704895e-05, -1.6277175745926797e-05, -1.2526423233794048e-05, -8.7756707216613e-06, -5.024916390539147e-06, -1.2741620594169945e-06, 2.4765904527157545e-06, 6.227343874343205e-06, 9.978097295970656e-06, 1.3728850717598107e-05, 1.7479604139225557e-05, 2.1230356651358306e-05, 2.498111098248046e-05, 2.873186531360261e-05, 3.248261782573536e-05, 3.623337033786811e-05, 3.998412285000086e-05, 4.3734879000112414e-05, 4.748563151224516e-05, 5.123638402437791e-05, 5.498714017448947e-05, 5.873789268662222e-05, 6.248864519875497e-05, 6.623940134886652e-05, 6.999015022302046e-05, 7.374090637313202e-05, 7.749165524728596e-05, 8.124241139739752e-05, 8.499316754750907e-05, 8.874392369762063e-05, 9.249467257177457e-05, 9.624542872188613e-05, 9.999617759604007e-05, 0.00010374693374615163, 0.00010749768989626318, 0.00011124843877041712, 0.00011499919492052868, 0.00011874994379468262, 0.00012250069994479418]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 6.0, 6.0, 6.0, 10.0, 16.0, 13.0, 24.0, 17.0, 29.0, 31.0, 34.0, 56.0, 39.0, 62.0, 51.0, 53.0, 48.0, 52.0, 52.0, 55.0, 55.0, 43.0, 36.0, 36.0, 22.0, 30.0, 22.0, 17.0, 21.0, 8.0, 6.0, 17.0, 6.0, 8.0, 2.0, 4.0, 4.0, 2.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.225440979003906e-05, -7.950793951749802e-05, -7.676146924495697e-05, -7.401499897241592e-05, -7.126852869987488e-05, -6.852205842733383e-05, -6.577558815479279e-05, -6.302911788225174e-05, -6.028264760971069e-05, -5.753617733716965e-05, -5.47897070646286e-05, -5.2043236792087555e-05, -4.929676651954651e-05, -4.655029624700546e-05, -4.3803825974464417e-05, -4.105735570192337e-05, -3.8310885429382324e-05, -3.556441515684128e-05, -3.281794488430023e-05, -3.0071474611759186e-05, -2.732500433921814e-05, -2.4578534066677094e-05, -2.1832063794136047e-05, -1.9085593521595e-05, -1.6339123249053955e-05, -1.3592652976512909e-05, -1.0846182703971863e-05, -8.099712431430817e-06, -5.3532421588897705e-06, -2.6067718863487244e-06, 1.3969838619232178e-07, 2.886168658733368e-06, 5.632638931274414e-06, 8.37910920381546e-06, 1.1125579476356506e-05, 1.3872049748897552e-05, 1.66185200214386e-05, 1.9364990293979645e-05, 2.211146056652069e-05, 2.4857930839061737e-05, 2.7604401111602783e-05, 3.035087138414383e-05, 3.3097341656684875e-05, 3.584381192922592e-05, 3.859028220176697e-05, 4.1336752474308014e-05, 4.408322274684906e-05, 4.6829693019390106e-05, 4.957616329193115e-05, 5.23226335644722e-05, 5.5069103837013245e-05, 
5.781557410955429e-05, 6.056204438209534e-05, 6.330851465463638e-05, 6.605498492717743e-05, 6.880145519971848e-05, 7.154792547225952e-05, 7.429439574480057e-05, 7.704086601734161e-05, 7.978733628988266e-05, 8.25338065624237e-05, 8.528027683496475e-05, 8.80267471075058e-05, 9.077321738004684e-05, 9.351968765258789e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 3.0, 4.0, 2.0, 16.0, 15.0, 22.0, 42.0, 58.0, 73.0, 144.0, 221.0, 341.0, 518.0, 872.0, 1401.0, 2773.0, 6048.0, 14903.0, 55269.0, 3534785.0, 523412.0, 31058.0, 11338.0, 5032.0, 2415.0, 1339.0, 738.0, 454.0, 327.0, 208.0, 119.0, 84.0, 66.0, 50.0, 36.0, 19.0, 17.0, 17.0, 10.0, 9.0, 7.0, 7.0, 7.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4001808166503906e-05, -5.173590034246445e-05, -4.946999251842499e-05, -4.720408469438553e-05, -4.493817687034607e-05, -4.267226904630661e-05, -4.040636122226715e-05, -3.814045339822769e-05, -3.587454557418823e-05, -3.360863775014877e-05, -3.1342729926109314e-05, -2.9076822102069855e-05, -2.6810914278030396e-05, -2.4545006453990936e-05, -2.2279098629951477e-05, -2.0013190805912018e-05, -1.774728298187256e-05, -1.54813751578331e-05, -1.321546733379364e-05, -1.0949559509754181e-05, -8.683651685714722e-06, -6.4177438616752625e-06, -4.151836037635803e-06, -1.885928213596344e-06, 3.7997961044311523e-07, 2.6458874344825745e-06, 4.911795258522034e-06, 7.177703082561493e-06, 9.443610906600952e-06, 1.1709518730640411e-05, 1.397542655467987e-05, 1.624133437871933e-05, 1.850724220275879e-05, 2.0773150026798248e-05, 2.3039057850837708e-05, 2.5304965674877167e-05, 2.7570873498916626e-05, 2.9836781322956085e-05, 3.2102689146995544e-05, 3.4368596971035004e-05, 3.663450479507446e-05, 3.890041261911392e-05, 4.116632044315338e-05, 4.343222826719284e-05, 4.56981360912323e-05, 4.796404391527176e-05, 5.022995173931122e-05, 5.249585956335068e-05, 5.476176738739014e-05, 5.7027675211429596e-05, 5.9293583035469055e-05, 6.155949085950851e-05, 6.382539868354797e-05, 6.609130650758743e-05, 6.835721433162689e-05, 7.062312215566635e-05, 7.288902997970581e-05, 7.515493780374527e-05, 7.742084562778473e-05, 7.968675345182419e-05, 8.195266127586365e-05, 8.42185690999031e-05, 8.648447692394257e-05, 8.875038474798203e-05, 9.101629257202148e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 3.0, 1.0, 9.0, 3.0, 5.0, 3.0, 7.0, 9.0, 12.0, 13.0, 21.0, 42.0, 41.0, 58.0, 75.0, 116.0, 90.0, 112.0, 81.0, 76.0, 53.0, 46.0, 40.0, 26.0, 13.0, 17.0, 10.0, 5.0, 9.0, 5.0, 2.0, 3.0, 1.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4007091522216797e-05, -1.3397075235843658e-05, -1.278705894947052e-05, -1.2177042663097382e-05, -1.1567026376724243e-05, -1.0957010090351105e-05, -1.0346993803977966e-05, -9.736977517604828e-06, -9.12696123123169e-06, -8.516944944858551e-06, -7.906928658485413e-06, -7.296912372112274e-06, -6.686896085739136e-06, -6.076879799365997e-06, -5.466863512992859e-06, -4.8568472266197205e-06, -4.246830940246582e-06, -3.6368146538734436e-06, -3.026798367500305e-06, -2.4167820811271667e-06, -1.8067657947540283e-06, -1.1967495083808899e-06, -5.867332220077515e-07, 2.3283064365386963e-08, 6.332993507385254e-07, 1.2433156371116638e-06, 1.8533319234848022e-06, 2.4633482098579407e-06, 3.073364496231079e-06, 
3.6833807826042175e-06, 4.293397068977356e-06, 4.903413355350494e-06, 5.513429641723633e-06, 6.123445928096771e-06, 6.73346221446991e-06, 7.343478500843048e-06, 7.953494787216187e-06, 8.563511073589325e-06, 9.173527359962463e-06, 9.783543646335602e-06, 1.039355993270874e-05, 1.1003576219081879e-05, 1.1613592505455017e-05, 1.2223608791828156e-05, 1.2833625078201294e-05, 1.3443641364574432e-05, 1.405365765094757e-05, 1.466367393732071e-05, 1.5273690223693848e-05, 1.5883706510066986e-05, 1.6493722796440125e-05, 1.7103739082813263e-05, 1.77137553691864e-05, 1.832377165555954e-05, 1.8933787941932678e-05, 1.9543804228305817e-05, 2.0153820514678955e-05, 2.0763836801052094e-05, 2.1373853087425232e-05, 2.198386937379837e-05, 2.259388566017151e-05, 2.3203901946544647e-05, 2.3813918232917786e-05, 2.4423934519290924e-05, 2.5033950805664062e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 4.0, 4.0, 2.0, 2.0, 13.0, 7.0, 15.0, 22.0, 39.0, 41.0, 85.0, 120.0, 164.0, 221.0, 377.0, 545.0, 902.0, 1496.0, 2403.0, 3979.0, 7039.0, 14363.0, 28122.0, 65918.0, 325488.0, 3429587.0, 205487.0, 54973.0, 24311.0, 12137.0, 6582.0, 3879.0, 2179.0, 1344.0, 817.0, 530.0, 335.0, 253.0, 155.0, 104.0, 75.0, 52.0, 30.0, 20.0, 19.0, 19.0, 7.0, 8.0, 9.0, 1.0, 4.0, 1.0, 5.0], "bins": [-3.4928321838378906e-05, -3.396905958652496e-05, -3.300979733467102e-05, -3.205053508281708e-05, -3.1091272830963135e-05, -3.0132010579109192e-05, -2.917274832725525e-05, -2.8213486075401306e-05, -2.7254223823547363e-05, -2.629496157169342e-05, -2.5335699319839478e-05, -2.4376437067985535e-05, -2.3417174816131592e-05, -2.245791256427765e-05, -2.1498650312423706e-05, -2.0539388060569763e-05, -1.958012580871582e-05, -1.8620863556861877e-05, -1.7661601305007935e-05, -1.6702339053153992e-05, -1.574307680130005e-05, -1.4783814549446106e-05, -1.3824552297592163e-05, -1.286529004573822e-05, -1.1906027793884277e-05, -1.0946765542030334e-05, -9.987503290176392e-06, -9.028241038322449e-06, -8.068978786468506e-06, -7.109716534614563e-06, -6.15045428276062e-06, -5.191192030906677e-06, -4.231929779052734e-06, -3.2726675271987915e-06, -2.3134052753448486e-06, -1.3541430234909058e-06, -3.948807716369629e-07, 5.6438148021698e-07, 1.5236437320709229e-06, 2.4829059839248657e-06, 3.4421682357788086e-06, 4.4014304876327515e-06, 5.360692739486694e-06, 6.319954991340637e-06, 7.27921724319458e-06, 8.238479495048523e-06, 9.197741746902466e-06, 1.0157003998756409e-05, 1.1116266250610352e-05, 1.2075528502464294e-05, 1.3034790754318237e-05, 1.399405300617218e-05, 1.4953315258026123e-05, 1.5912577509880066e-05, 1.687183976173401e-05, 1.783110201358795e-05, 1.8790364265441895e-05, 1.9749626517295837e-05, 2.070888876914978e-05, 2.1668151021003723e-05, 2.2627413272857666e-05, 2.358667552471161e-05, 2.4545937776565552e-05, 2.5505200028419495e-05, 2.6464462280273438e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 4.0, 6.0, 7.0, 7.0, 15.0, 18.0, 11.0, 9.0, 17.0, 31.0, 35.0, 33.0, 47.0, 84.0, 171.0, 464.0, 1218.0, 973.0, 457.0, 141.0, 63.0, 43.0, 34.0, 42.0, 18.0, 24.0, 13.0, 16.0, 6.0, 17.0, 5.0, 7.0, 7.0, 9.0, 1.0, 2.0, 2.0, 0.0, 6.0, 1.0, 1.0, 3.0, 2.0, 2.0, 1.0, 0.0, 3.0], "bins": [-2.7179718017578125e-05, -2.6377849280834198e-05, -2.557598054409027e-05, -2.4774111807346344e-05, -2.3972243070602417e-05, 
-2.317037433385849e-05, -2.2368505597114563e-05, -2.1566636860370636e-05, -2.076476812362671e-05, -1.9962899386882782e-05, -1.9161030650138855e-05, -1.8359161913394928e-05, -1.7557293176651e-05, -1.6755424439907074e-05, -1.5953555703163147e-05, -1.515168696641922e-05, -1.4349818229675293e-05, -1.3547949492931366e-05, -1.2746080756187439e-05, -1.1944212019443512e-05, -1.1142343282699585e-05, -1.0340474545955658e-05, -9.538605809211731e-06, -8.736737072467804e-06, -7.934868335723877e-06, -7.13299959897995e-06, -6.331130862236023e-06, -5.529262125492096e-06, -4.727393388748169e-06, -3.925524652004242e-06, -3.123655915260315e-06, -2.321787178516388e-06, -1.519918441772461e-06, -7.180497050285339e-07, 8.381903171539307e-08, 8.856877684593201e-07, 1.687556505203247e-06, 2.489425241947174e-06, 3.291293978691101e-06, 4.093162715435028e-06, 4.895031452178955e-06, 5.696900188922882e-06, 6.498768925666809e-06, 7.300637662410736e-06, 8.102506399154663e-06, 8.90437513589859e-06, 9.706243872642517e-06, 1.0508112609386444e-05, 1.1309981346130371e-05, 1.2111850082874298e-05, 1.2913718819618225e-05, 1.3715587556362152e-05, 1.4517456293106079e-05, 1.5319325029850006e-05, 1.6121193766593933e-05, 1.692306250333786e-05, 1.7724931240081787e-05, 1.8526799976825714e-05, 1.932866871356964e-05, 2.0130537450313568e-05, 2.0932406187057495e-05, 2.1734274923801422e-05, 2.253614366054535e-05, 2.3338012397289276e-05, 2.4139881134033203e-05]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 6.0, 3.0, 6.0, 6.0, 13.0, 16.0, 23.0, 34.0, 38.0, 52.0, 76.0, 130.0, 176.0, 135.0, 92.0, 49.0, 36.0, 33.0, 22.0, 23.0, 13.0, 5.0, 9.0, 7.0, 5.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015494637773372233, -0.00014970886695664376, -0.0001444713561795652, -0.0001392338308505714, -0.00013399632007349283, -0.00012875880929641426, -0.0001235212985193357, -0.00011828378774225712, -0.00011304626968922094, -0.00010780875891214237, -0.00010257124085910618, -9.733373008202761e-05, -9.209621930494905e-05, -8.685870125191286e-05, -8.162119047483429e-05, -7.638367242179811e-05, -7.114616164471954e-05, -6.590865086764097e-05, -6.067113281460479e-05, -5.543362203752622e-05, -5.0196107622468844e-05, -4.495859320741147e-05, -3.97210824303329e-05, -3.448356801527552e-05, -2.9246053600218147e-05, -2.400853918516077e-05, -1.87710265890928e-05, -1.3533513993024826e-05, -8.29599957796745e-06, -3.058485162910074e-06, 2.179025614168495e-06, 7.416540029225871e-06, 1.2654054444283247e-05, 1.7891568859340623e-05, 2.3129081455408596e-05, 2.8366594051476568e-05, 3.3604108466533944e-05, 3.884162288159132e-05, 4.407913365866989e-05, 4.9316648073727265e-05, 5.455416248878464e-05, 5.979167690384202e-05, 6.502919131889939e-05, 7.026670209597796e-05, 7.550421287305653e-05, 8.074173092609271e-05, 8.597924170317128e-05, 9.121675975620747e-05, 9.645427053328604e-05, 0.0001016917813103646, 0.00010692929936340079, 0.00011216681014047936, 0.00011740432819351554, 0.0001226418389705941, 0.00012787934974767268, 0.00013311686052475125, 0.00013835437130182981, 0.00014359188207890838, 0.00014882939285598695, 0.00015406691818498075, 0.00015930442896205932, 0.0001645419397391379, 0.00016977945051621646, 0.00017501696129329503, 0.00018025448662228882]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 5.0, 4.0, 4.0, 4.0, 2.0, 14.0, 13.0, 13.0, 12.0, 21.0, 35.0, 41.0, 33.0, 28.0, 46.0, 45.0, 51.0, 56.0, 66.0, 64.0, 57.0, 54.0, 52.0, 41.0, 51.0, 27.0, 33.0, 34.0, 28.0, 25.0, 14.0, 20.0, 5.0, 1.0, 6.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.916854858398438e-05, -8.638110011816025e-05, -8.359365165233612e-05, -8.0806203186512e-05, -7.801875472068787e-05, -7.523130625486374e-05, -7.244385778903961e-05, -6.965640932321548e-05, -6.686896085739136e-05, -6.408151239156723e-05, -6.12940639257431e-05, -5.8506615459918976e-05, -5.571916699409485e-05, -5.293171852827072e-05, -5.0144270062446594e-05, -4.735682159662247e-05, -4.456937313079834e-05, -4.178192466497421e-05, -3.8994476199150085e-05, -3.620702773332596e-05, -3.341957926750183e-05, -3.0632130801677704e-05, -2.7844682335853577e-05, -2.505723387002945e-05, -2.2269785404205322e-05, -1.9482336938381195e-05, -1.6694888472557068e-05, -1.390744000673294e-05, -1.1119991540908813e-05, -8.332543075084686e-06, -5.545094609260559e-06, -2.757646143436432e-06, 2.9802322387695312e-08, 2.8172507882118225e-06, 5.60469925403595e-06, 8.392147719860077e-06, 1.1179596185684204e-05, 1.3967044651508331e-05, 1.675449311733246e-05, 1.9541941583156586e-05, 2.2329390048980713e-05, 2.511683851480484e-05, 2.7904286980628967e-05, 3.0691735446453094e-05, 3.347918391227722e-05, 3.626663237810135e-05, 3.9054080843925476e-05, 4.18415293097496e-05, 4.462897777557373e-05, 4.741642624139786e-05, 5.0203874707221985e-05, 5.299132317304611e-05, 5.577877163887024e-05, 5.8566220104694366e-05, 6.13536685705185e-05, 6.414111703634262e-05, 6.692856550216675e-05, 6.971601396799088e-05, 7.2503462433815e-05, 7.529091089963913e-05, 7.807835936546326e-05, 8.086580783128738e-05, 8.365325629711151e-05, 8.644070476293564e-05, 8.922815322875977e-05]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 3.0, 6.0, 8.0, 7.0, 21.0, 17.0, 13.0, 20.0, 32.0, 33.0, 56.0, 90.0, 134.0, 200.0, 299.0, 571.0, 1107.0, 2106.0, 4412.0, 10525.0, 26764.0, 82465.0, 382084.0, 408589.0, 82475.0, 26803.0, 10292.0, 4609.0, 2197.0, 1128.0, 549.0, 326.0, 192.0, 127.0, 76.0, 57.0, 37.0, 22.0, 21.0, 19.0, 13.0, 8.0, 10.0, 10.0, 10.0, 6.0, 3.0, 4.0, 2.0, 4.0, 0.0, 1.0], "bins": [-9.548664093017578e-05, -9.275879710912704e-05, -9.003095328807831e-05, -8.730310946702957e-05, -8.457526564598083e-05, -8.18474218249321e-05, -7.911957800388336e-05, -7.639173418283463e-05, -7.366389036178589e-05, -7.093604654073715e-05, -6.820820271968842e-05, -6.548035889863968e-05, -6.275251507759094e-05, -6.0024671256542206e-05, -5.729682743549347e-05, -5.456898361444473e-05, -5.1841139793395996e-05, -4.911329597234726e-05, -4.638545215129852e-05, -4.3657608330249786e-05, -4.092976450920105e-05, -3.820192068815231e-05, -3.547407686710358e-05, -3.274623304605484e-05, -3.0018389225006104e-05, -2.7290545403957367e-05, -2.456270158290863e-05, -2.1834857761859894e-05, -1.9107013940811157e-05, -1.637917011976242e-05, -1.3651326298713684e-05, -1.0923482477664948e-05, -8.195638656616211e-06, -5.467794835567474e-06, -2.739951014518738e-06, -1.210719347000122e-08, 2.7157366275787354e-06, 5.443580448627472e-06, 8.171424269676208e-06, 1.0899268090724945e-05, 1.3627111911773682e-05, 1.6354955732822418e-05, 1.9082799553871155e-05, 2.181064337491989e-05, 2.4538487195968628e-05, 2.7266331017017365e-05, 2.99941748380661e-05, 3.272201865911484e-05, 
3.5449862480163574e-05, 3.817770630121231e-05, 4.090555012226105e-05, 4.3633393943309784e-05, 4.636123776435852e-05, 4.908908158540726e-05, 5.1816925406455994e-05, 5.454476922750473e-05, 5.727261304855347e-05, 6.00004568696022e-05, 6.272830069065094e-05, 6.545614451169968e-05, 6.818398833274841e-05, 7.091183215379715e-05, 7.363967597484589e-05, 7.636751979589462e-05, 7.909536361694336e-05]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 5.0, 12.0, 9.0, 8.0, 26.0, 32.0, 65.0, 73.0, 93.0, 91.0, 134.0, 104.0, 94.0, 78.0, 64.0, 30.0, 27.0, 19.0, 12.0, 11.0, 6.0, 11.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3066997528076172e-05, -2.2291205823421478e-05, -2.1515414118766785e-05, -2.073962241411209e-05, -1.9963830709457397e-05, -1.9188039004802704e-05, -1.841224730014801e-05, -1.7636455595493317e-05, -1.6860663890838623e-05, -1.608487218618393e-05, -1.5309080481529236e-05, -1.4533288776874542e-05, -1.3757497072219849e-05, -1.2981705367565155e-05, -1.2205913662910461e-05, -1.1430121958255768e-05, -1.0654330253601074e-05, -9.87853854894638e-06, -9.102746844291687e-06, -8.326955139636993e-06, -7.5511634349823e-06, -6.775371730327606e-06, -5.999580025672913e-06, -5.223788321018219e-06, -4.447996616363525e-06, -3.6722049117088318e-06, -2.896413207054138e-06, -2.1206215023994446e-06, -1.344829797744751e-06, -5.690380930900574e-07, 2.0675361156463623e-07, 9.825453162193298e-07, 1.7583370208740234e-06, 2.534128725528717e-06, 3.3099204301834106e-06, 4.085712134838104e-06, 4.861503839492798e-06, 5.6372955441474915e-06, 6.413087248802185e-06, 7.188878953456879e-06, 7.964670658111572e-06, 8.740462362766266e-06, 9.51625406742096e-06, 1.0292045772075653e-05, 1.1067837476730347e-05, 1.184362918138504e-05, 1.2619420886039734e-05, 1.3395212590694427e-05, 1.4171004295349121e-05, 1.4946796000003815e-05, 1.5722587704658508e-05, 1.6498379409313202e-05, 1.7274171113967896e-05, 1.804996281862259e-05, 1.8825754523277283e-05, 1.9601546227931976e-05, 2.037733793258667e-05, 2.1153129637241364e-05, 2.1928921341896057e-05, 2.270471304655075e-05, 2.3480504751205444e-05, 2.4256296455860138e-05, 2.503208816051483e-05, 2.5807879865169525e-05, 2.658367156982422e-05]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 7.0, 3.0, 5.0, 14.0, 18.0, 20.0, 33.0, 45.0, 74.0, 106.0, 141.0, 198.0, 333.0, 413.0, 675.0, 983.0, 1590.0, 2114.0, 3558.0, 5055.0, 8765.0, 12639.0, 23107.0, 36620.0, 75202.0, 154866.0, 418007.0, 141280.0, 70350.0, 34288.0, 22276.0, 12260.0, 8432.0, 4919.0, 3415.0, 2156.0, 1587.0, 942.0, 649.0, 472.0, 301.0, 188.0, 143.0, 101.0, 71.0, 40.0, 34.0, 23.0, 13.0, 11.0, 11.0, 5.0, 0.0, 3.0, 2.0, 3.0], "bins": [-2.3603439331054688e-05, -2.2917985916137695e-05, -2.2232532501220703e-05, -2.154707908630371e-05, -2.086162567138672e-05, -2.0176172256469727e-05, -1.9490718841552734e-05, -1.8805265426635742e-05, -1.811981201171875e-05, -1.7434358596801758e-05, -1.6748905181884766e-05, -1.6063451766967773e-05, -1.537799835205078e-05, -1.4692544937133789e-05, -1.4007091522216797e-05, -1.3321638107299805e-05, -1.2636184692382812e-05, -1.195073127746582e-05, -1.1265277862548828e-05, -1.0579824447631836e-05, -9.894371032714844e-06, -9.208917617797852e-06, -8.52346420288086e-06, -7.838010787963867e-06, 
-7.152557373046875e-06, -6.467103958129883e-06, -5.781650543212891e-06, -5.0961971282958984e-06, -4.410743713378906e-06, -3.725290298461914e-06, -3.039836883544922e-06, -2.3543834686279297e-06, -1.6689300537109375e-06, -9.834766387939453e-07, -2.980232238769531e-07, 3.8743019104003906e-07, 1.0728836059570312e-06, 1.7583370208740234e-06, 2.4437904357910156e-06, 3.129243850708008e-06, 3.814697265625e-06, 4.500150680541992e-06, 5.185604095458984e-06, 5.8710575103759766e-06, 6.556510925292969e-06, 7.241964340209961e-06, 7.927417755126953e-06, 8.612871170043945e-06, 9.298324584960938e-06, 9.98377799987793e-06, 1.0669231414794922e-05, 1.1354684829711914e-05, 1.2040138244628906e-05, 1.2725591659545898e-05, 1.341104507446289e-05, 1.4096498489379883e-05, 1.4781951904296875e-05, 1.5467405319213867e-05, 1.615285873413086e-05, 1.683831214904785e-05, 1.7523765563964844e-05, 1.8209218978881836e-05, 1.8894672393798828e-05, 1.958012580871582e-05, 2.0265579223632812e-05]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 1.0, 1.0, 3.0, 1.0, 5.0, 10.0, 13.0, 7.0, 9.0, 12.0, 12.0, 14.0, 13.0, 21.0, 24.0, 24.0, 30.0, 29.0, 38.0, 47.0, 46.0, 41.0, 41.0, 41.0, 56.0, 35.0, 42.0, 36.0, 43.0, 34.0, 36.0, 31.0, 28.0, 23.0, 22.0, 21.0, 18.0, 23.0, 20.0, 12.0, 8.0, 10.0, 10.0, 6.0, 2.0, 4.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0], "bins": [-2.7477741241455078e-05, -2.667214721441269e-05, -2.58665531873703e-05, -2.506095916032791e-05, -2.4255365133285522e-05, -2.3449771106243134e-05, -2.2644177079200745e-05, -2.1838583052158356e-05, -2.1032989025115967e-05, -2.0227394998073578e-05, -1.942180097103119e-05, -1.86162069439888e-05, -1.781061291694641e-05, -1.7005018889904022e-05, -1.6199424862861633e-05, -1.5393830835819244e-05, -1.4588236808776855e-05, -1.3782642781734467e-05, -1.2977048754692078e-05, -1.2171454727649689e-05, -1.13658607006073e-05, -1.0560266673564911e-05, -9.754672646522522e-06, -8.949078619480133e-06, -8.143484592437744e-06, -7.337890565395355e-06, -6.532296538352966e-06, -5.726702511310577e-06, -4.9211084842681885e-06, -4.1155144572257996e-06, -3.3099204301834106e-06, -2.5043264031410217e-06, -1.6987323760986328e-06, -8.931383490562439e-07, -8.754432201385498e-08, 7.180497050285339e-07, 1.5236437320709229e-06, 2.3292377591133118e-06, 3.1348317861557007e-06, 3.94042581319809e-06, 4.7460198402404785e-06, 5.5516138672828674e-06, 6.357207894325256e-06, 7.162801921367645e-06, 7.968395948410034e-06, 8.773989975452423e-06, 9.579584002494812e-06, 1.0385178029537201e-05, 1.119077205657959e-05, 1.1996366083621979e-05, 1.2801960110664368e-05, 1.3607554137706757e-05, 1.4413148164749146e-05, 1.5218742191791534e-05, 1.6024336218833923e-05, 1.6829930245876312e-05, 1.76355242729187e-05, 1.844111829996109e-05, 1.924671232700348e-05, 2.0052306354045868e-05, 2.0857900381088257e-05, 2.1663494408130646e-05, 2.2469088435173035e-05, 2.3274682462215424e-05, 2.4080276489257812e-05]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 6.0, 4.0, 6.0, 5.0, 13.0, 9.0, 32.0, 29.0, 45.0, 68.0, 113.0, 175.0, 296.0, 524.0, 1024.0, 1815.0, 4970.0, 17373.0, 195898.0, 782329.0, 31384.0, 7133.0, 2579.0, 1168.0, 616.0, 353.0, 188.0, 143.0, 73.0, 61.0, 36.0, 25.0, 18.0, 19.0, 8.0, 6.0, 6.0, 2.0, 5.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8656253814697266e-05, -1.8125399947166443e-05, 
-1.759454607963562e-05, -1.7063692212104797e-05, -1.6532838344573975e-05, -1.6001984477043152e-05, -1.547113060951233e-05, -1.4940276741981506e-05, -1.4409422874450684e-05, -1.387856900691986e-05, -1.3347715139389038e-05, -1.2816861271858215e-05, -1.2286007404327393e-05, -1.175515353679657e-05, -1.1224299669265747e-05, -1.0693445801734924e-05, -1.0162591934204102e-05, -9.631738066673279e-06, -9.100884199142456e-06, -8.570030331611633e-06, -8.03917646408081e-06, -7.508322596549988e-06, -6.977468729019165e-06, -6.446614861488342e-06, -5.9157609939575195e-06, -5.384907126426697e-06, -4.854053258895874e-06, -4.323199391365051e-06, -3.7923455238342285e-06, -3.2614916563034058e-06, -2.730637788772583e-06, -2.1997839212417603e-06, -1.6689300537109375e-06, -1.1380761861801147e-06, -6.07222318649292e-07, -7.636845111846924e-08, 4.544854164123535e-07, 9.853392839431763e-07, 1.516193151473999e-06, 2.0470470190048218e-06, 2.5779008865356445e-06, 3.1087547540664673e-06, 3.63960862159729e-06, 4.170462489128113e-06, 4.7013163566589355e-06, 5.232170224189758e-06, 5.763024091720581e-06, 6.293877959251404e-06, 6.8247318267822266e-06, 7.355585694313049e-06, 7.886439561843872e-06, 8.417293429374695e-06, 8.948147296905518e-06, 9.47900116443634e-06, 1.0009855031967163e-05, 1.0540708899497986e-05, 1.1071562767028809e-05, 1.1602416634559631e-05, 1.2133270502090454e-05, 1.2664124369621277e-05, 1.31949782371521e-05, 1.3725832104682922e-05, 1.4256685972213745e-05, 1.4787539839744568e-05, 1.531839370727539e-05]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 7.0, 0.0, 3.0, 0.0, 11.0, 10.0, 0.0, 15.0, 0.0, 28.0, 41.0, 0.0, 44.0, 0.0, 86.0, 84.0, 0.0, 110.0, 0.0, 123.0, 0.0, 115.0, 86.0, 0.0, 67.0, 0.0, 54.0, 40.0, 0.0, 28.0, 0.0, 17.0, 9.0, 0.0, 14.0, 0.0, 9.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0728836059570312e-06, -1.037493348121643e-06, -1.0021030902862549e-06, -9.667128324508667e-07, -9.313225746154785e-07, -8.959323167800903e-07, -8.605420589447021e-07, -8.25151801109314e-07, -7.897615432739258e-07, -7.543712854385376e-07, -7.189810276031494e-07, -6.835907697677612e-07, -6.48200511932373e-07, -6.128102540969849e-07, -5.774199962615967e-07, -5.420297384262085e-07, -5.066394805908203e-07, -4.7124922275543213e-07, -4.3585896492004395e-07, -4.0046870708465576e-07, -3.650784492492676e-07, -3.296881914138794e-07, -2.942979335784912e-07, -2.5890767574310303e-07, -2.2351741790771484e-07, -1.8812716007232666e-07, -1.5273690223693848e-07, -1.1734664440155029e-07, -8.195638656616211e-08, -4.6566128730773926e-08, -1.1175870895385742e-08, 2.421438694000244e-08, 5.960464477539063e-08, 9.499490261077881e-08, 1.30385160446167e-07, 1.6577541828155518e-07, 2.0116567611694336e-07, 2.3655593395233154e-07, 2.7194619178771973e-07, 3.073364496231079e-07, 3.427267074584961e-07, 3.781169652938843e-07, 4.1350722312927246e-07, 4.4889748096466064e-07, 4.842877388000488e-07, 5.19677996635437e-07, 5.550682544708252e-07, 5.904585123062134e-07, 6.258487701416016e-07, 6.612390279769897e-07, 6.966292858123779e-07, 7.320195436477661e-07, 7.674098014831543e-07, 8.028000593185425e-07, 8.381903171539307e-07, 8.735805749893188e-07, 9.08970832824707e-07, 9.443610906600952e-07, 9.797513484954834e-07, 1.0151416063308716e-06, 1.0505318641662598e-06, 1.085922122001648e-06, 1.1213123798370361e-06, 1.1567026376724243e-06, 1.1920928955078125e-06]}, 
"gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 3.0, 2.0, 4.0, 3.0, 9.0, 4.0, 17.0, 27.0, 29.0, 52.0, 44.0, 86.0, 150.0, 113.0, 287.0, 433.0, 330.0, 820.0, 1365.0, 966.0, 2863.0, 5171.0, 4160.0, 14711.0, 13637.0, 60393.0, 233626.0, 369953.0, 233755.0, 60404.0, 13518.0, 14822.0, 7050.0, 2172.0, 2849.0, 995.0, 1308.0, 859.0, 308.0, 452.0, 295.0, 108.0, 144.0, 98.0, 37.0, 44.0, 27.0, 10.0, 18.0, 8.0, 12.0, 12.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.9802322387695312e-06, -2.882443368434906e-06, -2.7846544981002808e-06, -2.6868656277656555e-06, -2.5890767574310303e-06, -2.491287887096405e-06, -2.3934990167617798e-06, -2.2957101464271545e-06, -2.1979212760925293e-06, -2.100132405757904e-06, -2.002343535423279e-06, -1.9045546650886536e-06, -1.8067657947540283e-06, -1.708976924419403e-06, -1.6111880540847778e-06, -1.5133991837501526e-06, -1.4156103134155273e-06, -1.317821443080902e-06, -1.2200325727462769e-06, -1.1222437024116516e-06, -1.0244548320770264e-06, -9.266659617424011e-07, -8.288770914077759e-07, -7.310882210731506e-07, -6.332993507385254e-07, -5.355104804039001e-07, -4.377216100692749e-07, -3.3993273973464966e-07, -2.421438694000244e-07, -1.4435499906539917e-07, -4.6566128730773926e-08, 5.122274160385132e-08, 1.4901161193847656e-07, 2.468004822731018e-07, 3.4458935260772705e-07, 4.423782229423523e-07, 5.401670932769775e-07, 6.379559636116028e-07, 7.35744833946228e-07, 8.335337042808533e-07, 9.313225746154785e-07, 1.0291114449501038e-06, 1.126900315284729e-06, 1.2246891856193542e-06, 1.3224780559539795e-06, 1.4202669262886047e-06, 1.51805579662323e-06, 1.6158446669578552e-06, 1.7136335372924805e-06, 1.8114224076271057e-06, 1.909211277961731e-06, 2.007000148296356e-06, 2.1047890186309814e-06, 2.2025778889656067e-06, 2.300366759300232e-06, 2.398155629634857e-06, 2.4959444999694824e-06, 2.5937333703041077e-06, 2.691522240638733e-06, 2.789311110973358e-06, 2.8870999813079834e-06, 2.9848888516426086e-06, 3.082677721977234e-06, 3.180466592311859e-06, 3.2782554626464844e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 5.0, 1.0, 0.0, 4.0, 2.0, 0.0, 1.0, 9.0, 3.0, 9.0, 14.0, 14.0, 6.0, 20.0, 26.0, 27.0, 46.0, 62.0, 44.0, 87.0, 94.0, 109.0, 34.0, 93.0, 65.0, 25.0, 47.0, 54.0, 29.0, 10.0, 11.0, 13.0, 4.0, 9.0, 4.0, 2.0, 4.0, 3.0, 1.0, 5.0, 2.0, 0.0, 4.0, 2.0, 3.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9802322387695312e-06, -2.878718078136444e-06, -2.777203917503357e-06, -2.6756897568702698e-06, -2.5741755962371826e-06, -2.4726614356040955e-06, -2.3711472749710083e-06, -2.269633114337921e-06, -2.168118953704834e-06, -2.066604793071747e-06, -1.9650906324386597e-06, -1.8635764718055725e-06, -1.7620623111724854e-06, -1.6605481505393982e-06, -1.559033989906311e-06, -1.4575198292732239e-06, -1.3560056686401367e-06, -1.2544915080070496e-06, -1.1529773473739624e-06, -1.0514631867408752e-06, -9.499490261077881e-07, -8.484348654747009e-07, -7.469207048416138e-07, -6.454065442085266e-07, -5.438923835754395e-07, -4.423782229423523e-07, -3.4086406230926514e-07, -2.39349901676178e-07, -1.3783574104309082e-07, -3.632158041000366e-08, 6.51925802230835e-08, 1.6670674085617065e-07, 2.682209014892578e-07, 3.6973506212234497e-07, 4.7124922275543213e-07, 5.727633833885193e-07, 6.742775440216064e-07, 7.757917046546936e-07, 8.773058652877808e-07, 9.78820025920868e-07, 1.080334186553955e-06, 1.1818483471870422e-06, 
1.2833625078201294e-06, 1.3848766684532166e-06, 1.4863908290863037e-06, 1.5879049897193909e-06, 1.689419150352478e-06, 1.7909333109855652e-06, 1.8924474716186523e-06, 1.9939616322517395e-06, 2.0954757928848267e-06, 2.196989953517914e-06, 2.298504114151001e-06, 2.400018274784088e-06, 2.5015324354171753e-06, 2.6030465960502625e-06, 2.7045607566833496e-06, 2.8060749173164368e-06, 2.907589077949524e-06, 3.009103238582611e-06, 3.1106173992156982e-06, 3.2121315598487854e-06, 3.3136457204818726e-06, 3.4151598811149597e-06, 3.516674041748047e-06]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 2.0, 2.0, 3.0, 5.0, 4.0, 9.0, 7.0, 20.0, 17.0, 20.0, 21.0, 40.0, 47.0, 72.0, 104.0, 124.0, 124.0, 82.0, 80.0, 48.0, 38.0, 26.0, 16.0, 21.0, 13.0, 10.0, 8.0, 6.0, 7.0, 7.0, 6.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.8428237834014e-05, -9.537742880638689e-05, -9.232662705471739e-05, -8.927581802709028e-05, -8.622501627542078e-05, -8.317420724779367e-05, -8.012339822016656e-05, -7.707258919253945e-05, -7.402178744086996e-05, -7.097097841324285e-05, -6.792017666157335e-05, -6.486936763394624e-05, -6.181855860631913e-05, -5.876775685464963e-05, -5.571694782702252e-05, -5.266614243737422e-05, -4.9615337047725916e-05, -4.656453165807761e-05, -4.351372626842931e-05, -4.04629172408022e-05, -3.7412111851153895e-05, -3.436130646150559e-05, -3.131049743387848e-05, -2.8259692044230178e-05, -2.5208886654581875e-05, -2.215808126493357e-05, -1.9107274056295864e-05, -1.6056466847658157e-05, -1.3005661458009854e-05, -9.954855158866849e-06, -6.904048859723844e-06, -3.853241651086137e-06, -8.024362614378333e-07, 2.248370037705172e-06, 5.299176336848177e-06, 8.349982635991182e-06, 1.1400788935134187e-05, 1.4451595234277193e-05, 1.7502401533420198e-05, 2.0553208742057905e-05, 2.3604014131706208e-05, 2.665481952135451e-05, 2.970562672999222e-05, 3.2756433938629925e-05, 3.580723932827823e-05, 3.885804471792653e-05, 4.190885374555364e-05, 4.4959659135201946e-05, 4.801046452485025e-05, 5.106126991449855e-05, 5.4112075304146856e-05, 5.7162884331773967e-05, 6.021368972142227e-05, 6.326449511107057e-05, 6.631530413869768e-05, 6.93661131663248e-05, 7.241691491799429e-05, 7.54677239456214e-05, 7.85185256972909e-05, 8.156933472491801e-05, 8.462014375254512e-05, 8.767094550421461e-05, 9.072175453184173e-05, 9.377255628351122e-05, 9.682336531113833e-05]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 5.0, 11.0, 11.0, 9.0, 10.0, 18.0, 16.0, 32.0, 31.0, 31.0, 34.0, 37.0, 47.0, 47.0, 55.0, 59.0, 46.0, 53.0, 60.0, 57.0, 45.0, 36.0, 40.0, 47.0, 24.0, 33.0, 25.0, 24.0, 12.0, 10.0, 13.0, 4.0, 6.0, 9.0, 6.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.259845733642578e-05, -7.012113928794861e-05, -6.764382123947144e-05, -6.516650319099426e-05, -6.268918514251709e-05, -6.021186709403992e-05, -5.7734549045562744e-05, -5.525723099708557e-05, -5.27799129486084e-05, -5.0302594900131226e-05, -4.782527685165405e-05, -4.534795880317688e-05, -4.287064075469971e-05, -4.0393322706222534e-05, -3.791600465774536e-05, -3.543868660926819e-05, -3.2961368560791016e-05, -3.0484050512313843e-05, -2.800673246383667e-05, -2.5529414415359497e-05, -2.3052096366882324e-05, -2.057477831840515e-05, -1.809746026992798e-05, 
-1.5620142221450806e-05, -1.3142824172973633e-05, -1.066550612449646e-05, -8.188188076019287e-06, -5.710870027542114e-06, -3.2335519790649414e-06, -7.562339305877686e-07, 1.7210841178894043e-06, 4.198402166366577e-06, 6.67572021484375e-06, 9.153038263320923e-06, 1.1630356311798096e-05, 1.4107674360275269e-05, 1.658499240875244e-05, 1.9062310457229614e-05, 2.1539628505706787e-05, 2.401694655418396e-05, 2.6494264602661133e-05, 2.8971582651138306e-05, 3.144890069961548e-05, 3.392621874809265e-05, 3.6403536796569824e-05, 3.8880854845047e-05, 4.135817289352417e-05, 4.383549094200134e-05, 4.6312808990478516e-05, 4.879012703895569e-05, 5.126744508743286e-05, 5.3744763135910034e-05, 5.622208118438721e-05, 5.869939923286438e-05, 6.117671728134155e-05, 6.365403532981873e-05, 6.61313533782959e-05, 6.860867142677307e-05, 7.108598947525024e-05, 7.356330752372742e-05, 7.604062557220459e-05, 7.851794362068176e-05, 8.099526166915894e-05, 8.347257971763611e-05, 8.594989776611328e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 6.0, 2.0, 5.0, 14.0, 13.0, 16.0, 28.0, 40.0, 64.0, 100.0, 160.0, 247.0, 454.0, 734.0, 1263.0, 2409.0, 5014.0, 13724.0, 62173.0, 1516283.0, 2501259.0, 66444.0, 12606.0, 5201.0, 2467.0, 1328.0, 729.0, 477.0, 334.0, 203.0, 119.0, 102.0, 65.0, 60.0, 31.0, 28.0, 22.0, 13.0, 16.0, 9.0, 7.0, 8.0, 4.0, 3.0, 6.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.07099723815918e-05, -3.914814442396164e-05, -3.758631646633148e-05, -3.6024488508701324e-05, -3.446266055107117e-05, -3.290083259344101e-05, -3.133900463581085e-05, -2.9777176678180695e-05, -2.8215348720550537e-05, -2.665352076292038e-05, -2.5091692805290222e-05, -2.3529864847660065e-05, -2.1968036890029907e-05, -2.040620893239975e-05, -1.8844380974769592e-05, -1.7282553017139435e-05, -1.5720725059509277e-05, -1.415889710187912e-05, -1.2597069144248962e-05, -1.1035241186618805e-05, -9.473413228988647e-06, -7.91158527135849e-06, -6.3497573137283325e-06, -4.787929356098175e-06, -3.2261013984680176e-06, -1.6642734408378601e-06, -1.0244548320770264e-07, 1.4593824744224548e-06, 3.0212104320526123e-06, 4.58303838968277e-06, 6.144866347312927e-06, 7.706694304943085e-06, 9.268522262573242e-06, 1.08303502202034e-05, 1.2392178177833557e-05, 1.3954006135463715e-05, 1.5515834093093872e-05, 1.707766205072403e-05, 1.8639490008354187e-05, 2.0201317965984344e-05, 2.1763145923614502e-05, 2.332497388124466e-05, 2.4886801838874817e-05, 2.6448629796504974e-05, 2.8010457754135132e-05, 2.957228571176529e-05, 3.113411366939545e-05, 3.2695941627025604e-05, 3.425776958465576e-05, 3.581959754228592e-05, 3.738142549991608e-05, 3.8943253457546234e-05, 4.050508141517639e-05, 4.206690937280655e-05, 4.3628737330436707e-05, 4.5190565288066864e-05, 4.675239324569702e-05, 4.831422120332718e-05, 4.9876049160957336e-05, 5.1437877118587494e-05, 5.299970507621765e-05, 5.456153303384781e-05, 5.6123360991477966e-05, 5.7685188949108124e-05, 5.924701690673828e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 7.0, 7.0, 8.0, 14.0, 11.0, 13.0, 22.0, 28.0, 43.0, 53.0, 82.0, 81.0, 106.0, 93.0, 95.0, 84.0, 65.0, 42.0, 42.0, 26.0, 20.0, 13.0, 13.0, 8.0, 7.0, 4.0, 5.0, 4.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.633167266845703e-05, 
-1.5716999769210815e-05, -1.51023268699646e-05, -1.4487653970718384e-05, -1.3872981071472168e-05, -1.3258308172225952e-05, -1.2643635272979736e-05, -1.202896237373352e-05, -1.1414289474487305e-05, -1.0799616575241089e-05, -1.0184943675994873e-05, -9.570270776748657e-06, -8.955597877502441e-06, -8.340924978256226e-06, -7.72625207901001e-06, -7.111579179763794e-06, -6.496906280517578e-06, -5.882233381271362e-06, -5.2675604820251465e-06, -4.652887582778931e-06, -4.038214683532715e-06, -3.423541784286499e-06, -2.808868885040283e-06, -2.1941959857940674e-06, -1.5795230865478516e-06, -9.648501873016357e-07, -3.501772880554199e-07, 2.644956111907959e-07, 8.791685104370117e-07, 1.4938414096832275e-06, 2.1085143089294434e-06, 2.723187208175659e-06, 3.337860107421875e-06, 3.952533006668091e-06, 4.567205905914307e-06, 5.1818788051605225e-06, 5.796551704406738e-06, 6.411224603652954e-06, 7.02589750289917e-06, 7.640570402145386e-06, 8.255243301391602e-06, 8.869916200637817e-06, 9.484589099884033e-06, 1.0099261999130249e-05, 1.0713934898376465e-05, 1.132860779762268e-05, 1.1943280696868896e-05, 1.2557953596115112e-05, 1.3172626495361328e-05, 1.3787299394607544e-05, 1.440197229385376e-05, 1.5016645193099976e-05, 1.563131809234619e-05, 1.6245990991592407e-05, 1.6860663890838623e-05, 1.747533679008484e-05, 1.8090009689331055e-05, 1.870468258857727e-05, 1.9319355487823486e-05, 1.9934028387069702e-05, 2.0548701286315918e-05, 2.1163374185562134e-05, 2.177804708480835e-05, 2.2392719984054565e-05, 2.300739288330078e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 4.0, 2.0, 7.0, 3.0, 9.0, 5.0, 14.0, 18.0, 14.0, 19.0, 41.0, 47.0, 89.0, 140.0, 156.0, 273.0, 382.0, 613.0, 881.0, 1446.0, 2545.0, 4430.0, 8306.0, 17092.0, 46955.0, 166852.0, 3101650.0, 694250.0, 89502.0, 29712.0, 12969.0, 7022.0, 3410.0, 1947.0, 1197.0, 765.0, 524.0, 319.0, 225.0, 119.0, 101.0, 67.0, 49.0, 35.0, 22.0, 25.0, 10.0, 7.0, 9.0, 5.0, 4.0, 3.0, 5.0], "bins": [-2.7358531951904297e-05, -2.663489431142807e-05, -2.5911256670951843e-05, -2.5187619030475616e-05, -2.446398138999939e-05, -2.3740343749523163e-05, -2.3016706109046936e-05, -2.229306846857071e-05, -2.1569430828094482e-05, -2.0845793187618256e-05, -2.012215554714203e-05, -1.9398517906665802e-05, -1.8674880266189575e-05, -1.795124262571335e-05, -1.722760498523712e-05, -1.6503967344760895e-05, -1.5780329704284668e-05, -1.5056692063808441e-05, -1.4333054423332214e-05, -1.3609416782855988e-05, -1.288577914237976e-05, -1.2162141501903534e-05, -1.1438503861427307e-05, -1.071486622095108e-05, -9.991228580474854e-06, -9.267590939998627e-06, -8.5439532995224e-06, -7.820315659046173e-06, -7.096678018569946e-06, -6.3730403780937195e-06, -5.649402737617493e-06, -4.925765097141266e-06, -4.202127456665039e-06, -3.4784898161888123e-06, -2.7548521757125854e-06, -2.0312145352363586e-06, -1.3075768947601318e-06, -5.83939254283905e-07, 1.3969838619232178e-07, 8.633360266685486e-07, 1.5869736671447754e-06, 2.310611307621002e-06, 3.034248948097229e-06, 3.757886588573456e-06, 4.481524229049683e-06, 5.2051618695259094e-06, 5.928799510002136e-06, 6.652437150478363e-06, 7.37607479095459e-06, 8.099712431430817e-06, 8.823350071907043e-06, 9.54698771238327e-06, 1.0270625352859497e-05, 1.0994262993335724e-05, 1.171790063381195e-05, 1.2441538274288177e-05, 1.3165175914764404e-05, 1.3888813555240631e-05, 1.4612451195716858e-05, 1.5336088836193085e-05, 1.605972647666931e-05, 1.678336411714554e-05, 
1.7507001757621765e-05, 1.8230639398097992e-05, 1.895427703857422e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 0.0, 5.0, 7.0, 10.0, 7.0, 12.0, 14.0, 16.0, 34.0, 36.0, 57.0, 63.0, 79.0, 188.0, 402.0, 766.0, 1061.0, 592.0, 271.0, 136.0, 83.0, 64.0, 41.0, 23.0, 23.0, 25.0, 14.0, 9.0, 9.0, 6.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.2411346435546875e-05, -2.1556392312049866e-05, -2.0701438188552856e-05, -1.9846484065055847e-05, -1.8991529941558838e-05, -1.813657581806183e-05, -1.728162169456482e-05, -1.642666757106781e-05, -1.55717134475708e-05, -1.4716759324073792e-05, -1.3861805200576782e-05, -1.3006851077079773e-05, -1.2151896953582764e-05, -1.1296942830085754e-05, -1.0441988706588745e-05, -9.587034583091736e-06, -8.732080459594727e-06, -7.877126336097717e-06, -7.022172212600708e-06, -6.167218089103699e-06, -5.3122639656066895e-06, -4.45730984210968e-06, -3.602355718612671e-06, -2.7474015951156616e-06, -1.8924474716186523e-06, -1.037493348121643e-06, -1.825392246246338e-07, 6.724148988723755e-07, 1.5273690223693848e-06, 2.382323145866394e-06, 3.2372772693634033e-06, 4.092231392860413e-06, 4.947185516357422e-06, 5.802139639854431e-06, 6.6570937633514404e-06, 7.51204788684845e-06, 8.367002010345459e-06, 9.221956133842468e-06, 1.0076910257339478e-05, 1.0931864380836487e-05, 1.1786818504333496e-05, 1.2641772627830505e-05, 1.3496726751327515e-05, 1.4351680874824524e-05, 1.5206634998321533e-05, 1.6061589121818542e-05, 1.6916543245315552e-05, 1.777149736881256e-05, 1.862645149230957e-05, 1.948140561580658e-05, 2.033635973930359e-05, 2.1191313862800598e-05, 2.2046267986297607e-05, 2.2901222109794617e-05, 2.3756176233291626e-05, 2.4611130356788635e-05, 2.5466084480285645e-05, 2.6321038603782654e-05, 2.7175992727279663e-05, 2.8030946850776672e-05, 2.888590097427368e-05, 2.974085509777069e-05, 3.05958092212677e-05, 3.145076334476471e-05, 3.230571746826172e-05]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 7.0, 2.0, 6.0, 8.0, 12.0, 9.0, 16.0, 31.0, 42.0, 55.0, 108.0, 130.0, 131.0, 103.0, 84.0, 55.0, 46.0, 31.0, 26.0, 24.0, 19.0, 14.0, 9.0, 9.0, 7.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011551639181561768, -0.00010982259846059605, -0.00010412880510557443, -9.84350117505528e-05, -9.274121839553118e-05, -8.704742504050955e-05, -8.135363168548793e-05, -7.565984560642391e-05, -6.996604497544467e-05, -6.427225162042305e-05, -5.857845826540142e-05, -5.28846649103798e-05, -4.719087155535817e-05, -4.1497078200336546e-05, -3.580328848329373e-05, -3.01094951282721e-05, -2.4415705411229283e-05, -1.8721912056207657e-05, -1.3028119610680733e-05, -7.334327165153809e-06, -1.6405338101321831e-06, 4.053259544889443e-06, 9.747051080921665e-06, 1.544084443594329e-05, 2.1134637790964916e-05, 2.6828431145986542e-05, 3.252222450100817e-05, 3.8216014218050987e-05, 4.390980757307261e-05, 4.960360092809424e-05, 5.5297394283115864e-05, 6.099118763813749e-05, 6.668498099315912e-05, 7.237877434818074e-05, 7.807256770320237e-05, 8.376636105822399e-05, 8.946015441324562e-05, 9.515394776826724e-05, 0.00010084774112328887, 0.00010654152720235288, 0.00011223532783333212, 0.00011792912118835375, 
0.00012362291454337537, 0.00012931670062243938, 0.00013501050125341862, 0.00014070428733248264, 0.00014639808796346188, 0.0001520918740425259, 0.0001577856601215899, 0.0001634794462006539, 0.00016917324683163315, 0.00017486703291069716, 0.0001805608335416764, 0.00018625461962074041, 0.00019194842025171965, 0.00019764220633078367, 0.0002033360069617629, 0.00020902979304082692, 0.00021472359367180616, 0.00022041737975087017, 0.0002261111803818494, 0.00023180496646091342, 0.00023749876709189266, 0.00024319255317095667, 0.0002488863538019359]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 3.0, 4.0, 0.0, 5.0, 6.0, 10.0, 8.0, 6.0, 19.0, 10.0, 24.0, 27.0, 25.0, 28.0, 37.0, 23.0, 42.0, 37.0, 40.0, 46.0, 51.0, 42.0, 46.0, 58.0, 52.0, 44.0, 35.0, 31.0, 20.0, 46.0, 26.0, 19.0, 22.0, 26.0, 22.0, 12.0, 16.0, 12.0, 4.0, 6.0, 6.0, 6.0, 0.0, 2.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.088350296020508e-05, -7.843878120183945e-05, -7.599405944347382e-05, -7.354933768510818e-05, -7.110461592674255e-05, -6.865989416837692e-05, -6.621517241001129e-05, -6.377045065164566e-05, -6.132572889328003e-05, -5.88810071349144e-05, -5.643628537654877e-05, -5.3991563618183136e-05, -5.1546841859817505e-05, -4.9102120101451874e-05, -4.665739834308624e-05, -4.421267658472061e-05, -4.176795482635498e-05, -3.932323306798935e-05, -3.687851130962372e-05, -3.443378955125809e-05, -3.1989067792892456e-05, -2.9544346034526825e-05, -2.7099624276161194e-05, -2.4654902517795563e-05, -2.221018075942993e-05, -1.97654590010643e-05, -1.732073724269867e-05, -1.4876015484333038e-05, -1.2431293725967407e-05, -9.986571967601776e-06, -7.541850209236145e-06, -5.097128450870514e-06, -2.652406692504883e-06, -2.076849341392517e-07, 2.2370368242263794e-06, 4.6817585825920105e-06, 7.126480340957642e-06, 9.571202099323273e-06, 1.2015923857688904e-05, 1.4460645616054535e-05, 1.6905367374420166e-05, 1.9350089132785797e-05, 2.1794810891151428e-05, 2.423953264951706e-05, 2.668425440788269e-05, 2.912897616624832e-05, 3.157369792461395e-05, 3.4018419682979584e-05, 3.6463141441345215e-05, 3.8907863199710846e-05, 4.135258495807648e-05, 4.379730671644211e-05, 4.624202847480774e-05, 4.868675023317337e-05, 5.1131471991539e-05, 5.357619374990463e-05, 5.6020915508270264e-05, 5.8465637266635895e-05, 6.0910359025001526e-05, 6.335508078336716e-05, 6.579980254173279e-05, 6.824452430009842e-05, 7.068924605846405e-05, 7.313396781682968e-05, 7.557868957519531e-05]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 7.0, 9.0, 7.0, 9.0, 16.0, 20.0, 29.0, 37.0, 59.0, 126.0, 188.0, 352.0, 723.0, 1415.0, 3039.0, 6744.0, 18199.0, 59807.0, 340505.0, 509254.0, 72658.0, 21194.0, 7575.0, 3296.0, 1537.0, 794.0, 409.0, 223.0, 124.0, 77.0, 42.0, 28.0, 17.0, 13.0, 9.0, 8.0, 3.0, 5.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.165836334228516e-05, -7.911212742328644e-05, -7.656589150428772e-05, -7.4019655585289e-05, -7.147341966629028e-05, -6.892718374729156e-05, -6.638094782829285e-05, -6.383471190929413e-05, -6.128847599029541e-05, -5.874224007129669e-05, -5.6196004152297974e-05, -5.3649768233299255e-05, -5.110353231430054e-05, -4.855729639530182e-05, -4.60110604763031e-05, -4.346482455730438e-05, -4.0918588638305664e-05, -3.8372352719306946e-05, -3.582611680030823e-05, -3.327988088130951e-05, -3.073364496231079e-05, 
-2.8187409043312073e-05, -2.5641173124313354e-05, -2.3094937205314636e-05, -2.0548701286315918e-05, -1.80024653673172e-05, -1.545622944831848e-05, -1.2909993529319763e-05, -1.0363757610321045e-05, -7.817521691322327e-06, -5.271285772323608e-06, -2.72504985332489e-06, -1.7881393432617188e-07, 2.3674219846725464e-06, 4.913657903671265e-06, 7.459893822669983e-06, 1.0006129741668701e-05, 1.255236566066742e-05, 1.5098601579666138e-05, 1.7644837498664856e-05, 2.0191073417663574e-05, 2.2737309336662292e-05, 2.528354525566101e-05, 2.782978117465973e-05, 3.0376017093658447e-05, 3.2922253012657166e-05, 3.5468488931655884e-05, 3.80147248506546e-05, 4.056096076965332e-05, 4.310719668865204e-05, 4.565343260765076e-05, 4.8199668526649475e-05, 5.074590444564819e-05, 5.329214036464691e-05, 5.583837628364563e-05, 5.838461220264435e-05, 6.0930848121643066e-05, 6.347708404064178e-05, 6.60233199596405e-05, 6.856955587863922e-05, 7.111579179763794e-05, 7.366202771663666e-05, 7.620826363563538e-05, 7.87544995546341e-05, 8.130073547363281e-05]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 0.0, 3.0, 2.0, 4.0, 6.0, 5.0, 8.0, 16.0, 15.0, 32.0, 38.0, 50.0, 62.0, 71.0, 99.0, 129.0, 108.0, 84.0, 79.0, 47.0, 48.0, 27.0, 21.0, 8.0, 13.0, 10.0, 7.0, 5.0, 4.0, 4.0, 5.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4483928680419922e-05, -1.3819895684719086e-05, -1.315586268901825e-05, -1.2491829693317413e-05, -1.1827796697616577e-05, -1.1163763701915741e-05, -1.0499730706214905e-05, -9.835697710514069e-06, -9.171664714813232e-06, -8.507631719112396e-06, -7.84359872341156e-06, -7.179565727710724e-06, -6.515532732009888e-06, -5.8514997363090515e-06, -5.187466740608215e-06, -4.523433744907379e-06, -3.859400749206543e-06, -3.1953677535057068e-06, -2.5313347578048706e-06, -1.8673017621040344e-06, -1.2032687664031982e-06, -5.392357707023621e-07, 1.2479722499847412e-07, 7.888302206993103e-07, 1.4528632164001465e-06, 2.1168962121009827e-06, 2.780929207801819e-06, 3.444962203502655e-06, 4.108995199203491e-06, 4.773028194904327e-06, 5.4370611906051636e-06, 6.101094186306e-06, 6.765127182006836e-06, 7.429160177707672e-06, 8.093193173408508e-06, 8.757226169109344e-06, 9.42125916481018e-06, 1.0085292160511017e-05, 1.0749325156211853e-05, 1.141335815191269e-05, 1.2077391147613525e-05, 1.2741424143314362e-05, 1.3405457139015198e-05, 1.4069490134716034e-05, 1.473352313041687e-05, 1.5397556126117706e-05, 1.6061589121818542e-05, 1.672562211751938e-05, 1.7389655113220215e-05, 1.805368810892105e-05, 1.8717721104621887e-05, 1.9381754100322723e-05, 2.004578709602356e-05, 2.0709820091724396e-05, 2.1373853087425232e-05, 2.2037886083126068e-05, 2.2701919078826904e-05, 2.336595207452774e-05, 2.4029985070228577e-05, 2.4694018065929413e-05, 2.535805106163025e-05, 2.6022084057331085e-05, 2.668611705303192e-05, 2.7350150048732758e-05, 2.8014183044433594e-05]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 4.0, 4.0, 7.0, 7.0, 11.0, 10.0, 19.0, 17.0, 22.0, 35.0, 40.0, 85.0, 105.0, 153.0, 233.0, 319.0, 524.0, 798.0, 1188.0, 2043.0, 3584.0, 7869.0, 16791.0, 47358.0, 265551.0, 596248.0, 64982.0, 20628.0, 8747.0, 4458.0, 2592.0, 1394.0, 832.0, 518.0, 431.0, 251.0, 191.0, 126.0, 93.0, 99.0, 49.0, 37.0, 32.0, 23.0, 16.0, 9.0, 10.0, 10.0, 2.0, 1.0, 2.0, 3.0, 3.0], "bins": 
[-3.4928321838378906e-05, -3.396160900592804e-05, -3.299489617347717e-05, -3.2028183341026306e-05, -3.106147050857544e-05, -3.0094757676124573e-05, -2.9128044843673706e-05, -2.816133201122284e-05, -2.7194619178771973e-05, -2.6227906346321106e-05, -2.526119351387024e-05, -2.4294480681419373e-05, -2.3327767848968506e-05, -2.236105501651764e-05, -2.1394342184066772e-05, -2.0427629351615906e-05, -1.946091651916504e-05, -1.8494203686714172e-05, -1.7527490854263306e-05, -1.656077802181244e-05, -1.5594065189361572e-05, -1.4627352356910706e-05, -1.3660639524459839e-05, -1.2693926692008972e-05, -1.1727213859558105e-05, -1.0760501027107239e-05, -9.793788194656372e-06, -8.827075362205505e-06, -7.860362529754639e-06, -6.893649697303772e-06, -5.926936864852905e-06, -4.9602240324020386e-06, -3.993511199951172e-06, -3.026798367500305e-06, -2.0600855350494385e-06, -1.0933727025985718e-06, -1.2665987014770508e-07, 8.400529623031616e-07, 1.8067657947540283e-06, 2.773478627204895e-06, 3.7401914596557617e-06, 4.706904292106628e-06, 5.673617124557495e-06, 6.640329957008362e-06, 7.6070427894592285e-06, 8.573755621910095e-06, 9.540468454360962e-06, 1.0507181286811829e-05, 1.1473894119262695e-05, 1.2440606951713562e-05, 1.3407319784164429e-05, 1.4374032616615295e-05, 1.5340745449066162e-05, 1.630745828151703e-05, 1.7274171113967896e-05, 1.8240883946418762e-05, 1.920759677886963e-05, 2.0174309611320496e-05, 2.1141022443771362e-05, 2.210773527622223e-05, 2.3074448108673096e-05, 2.4041160941123962e-05, 2.500787377357483e-05, 2.5974586606025696e-05, 2.6941299438476562e-05]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 6.0, 13.0, 8.0, 13.0, 10.0, 14.0, 22.0, 23.0, 33.0, 34.0, 54.0, 46.0, 46.0, 57.0, 56.0, 62.0, 55.0, 55.0, 57.0, 40.0, 48.0, 31.0, 42.0, 36.0, 27.0, 21.0, 16.0, 15.0, 12.0, 10.0, 5.0, 8.0, 2.0, 6.0, 4.0, 3.0, 2.0, 5.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8073787689208984e-05, -2.71722674369812e-05, -2.6270747184753418e-05, -2.5369226932525635e-05, -2.446770668029785e-05, -2.356618642807007e-05, -2.2664666175842285e-05, -2.1763145923614502e-05, -2.086162567138672e-05, -1.9960105419158936e-05, -1.9058585166931152e-05, -1.815706491470337e-05, -1.7255544662475586e-05, -1.6354024410247803e-05, -1.545250415802002e-05, -1.4550983905792236e-05, -1.3649463653564453e-05, -1.274794340133667e-05, -1.1846423149108887e-05, -1.0944902896881104e-05, -1.004338264465332e-05, -9.141862392425537e-06, -8.240342140197754e-06, -7.338821887969971e-06, -6.4373016357421875e-06, -5.535781383514404e-06, -4.634261131286621e-06, -3.732740879058838e-06, -2.8312206268310547e-06, -1.9297003746032715e-06, -1.0281801223754883e-06, -1.2665987014770508e-07, 7.748603820800781e-07, 1.6763806343078613e-06, 2.5779008865356445e-06, 3.4794211387634277e-06, 4.380941390991211e-06, 5.282461643218994e-06, 6.183981895446777e-06, 7.0855021476745605e-06, 7.987022399902344e-06, 8.888542652130127e-06, 9.79006290435791e-06, 1.0691583156585693e-05, 1.1593103408813477e-05, 1.249462366104126e-05, 1.3396143913269043e-05, 1.4297664165496826e-05, 1.519918441772461e-05, 1.6100704669952393e-05, 1.7002224922180176e-05, 1.790374517440796e-05, 1.8805265426635742e-05, 1.9706785678863525e-05, 2.060830593109131e-05, 2.1509826183319092e-05, 2.2411346435546875e-05, 2.3312866687774658e-05, 2.421438694000244e-05, 2.5115907192230225e-05, 2.6017427444458008e-05, 2.691894769668579e-05, 2.7820467948913574e-05, 
2.8721988201141357e-05, 2.962350845336914e-05]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 5.0, 8.0, 7.0, 18.0, 14.0, 33.0, 62.0, 101.0, 106.0, 236.0, 415.0, 752.0, 1415.0, 3782.0, 12562.0, 61196.0, 860242.0, 87969.0, 12699.0, 3486.0, 1656.0, 803.0, 410.0, 207.0, 135.0, 91.0, 38.0, 28.0, 31.0, 17.0, 9.0, 5.0, 6.0, 5.0, 3.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.5914440155029297e-05, -1.5453435480594635e-05, -1.4992430806159973e-05, -1.4531426131725311e-05, -1.407042145729065e-05, -1.3609416782855988e-05, -1.3148412108421326e-05, -1.2687407433986664e-05, -1.2226402759552002e-05, -1.176539808511734e-05, -1.1304393410682678e-05, -1.0843388736248016e-05, -1.0382384061813354e-05, -9.921379387378693e-06, -9.46037471294403e-06, -8.999370038509369e-06, -8.538365364074707e-06, -8.077360689640045e-06, -7.616356015205383e-06, -7.1553513407707214e-06, -6.6943466663360596e-06, -6.233341991901398e-06, -5.772337317466736e-06, -5.311332643032074e-06, -4.850327968597412e-06, -4.38932329416275e-06, -3.928318619728088e-06, -3.4673139452934265e-06, -3.0063092708587646e-06, -2.5453045964241028e-06, -2.084299921989441e-06, -1.623295247554779e-06, -1.1622905731201172e-06, -7.012858986854553e-07, -2.4028122425079346e-07, 2.207234501838684e-07, 6.817281246185303e-07, 1.1427327990531921e-06, 1.603737473487854e-06, 2.064742147922516e-06, 2.5257468223571777e-06, 2.9867514967918396e-06, 3.4477561712265015e-06, 3.908760845661163e-06, 4.369765520095825e-06, 4.830770194530487e-06, 5.291774868965149e-06, 5.752779543399811e-06, 6.213784217834473e-06, 6.6747888922691345e-06, 7.135793566703796e-06, 7.596798241138458e-06, 8.05780291557312e-06, 8.518807590007782e-06, 8.979812264442444e-06, 9.440816938877106e-06, 9.901821613311768e-06, 1.036282628774643e-05, 1.0823830962181091e-05, 1.1284835636615753e-05, 1.1745840311050415e-05, 1.2206844985485077e-05, 1.2667849659919739e-05, 1.31288543343544e-05, 1.3589859008789062e-05]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 6.0, 9.0, 12.0, 23.0, 0.0, 35.0, 52.0, 69.0, 78.0, 107.0, 0.0, 132.0, 128.0, 105.0, 77.0, 0.0, 60.0, 37.0, 28.0, 17.0, 8.0, 0.0, 8.0, 4.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4901161193847656e-06, -1.4407560229301453e-06, -1.391395926475525e-06, -1.3420358300209045e-06, -1.2926757335662842e-06, -1.2433156371116638e-06, -1.1939555406570435e-06, -1.144595444202423e-06, -1.0952353477478027e-06, -1.0458752512931824e-06, -9.96515154838562e-07, -9.471550583839417e-07, -8.977949619293213e-07, -8.484348654747009e-07, -7.990747690200806e-07, -7.497146725654602e-07, -7.003545761108398e-07, -6.509944796562195e-07, -6.016343832015991e-07, -5.522742867469788e-07, -5.029141902923584e-07, -4.5355409383773804e-07, -4.041939973831177e-07, -3.548339009284973e-07, -3.0547380447387695e-07, -2.561137080192566e-07, -2.0675361156463623e-07, -1.5739351511001587e-07, -1.0803341865539551e-07, -5.8673322200775146e-08, -9.313225746154785e-09, 4.0046870708465576e-08, 8.940696716308594e-08, 1.387670636177063e-07, 1.8812716007232666e-07, 2.3748725652694702e-07, 2.868473529815674e-07, 3.3620744943618774e-07, 3.855675458908081e-07, 4.3492764234542847e-07, 4.842877388000488e-07, 5.336478352546692e-07, 
5.830079317092896e-07, 6.323680281639099e-07, 6.817281246185303e-07, 7.310882210731506e-07, 7.80448317527771e-07, 8.298084139823914e-07, 8.791685104370117e-07, 9.285286068916321e-07, 9.778887033462524e-07, 1.0272487998008728e-06, 1.0766088962554932e-06, 1.1259689927101135e-06, 1.1753290891647339e-06, 1.2246891856193542e-06, 1.2740492820739746e-06, 1.323409378528595e-06, 1.3727694749832153e-06, 1.4221295714378357e-06, 1.471489667892456e-06, 1.5208497643470764e-06, 1.5702098608016968e-06, 1.6195699572563171e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 3.0, 6.0, 8.0, 4.0, 4.0, 8.0, 8.0, 19.0, 25.0, 23.0, 41.0, 51.0, 64.0, 94.0, 167.0, 207.0, 366.0, 543.0, 1018.0, 2313.0, 4713.0, 15666.0, 183570.0, 802181.0, 25431.0, 6021.0, 2922.0, 1219.0, 602.0, 448.0, 231.0, 175.0, 124.0, 78.0, 43.0, 45.0, 36.0, 25.0, 16.0, 6.0, 13.0, 12.0, 6.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.621246337890625e-05, -1.559220254421234e-05, -1.4971941709518433e-05, -1.4351680874824524e-05, -1.3731420040130615e-05, -1.3111159205436707e-05, -1.2490898370742798e-05, -1.187063753604889e-05, -1.125037670135498e-05, -1.0630115866661072e-05, -1.0009855031967163e-05, -9.389594197273254e-06, -8.769333362579346e-06, -8.149072527885437e-06, -7.528811693191528e-06, -6.90855085849762e-06, -6.288290023803711e-06, -5.668029189109802e-06, -5.0477683544158936e-06, -4.427507519721985e-06, -3.807246685028076e-06, -3.1869858503341675e-06, -2.566725015640259e-06, -1.94646418094635e-06, -1.3262033462524414e-06, -7.059425115585327e-07, -8.568167686462402e-08, 5.345791578292847e-07, 1.1548399925231934e-06, 1.775100827217102e-06, 2.3953616619110107e-06, 3.0156224966049194e-06, 3.635883331298828e-06, 4.256144165992737e-06, 4.8764050006866455e-06, 5.496665835380554e-06, 6.116926670074463e-06, 6.737187504768372e-06, 7.35744833946228e-06, 7.977709174156189e-06, 8.597970008850098e-06, 9.218230843544006e-06, 9.838491678237915e-06, 1.0458752512931824e-05, 1.1079013347625732e-05, 1.1699274182319641e-05, 1.231953501701355e-05, 1.2939795851707458e-05, 1.3560056686401367e-05, 1.4180317521095276e-05, 1.4800578355789185e-05, 1.5420839190483093e-05, 1.6041100025177002e-05, 1.666136085987091e-05, 1.728162169456482e-05, 1.7901882529258728e-05, 1.8522143363952637e-05, 1.9142404198646545e-05, 1.9762665033340454e-05, 2.0382925868034363e-05, 2.100318670272827e-05, 2.162344753742218e-05, 2.224370837211609e-05, 2.2863969206809998e-05, 2.3484230041503906e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 4.0, 3.0, 2.0, 4.0, 4.0, 7.0, 8.0, 9.0, 22.0, 22.0, 28.0, 54.0, 87.0, 160.0, 167.0, 133.0, 103.0, 52.0, 34.0, 23.0, 15.0, 13.0, 5.0, 5.0, 3.0, 1.0, 4.0, 5.0, 7.0, 5.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.47713851928711e-06, -9.074807167053223e-06, -8.672475814819336e-06, -8.27014446258545e-06, -7.867813110351562e-06, -7.465481758117676e-06, -7.063150405883789e-06, -6.660819053649902e-06, -6.258487701416016e-06, -5.856156349182129e-06, -5.453824996948242e-06, -5.0514936447143555e-06, -4.649162292480469e-06, -4.246830940246582e-06, -3.844499588012695e-06, -3.4421682357788086e-06, -3.039836883544922e-06, -2.637505531311035e-06, -2.2351741790771484e-06, -1.8328428268432617e-06, -1.430511474609375e-06, 
-1.0281801223754883e-06, -6.258487701416016e-07, -2.2351741790771484e-07, 1.7881393432617188e-07, 5.811452865600586e-07, 9.834766387939453e-07, 1.385807991027832e-06, 1.7881393432617188e-06, 2.1904706954956055e-06, 2.592802047729492e-06, 2.995133399963379e-06, 3.3974647521972656e-06, 3.7997961044311523e-06, 4.202127456665039e-06, 4.604458808898926e-06, 5.0067901611328125e-06, 5.409121513366699e-06, 5.811452865600586e-06, 6.213784217834473e-06, 6.616115570068359e-06, 7.018446922302246e-06, 7.420778274536133e-06, 7.82310962677002e-06, 8.225440979003906e-06, 8.627772331237793e-06, 9.03010368347168e-06, 9.432435035705566e-06, 9.834766387939453e-06, 1.023709774017334e-05, 1.0639429092407227e-05, 1.1041760444641113e-05, 1.1444091796875e-05, 1.1846423149108887e-05, 1.2248754501342773e-05, 1.265108585357666e-05, 1.3053417205810547e-05, 1.3455748558044434e-05, 1.385807991027832e-05, 1.4260411262512207e-05, 1.4662742614746094e-05, 1.506507396697998e-05, 1.5467405319213867e-05, 1.5869736671447754e-05, 1.627206802368164e-05]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 3.0, 4.0, 8.0, 8.0, 10.0, 19.0, 32.0, 63.0, 101.0, 187.0, 206.0, 135.0, 69.0, 50.0, 25.0, 23.0, 13.0, 15.0, 8.0, 5.0, 8.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001914964959723875, -0.0001832297712098807, -0.00017496306099928916, -0.00016669633623678237, -0.00015842962602619082, -0.00015016290126368403, -0.00014189619105309248, -0.0001336294662905857, -0.00012536274152807891, -0.00011709602404152974, -0.00010882930655498058, -0.00010056258179247379, -9.229587158188224e-05, -8.402914681937546e-05, -7.576242933282629e-05, -6.749571184627712e-05, -5.922900163568556e-05, -5.0962284149136394e-05, -4.2695566662587225e-05, -3.442884553805925e-05, -2.616212805151008e-05, -1.7895410564960912e-05, -9.628689440432936e-06, -1.3619719538837671e-06, 6.904745532665402e-06, 1.5171463928709272e-05, 2.3438182324753143e-05, 3.1704901630291715e-05, 3.9971619116840884e-05, 4.823833660339005e-05, 5.650505772791803e-05, 6.47717752144672e-05, 7.303847814910114e-05, 8.130519563565031e-05, 8.957191312219948e-05, 9.783863788470626e-05, 0.00010610534809529781, 0.0001143720728578046, 0.00012263879762031138, 0.00013090550783090293, 0.0001391722180414945, 0.00014743894280400127, 0.00015570565301459283, 0.0001639723777770996, 0.00017223908798769116, 0.00018050581275019795, 0.00018877253751270473, 0.00019703924772329628, 0.00020530597248580307, 0.00021357269724830985, 0.0002218394074589014, 0.0002301061322214082, 0.00023837284243199974, 0.0002466395671945065, 0.0002549062774050981, 0.0002631730167195201, 0.00027143972693011165, 0.0002797064371407032, 0.0002879731764551252, 0.00029623988666571677, 0.0003045065968763083, 0.0003127733070868999, 0.0003210400464013219, 0.00032930675661191344, 0.000337573466822505]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 3.0, 8.0, 7.0, 5.0, 14.0, 9.0, 24.0, 16.0, 28.0, 36.0, 36.0, 41.0, 52.0, 67.0, 73.0, 95.0, 83.0, 82.0, 55.0, 48.0, 48.0, 46.0, 27.0, 26.0, 25.0, 17.0, 9.0, 4.0, 7.0, 5.0, 4.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0001240372657775879, -0.00012032128870487213, -0.00011660531163215637, 
-0.00011288933455944061, -0.00010917335748672485, -0.0001054573804140091, -0.00010174140334129333, -9.802542626857758e-05, -9.430944919586182e-05, -9.059347212314606e-05, -8.68774950504303e-05, -8.316151797771454e-05, -7.944554090499878e-05, -7.572956383228302e-05, -7.201358675956726e-05, -6.82976096868515e-05, -6.458163261413574e-05, -6.086565554141998e-05, -5.7149678468704224e-05, -5.3433701395988464e-05, -4.9717724323272705e-05, -4.6001747250556946e-05, -4.2285770177841187e-05, -3.856979310512543e-05, -3.485381603240967e-05, -3.113783895969391e-05, -2.742186188697815e-05, -2.370588481426239e-05, -1.998990774154663e-05, -1.627393066883087e-05, -1.2557953596115112e-05, -8.841976523399353e-06, -5.125999450683594e-06, -1.4100223779678345e-06, 2.305954694747925e-06, 6.021931767463684e-06, 9.737908840179443e-06, 1.3453885912895203e-05, 1.7169862985610962e-05, 2.088584005832672e-05, 2.460181713104248e-05, 2.831779420375824e-05, 3.2033771276474e-05, 3.574974834918976e-05, 3.946572542190552e-05, 4.318170249462128e-05, 4.6897679567337036e-05, 5.0613656640052795e-05, 5.4329633712768555e-05, 5.8045610785484314e-05, 6.176158785820007e-05, 6.547756493091583e-05, 6.919354200363159e-05, 7.290951907634735e-05, 7.662549614906311e-05, 8.034147322177887e-05, 8.405745029449463e-05, 8.777342736721039e-05, 9.148940443992615e-05, 9.52053815126419e-05, 9.892135858535767e-05, 0.00010263733565807343, 0.00010635331273078918, 0.00011006928980350494, 0.0001137852668762207]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 0.0, 8.0, 11.0, 13.0, 21.0, 20.0, 40.0, 54.0, 71.0, 233.0, 258.0, 81.0, 57.0, 35.0, 26.0, 19.0, 17.0, 15.0, 12.0, 7.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.504753112792969e-05, -3.391783684492111e-05, -3.278814256191254e-05, -3.165844827890396e-05, -3.0528753995895386e-05, -2.939905971288681e-05, -2.8269365429878235e-05, -2.713967114686966e-05, -2.6009976863861084e-05, -2.488028258085251e-05, -2.3750588297843933e-05, -2.2620894014835358e-05, -2.1491199731826782e-05, -2.0361505448818207e-05, -1.923181116580963e-05, -1.8102116882801056e-05, -1.697242259979248e-05, -1.5842728316783905e-05, -1.471303403377533e-05, -1.3583339750766754e-05, -1.2453645467758179e-05, -1.1323951184749603e-05, -1.0194256901741028e-05, -9.064562618732452e-06, -7.934868335723877e-06, -6.8051740527153015e-06, -5.675479769706726e-06, -4.545785486698151e-06, -3.416091203689575e-06, -2.2863969206809998e-06, -1.1567026376724243e-06, -2.7008354663848877e-08, 1.1026859283447266e-06, 2.232380211353302e-06, 3.3620744943618774e-06, 4.491768777370453e-06, 5.621463060379028e-06, 6.751157343387604e-06, 7.88085162639618e-06, 9.010545909404755e-06, 1.014024019241333e-05, 1.1269934475421906e-05, 1.2399628758430481e-05, 1.3529323041439056e-05, 1.4659017324447632e-05, 1.5788711607456207e-05, 1.6918405890464783e-05, 1.8048100173473358e-05, 1.9177794456481934e-05, 2.030748873949051e-05, 2.1437183022499084e-05, 2.256687730550766e-05, 2.3696571588516235e-05, 2.482626587152481e-05, 2.5955960154533386e-05, 2.708565443754196e-05, 2.8215348720550537e-05, 2.9345043003559113e-05, 3.0474737286567688e-05, 3.1604431569576263e-05, 3.273412585258484e-05, 3.3863820135593414e-05, 3.499351441860199e-05, 3.6123208701610565e-05, 3.725290298461914e-05]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", 
"values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 4.0, 4.0, 6.0, 11.0, 19.0, 34.0, 41.0, 64.0, 124.0, 217.0, 515.0, 1240.0, 4259.0, 35477.0, 8319879.0, 21401.0, 3375.0, 1001.0, 430.0, 183.0, 111.0, 74.0, 32.0, 20.0, 17.0, 16.0, 6.0, 8.0, 9.0, 3.0, 1.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-8.122166036628187e-05, -7.845953950891271e-05, -7.569741137558594e-05, -7.293529051821679e-05, -7.017316966084763e-05, -6.741104880347848e-05, -6.464892067015171e-05, -6.188679981278256e-05, -5.91246789554134e-05, -5.636255446006544e-05, -5.3600433602696285e-05, -5.083830910734832e-05, -4.807618824997917e-05, -4.5314063754631206e-05, -4.2551939259283245e-05, -3.978981840191409e-05, -3.702769390656613e-05, -3.4265569411218166e-05, -3.150344855384901e-05, -2.874132405850105e-05, -2.5979203201131895e-05, -2.3217078705783933e-05, -2.0454956029425375e-05, -1.7692833353066817e-05, -1.4930710676708259e-05, -1.21685880003497e-05, -9.406465323991142e-06, -6.644341738137882e-06, -3.882219061779324e-06, -1.1200963854207657e-06, 1.6420272004324943e-06, 4.4041498767910525e-06, 7.166272553149611e-06, 9.928395229508169e-06, 1.2690517905866727e-05, 1.545264240121469e-05, 1.8214763258583844e-05, 2.0976887753931805e-05, 2.3739010430290364e-05, 2.6501133106648922e-05, 2.926325578300748e-05, 3.202537845936604e-05, 3.4787502954714e-05, 3.7549623812083155e-05, 4.031174830743112e-05, 4.307386916480027e-05, 4.583599366014823e-05, 4.859811451751739e-05, 5.136023901286535e-05, 5.412236350821331e-05, 5.6884484365582466e-05, 5.964660886093043e-05, 6.240873335627839e-05, 6.517085421364754e-05, 6.79329750710167e-05, 7.069509592838585e-05, 7.345722406171262e-05, 7.621934491908178e-05, 7.898147305240855e-05, 8.17435939097777e-05, 8.450571476714686e-05, 8.726783562451601e-05, 9.002996375784278e-05, 9.279208461521193e-05, 9.555420547258109e-05]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 8.0, 18.0, 28.0, 4.0, 4.0, 9.0, 7.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.00020167362526990473, -0.00019640142272692174, -0.00019112923473585397, -0.00018585703219287097, -0.0001805848442018032, -0.0001753126416588202, -0.00017004043911583722, -0.00016476825112476945, -0.00015949606313370168, -0.0001542238605907187, -0.00014895167259965092, -0.00014367947005666792, -0.00013840728206560016, -0.00013313507952261716, -0.00012786287697963417, -0.0001225906889885664, -0.0001173184864455834, -0.00011204629117855802, -0.00010677409591153264, -0.00010150189336854964, -9.622970537748188e-05, -9.095750283449888e-05, -8.56853075674735e-05, -8.041311230044812e-05, -7.514091703342274e-05, -6.986872176639736e-05, -6.459652649937198e-05, -5.932432759436779e-05, -5.4052132327342406e-05, -4.8779937060317025e-05, -4.3507738155312836e-05, -3.8235542888287455e-05, -3.29633621731773e-05, -2.769116690615192e-05, -2.2418969820137136e-05, -1.714677273412235e-05, -1.187457746709697e-05, -6.602382200071588e-06, -1.3301832950673997e-06, 3.942011971957982e-06, 9.214207238983363e-06, 1.4486403415503446e-05, 1.975859959202353e-05, 2.5030796678038314e-05, 3.0302991945063695e-05, 3.557518721208908e-05, 4.0847386117093265e-05, 4.6119581384118646e-05, 5.139177665114403e-05, 
5.666397191816941e-05, 6.193616718519479e-05, 6.720836972817779e-05, 7.248055771924555e-05, 7.775276026222855e-05, 8.302495552925393e-05, 8.829715079627931e-05, 9.356934606330469e-05, 9.884154133033007e-05, 0.00010411373659735546, 0.00010938593186438084, 0.00011465813440736383, 0.0001199303223984316, 0.0001252025249414146, 0.00013047471293248236, 0.00013574691547546536]}, "gradients/encoder.masked_spec_embed": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 2.0, 8.0, 3.0, 10.0, 12.0, 8.0, 19.0, 19.0, 26.0, 49.0, 51.0, 50.0, 95.0, 78.0, 91.0, 92.0, 73.0, 64.0, 49.0, 40.0, 33.0, 34.0, 19.0, 21.0, 15.0, 10.0, 5.0, 8.0, 3.0, 5.0, 4.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9981136322021484e-05, -2.9102899134159088e-05, -2.8224661946296692e-05, -2.7346424758434296e-05, -2.64681875705719e-05, -2.5589950382709503e-05, -2.4711713194847107e-05, -2.383347600698471e-05, -2.2955238819122314e-05, -2.2077001631259918e-05, -2.1198764443397522e-05, -2.0320527255535126e-05, -1.944229006767273e-05, -1.8564052879810333e-05, -1.7685815691947937e-05, -1.680757850408554e-05, -1.5929341316223145e-05, -1.5051104128360748e-05, -1.4172866940498352e-05, -1.3294629752635956e-05, -1.241639256477356e-05, -1.1538155376911163e-05, -1.0659918189048767e-05, -9.781681001186371e-06, -8.903443813323975e-06, -8.025206625461578e-06, -7.146969437599182e-06, -6.268732249736786e-06, -5.39049506187439e-06, -4.512257874011993e-06, -3.634020686149597e-06, -2.755783498287201e-06, -1.8775463104248047e-06, -9.993091225624084e-07, -1.210719347000122e-07, 7.57165253162384e-07, 1.6354024410247803e-06, 2.5136396288871765e-06, 3.3918768167495728e-06, 4.270114004611969e-06, 5.148351192474365e-06, 6.0265883803367615e-06, 6.904825568199158e-06, 7.783062756061554e-06, 8.66129994392395e-06, 9.539537131786346e-06, 1.0417774319648743e-05, 1.1296011507511139e-05, 1.2174248695373535e-05, 1.3052485883235931e-05, 1.3930723071098328e-05, 1.4808960258960724e-05, 1.568719744682312e-05, 1.6565434634685516e-05, 1.7443671822547913e-05, 1.832190901041031e-05, 1.9200146198272705e-05, 2.00783833861351e-05, 2.0956620573997498e-05, 2.1834857761859894e-05, 2.271309494972229e-05, 2.3591332137584686e-05, 2.4469569325447083e-05, 2.534780651330948e-05, 2.6226043701171875e-05]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 7.0, 3.0, 9.0, 10.0, 16.0, 23.0, 35.0, 88.0, 108.0, 213.0, 497.0, 1358.0, 5063.0, 25005.0, 372508.0, 103321.0, 11451.0, 2796.0, 967.0, 408.0, 159.0, 93.0, 46.0, 31.0, 22.0, 11.0, 12.0, 5.0, 5.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00021767616271972656, -0.00020609423518180847, -0.00019451230764389038, -0.0001829303801059723, -0.0001713484525680542, -0.0001597665250301361, -0.00014818459749221802, -0.00013660266995429993, -0.00012502074241638184, -0.00011343881487846375, -0.00010185688734054565, -9.027495980262756e-05, -7.869303226470947e-05, -6.711110472679138e-05, -5.552917718887329e-05, -4.39472496509552e-05, -3.236532211303711e-05, -2.078339457511902e-05, -9.201467037200928e-06, 2.380460500717163e-06, 1.3962388038635254e-05, 2.5544315576553345e-05, 3.7126243114471436e-05, 4.8708170652389526e-05, 6.029009819030762e-05, 7.187202572822571e-05, 8.34539532661438e-05, 9.503588080406189e-05, 0.00010661780834197998, 
0.00011819973587989807, 0.00012978166341781616, 0.00014136359095573425, 0.00015294551849365234, 0.00016452744603157043, 0.00017610937356948853, 0.00018769130110740662, 0.0001992732286453247, 0.0002108551561832428, 0.0002224370837211609, 0.00023401901125907898, 0.00024560093879699707, 0.00025718286633491516, 0.00026876479387283325, 0.00028034672141075134, 0.00029192864894866943, 0.0003035105764865875, 0.0003150925040245056, 0.0003266744315624237, 0.0003382563591003418, 0.0003498382866382599, 0.000361420214176178, 0.00037300214171409607, 0.00038458406925201416, 0.00039616599678993225, 0.00040774792432785034, 0.00041932985186576843, 0.0004309117794036865, 0.0004424937069416046, 0.0004540756344795227, 0.0004656575620174408, 0.0004772394895553589, 0.000488821417093277, 0.0005004033446311951, 0.0005119852721691132, 0.0005235671997070312]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 4.0, 6.0, 6.0, 13.0, 15.0, 18.0, 31.0, 42.0, 56.0, 61.0, 94.0, 86.0, 86.0, 109.0, 82.0, 67.0, 75.0, 37.0, 34.0, 21.0, 14.0, 25.0, 12.0, 4.0, 12.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.2172927856445312e-05, -2.1198764443397522e-05, -2.022460103034973e-05, -1.925043761730194e-05, -1.827627420425415e-05, -1.730211079120636e-05, -1.632794737815857e-05, -1.535378396511078e-05, -1.4379620552062988e-05, -1.3405457139015198e-05, -1.2431293725967407e-05, -1.1457130312919617e-05, -1.0482966899871826e-05, -9.508803486824036e-06, -8.534640073776245e-06, -7.560476660728455e-06, -6.586313247680664e-06, -5.6121498346328735e-06, -4.637986421585083e-06, -3.6638230085372925e-06, -2.689659595489502e-06, -1.7154961824417114e-06, -7.413327693939209e-07, 2.3283064365386963e-07, 1.2069940567016602e-06, 2.1811574697494507e-06, 3.155320882797241e-06, 4.129484295845032e-06, 5.103647708892822e-06, 6.077811121940613e-06, 7.051974534988403e-06, 8.026137948036194e-06, 9.000301361083984e-06, 9.974464774131775e-06, 1.0948628187179565e-05, 1.1922791600227356e-05, 1.2896955013275146e-05, 1.3871118426322937e-05, 1.4845281839370728e-05, 1.5819445252418518e-05, 1.679360866546631e-05, 1.77677720785141e-05, 1.874193549156189e-05, 1.971609890460968e-05, 2.069026231765747e-05, 2.166442573070526e-05, 2.2638589143753052e-05, 2.3612752556800842e-05, 2.4586915969848633e-05, 2.5561079382896423e-05, 2.6535242795944214e-05, 2.7509406208992004e-05, 2.8483569622039795e-05, 2.9457733035087585e-05, 3.0431896448135376e-05, 3.1406059861183167e-05, 3.238022327423096e-05, 3.335438668727875e-05, 3.432855010032654e-05, 3.530271351337433e-05, 3.627687692642212e-05, 3.725104033946991e-05, 3.82252037525177e-05, 3.919936716556549e-05, 4.017353057861328e-05]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 4.0, 8.0, 6.0, 14.0, 7.0, 14.0, 13.0, 14.0, 24.0, 38.0, 36.0, 36.0, 41.0, 28.0, 32.0, 32.0, 28.0, 21.0, 12.0, 15.0, 9.0, 8.0, 6.0, 6.0, 9.0, 2.0, 6.0, 3.0, 4.0, 0.0, 5.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.5904526410158724e-05, -2.4931645384640433e-05, -2.3958764359122142e-05, -2.298588333360385e-05, -2.201300230808556e-05, -2.1040119463577867e-05, -2.0067238438059576e-05, -1.9094357412541285e-05, -1.8121476387022994e-05, -1.7148595361504704e-05, -1.6175714335986413e-05, 
-1.520283240097342e-05, -1.422995137545513e-05, -1.325707034993684e-05, -1.2284188414923847e-05, -1.1311307389405556e-05, -1.0338426363887265e-05, -9.365545338368975e-06, -8.392664312850684e-06, -7.4197823778376915e-06, -6.446901352319401e-06, -5.47402032680111e-06, -4.5011388465354685e-06, -3.528257366269827e-06, -2.555376340751536e-06, -1.58249508785957e-06, -6.096138349676039e-07, 3.6326741792436223e-07, 1.3361486708163284e-06, 2.309029696334619e-06, 3.2819111766002607e-06, 4.254792656865902e-06, 5.227677320363e-06, 6.200558345881291e-06, 7.173439826146932e-06, 8.146321306412574e-06, 9.119202331930865e-06, 1.0092083357449155e-05, 1.1064965292462148e-05, 1.2037846317980438e-05, 1.301072734349873e-05, 1.398360836901702e-05, 1.495648939453531e-05, 1.59293704200536e-05, 1.6902253264561296e-05, 1.7875132471090183e-05, 1.8848015315597877e-05, 1.9820896341116168e-05, 2.0793777366634458e-05, 2.176665839215275e-05, 2.273953941767104e-05, 2.371242044318933e-05, 2.468530146870762e-05, 2.5658184313215315e-05, 2.6631065338733606e-05, 2.7603946364251897e-05, 2.8576827389770187e-05, 2.9549708415288478e-05, 3.052259125979617e-05, 3.149547046632506e-05, 3.2468353310832754e-05, 3.344123251736164e-05, 3.4414115361869335e-05, 3.538699820637703e-05, 3.6359877412905917e-05]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 0.0, 2.0, 2.0, 2.0, 4.0, 3.0, 4.0, 7.0, 7.0, 8.0, 9.0, 4.0, 6.0, 14.0, 23.0, 34.0, 32.0, 44.0, 44.0, 48.0, 45.0, 30.0, 17.0, 12.0, 21.0, 9.0, 5.0, 11.0, 5.0, 7.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.6803226470947266e-05, -5.496758967638016e-05, -5.313195288181305e-05, -5.129631608724594e-05, -4.946067929267883e-05, -4.7625042498111725e-05, -4.578940570354462e-05, -4.395376890897751e-05, -4.21181321144104e-05, -4.028249531984329e-05, -3.8446858525276184e-05, -3.6611221730709076e-05, -3.477558493614197e-05, -3.293994814157486e-05, -3.110431134700775e-05, -2.9268674552440643e-05, -2.7433037757873535e-05, -2.5597400963306427e-05, -2.376176416873932e-05, -2.192612737417221e-05, -2.0090490579605103e-05, -1.8254853785037994e-05, -1.6419216990470886e-05, -1.4583580195903778e-05, -1.274794340133667e-05, -1.0912306606769562e-05, -9.076669812202454e-06, -7.2410330176353455e-06, -5.405396223068237e-06, -3.569759428501129e-06, -1.734122633934021e-06, 1.0151416063308716e-07, 1.9371509552001953e-06, 3.7727877497673035e-06, 5.608424544334412e-06, 7.44406133890152e-06, 9.279698133468628e-06, 1.1115334928035736e-05, 1.2950971722602844e-05, 1.4786608517169952e-05, 1.662224531173706e-05, 1.845788210630417e-05, 2.0293518900871277e-05, 2.2129155695438385e-05, 2.3964792490005493e-05, 2.58004292845726e-05, 2.763606607913971e-05, 2.9471702873706818e-05, 3.1307339668273926e-05, 3.3142976462841034e-05, 3.497861325740814e-05, 3.681425005197525e-05, 3.864988684654236e-05, 4.0485523641109467e-05, 4.2321160435676575e-05, 4.415679723024368e-05, 4.599243402481079e-05, 4.78280708193779e-05, 4.966370761394501e-05, 5.1499344408512115e-05, 5.3334981203079224e-05, 5.517061799764633e-05, 5.700625479221344e-05, 5.884189158678055e-05, 6.0677528381347656e-05]}, "eval/loss": 18.359586715698242, "eval/bleu": 0.0, "eval/runtime": 3668.9753, "eval/samples_per_second": 4.023, "eval/steps_per_second": 1.006} \ No newline at end of file +{"train/loss": 4.3866, "train/learning_rate": 0.0002625133673028093, "train/epoch": 0.85, "train/global_step": 5500, 
"_runtime": 59333, "_timestamp": 1651657781, "_step": 5510, "gradients/decoder.model.decoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1021.0], "bins": [-470.6937255859375, -463.2947692871094, -455.8958435058594, -448.49688720703125, -441.09796142578125, -433.6990051269531, -426.3000793457031, -418.901123046875, -411.502197265625, -404.1032409667969, -396.7043151855469, -389.30535888671875, -381.90643310546875, -374.5074768066406, -367.1085510253906, -359.7095947265625, -352.3106689453125, -344.9117126464844, -337.5127868652344, -330.11383056640625, -322.71490478515625, -315.3159484863281, -307.9170227050781, -300.51806640625, -293.119140625, -285.7201843261719, -278.3212585449219, -270.92230224609375, -263.52337646484375, -256.1244201660156, -248.72549438476562, -241.32655334472656, -233.92759704589844, -226.52865600585938, -219.1297149658203, -211.73077392578125, -204.3318328857422, -196.93289184570312, -189.53395080566406, -182.135009765625, -174.73606872558594, -167.33712768554688, -159.9381866455078, -152.53924560546875, -145.1403045654297, -137.74136352539062, -130.34242248535156, -122.9434814453125, -115.5445327758789, -108.14559173583984, -100.74665069580078, -93.34770965576172, -85.94876861572266, -78.54981994628906, -71.15087890625, -63.7519416809082, -56.353004455566406, -48.954063415527344, -41.55512237548828, -34.15618133544922, -26.757238388061523, -19.358295440673828, -11.959354400634766, -4.560413360595703, 2.838529109954834]}, "gradients/decoder.model.decoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 5.0, 1.0, 4.0, 11.0, 10.0, 13.0, 19.0, 17.0, 27.0, 37.0, 36.0, 33.0, 55.0, 69.0, 67.0, 67.0, 81.0, 65.0, 59.0, 66.0, 45.0, 43.0, 36.0, 26.0, 31.0, 24.0, 18.0, 5.0, 10.0, 9.0, 7.0, 5.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-18.23824119567871, -17.68547248840332, -17.13270378112793, -16.57993507385254, -16.02716636657715, -15.474397659301758, -14.92162799835205, -14.36885929107666, -13.81609058380127, -13.263321876525879, -12.710553169250488, -12.157784461975098, -11.60501480102539, -11.05224609375, -10.49947738647461, -9.946708679199219, -9.393939971923828, -8.841171264648438, -8.288402557373047, -7.735633373260498, -7.182864665985107, -6.630095958709717, -6.077326774597168, -5.524558067321777, -4.971789360046387, -4.419020652770996, -3.8662517070770264, -3.3134827613830566, -2.760714054107666, -2.2079453468322754, -1.6551764011383057, -1.102407455444336, -0.5496368408203125, 0.0031319856643676758, 0.5559008121490479, 1.108669638633728, 1.6614384651184082, 2.214207172393799, 2.7669761180877686, 3.3197450637817383, 3.872513771057129, 4.4252824783325195, 4.97805118560791, 5.530820369720459, 6.08358907699585, 6.63635778427124, 7.189126968383789, 7.74189567565918, 8.29466438293457, 8.847433090209961, 9.400201797485352, 9.952970504760742, 10.505739212036133, 11.058507919311523, 11.61127758026123, 12.164046287536621, 12.716814994812012, 13.269583702087402, 13.822352409362793, 14.375121116638184, 14.92789077758789, 15.480659484863281, 16.033428192138672, 16.586196899414062, 
17.138965606689453]}, "gradients/decoder.model.decoder.layers.11.fc2.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 4.0, 6.0, 7.0, 10.0, 21.0, 27.0, 47.0, 56.0, 64.0, 114.0, 138.0, 202.0, 256.0, 359.0, 533.0, 760.0, 982.0, 1350.0, 2122.0, 4041.0, 13227.0, 4115749.0, 38579.0, 6996.0, 2831.0, 1647.0, 1197.0, 813.0, 563.0, 413.0, 338.0, 214.0, 171.0, 133.0, 98.0, 64.0, 49.0, 34.0, 27.0, 20.0, 12.0, 8.0, 3.0, 5.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.1171875, -6.841064453125, -6.56494140625, -6.288818359375, -6.0126953125, -5.736572265625, -5.46044921875, -5.184326171875, -4.908203125, -4.632080078125, -4.35595703125, -4.079833984375, -3.8037109375, -3.527587890625, -3.25146484375, -2.975341796875, -2.69921875, -2.423095703125, -2.14697265625, -1.870849609375, -1.5947265625, -1.318603515625, -1.04248046875, -0.766357421875, -0.490234375, -0.214111328125, 0.06201171875, 0.338134765625, 0.6142578125, 0.890380859375, 1.16650390625, 1.442626953125, 1.71875, 1.994873046875, 2.27099609375, 2.547119140625, 2.8232421875, 3.099365234375, 3.37548828125, 3.651611328125, 3.927734375, 4.203857421875, 4.47998046875, 4.756103515625, 5.0322265625, 5.308349609375, 5.58447265625, 5.860595703125, 6.13671875, 6.412841796875, 6.68896484375, 6.965087890625, 7.2412109375, 7.517333984375, 7.79345703125, 8.069580078125, 8.345703125, 8.621826171875, 8.89794921875, 9.174072265625, 9.4501953125, 9.726318359375, 10.00244140625, 10.278564453125, 10.5546875]}, "gradients/decoder.model.decoder.layers.11.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 7.0, 2.0, 7.0, 3.0, 10.0, 20.0, 18.0, 30.0, 31.0, 45.0, 50.0, 51.0, 70.0, 74.0, 65.0, 80.0, 59.0, 75.0, 51.0, 67.0, 32.0, 37.0, 30.0, 26.0, 16.0, 14.0, 8.0, 7.0, 7.0, 2.0, 6.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-11.7421875, -11.408935546875, -11.07568359375, -10.742431640625, -10.4091796875, -10.075927734375, -9.74267578125, -9.409423828125, -9.076171875, -8.742919921875, -8.40966796875, -8.076416015625, -7.7431640625, -7.409912109375, -7.07666015625, -6.743408203125, -6.41015625, -6.076904296875, -5.74365234375, -5.410400390625, -5.0771484375, -4.743896484375, -4.41064453125, -4.077392578125, -3.744140625, -3.410888671875, -3.07763671875, -2.744384765625, -2.4111328125, -2.077880859375, -1.74462890625, -1.411376953125, -1.078125, -0.744873046875, -0.41162109375, -0.078369140625, 0.2548828125, 0.588134765625, 0.92138671875, 1.254638671875, 1.587890625, 1.921142578125, 2.25439453125, 2.587646484375, 2.9208984375, 3.254150390625, 3.58740234375, 3.920654296875, 4.25390625, 4.587158203125, 4.92041015625, 5.253662109375, 5.5869140625, 5.920166015625, 6.25341796875, 6.586669921875, 6.919921875, 7.253173828125, 7.58642578125, 7.919677734375, 8.2529296875, 8.586181640625, 8.91943359375, 9.252685546875, 9.5859375]}, "gradients/decoder.model.decoder.layers.11.fc1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 2.0, 4.0, 7.0, 21.0, 143.0, 4194039.0, 35.0, 10.0, 3.0, 3.0, 6.0, 0.0, 2.0, 2.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-203.5, -188.0078125, -172.515625, -157.0234375, -141.53125, -126.0390625, -110.546875, -95.0546875, -79.5625, -64.0703125, -48.578125, 
-33.0859375, -17.59375, -2.1015625, 13.390625, 28.8828125, 44.375, 59.8671875, 75.359375, 90.8515625, 106.34375, 121.8359375, 137.328125, 152.8203125, 168.3125, 183.8046875, 199.296875, 214.7890625, 230.28125, 245.7734375, 261.265625, 276.7578125, 292.25, 307.7421875, 323.234375, 338.7265625, 354.21875, 369.7109375, 385.203125, 400.6953125, 416.1875, 431.6796875, 447.171875, 462.6640625, 478.15625, 493.6484375, 509.140625, 524.6328125, 540.125, 555.6171875, 571.109375, 586.6015625, 602.09375, 617.5859375, 633.078125, 648.5703125, 664.0625, 679.5546875, 695.046875, 710.5390625, 726.03125, 741.5234375, 757.015625, 772.5078125, 788.0]}, "gradients/decoder.model.decoder.layers.11.fc1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 5.0, 2.0, 4.0, 8.0, 16.0, 68.0, 3916.0, 31.0, 9.0, 3.0, 2.0, 5.0, 1.0, 2.0, 1.0, 5.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.533203125, -2.340667724609375, -2.14813232421875, -1.955596923828125, -1.7630615234375, -1.570526123046875, -1.37799072265625, -1.185455322265625, -0.992919921875, -0.800384521484375, -0.60784912109375, -0.415313720703125, -0.2227783203125, -0.030242919921875, 0.16229248046875, 0.354827880859375, 0.54736328125, 0.739898681640625, 0.93243408203125, 1.124969482421875, 1.3175048828125, 1.510040283203125, 1.70257568359375, 1.895111083984375, 2.087646484375, 2.280181884765625, 2.47271728515625, 2.665252685546875, 2.8577880859375, 3.050323486328125, 3.24285888671875, 3.435394287109375, 3.6279296875, 3.820465087890625, 4.01300048828125, 4.205535888671875, 4.3980712890625, 4.590606689453125, 4.78314208984375, 4.975677490234375, 5.168212890625, 5.360748291015625, 5.55328369140625, 5.745819091796875, 5.9383544921875, 6.130889892578125, 6.32342529296875, 6.515960693359375, 6.70849609375, 6.901031494140625, 7.09356689453125, 7.286102294921875, 7.4786376953125, 7.671173095703125, 7.86370849609375, 8.056243896484375, 8.248779296875, 8.441314697265625, 8.63385009765625, 8.826385498046875, 9.0189208984375, 9.211456298828125, 9.40399169921875, 9.596527099609375, 9.7890625]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 2.0, 3.0, 5.0, 11.0, 35.0, 171.0, 510.0, 216.0, 48.0, 10.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.455660820007324, -12.862027168273926, -12.268392562866211, -11.674758911132812, -11.081125259399414, -10.487491607666016, -9.893857955932617, -9.300223350524902, -8.706589698791504, -8.112956047058105, -7.519321918487549, -6.925687789916992, -6.332054138183594, -5.738420486450195, -5.144786357879639, -4.551152229309082, -3.9575185775756836, -3.363884687423706, -2.7702507972717285, -2.176616907119751, -1.5829830169677734, -0.9893491268157959, -0.39571523666381836, 0.19791889190673828, 0.7915525436401367, 1.3851864337921143, 1.9788203239440918, 2.5724542140960693, 3.166088104248047, 3.7597219944000244, 4.353355884552002, 4.946990013122559, 5.540624618530273, 6.134258270263672, 6.7278923988342285, 7.321526527404785, 7.915160179138184, 8.508793830871582, 9.102428436279297, 9.696062088012695, 10.289695739746094, 10.883329391479492, 
11.47696304321289, 12.070597648620605, 12.664231300354004, 13.257864952087402, 13.851499557495117, 14.445133209228516, 15.038766860961914, 15.632400512695312, 16.22603416442871, 16.81966781616211, 17.41330337524414, 18.00693702697754, 18.600570678710938, 19.194204330444336, 19.787837982177734, 20.381471633911133, 20.97510528564453, 21.56873893737793, 22.162372589111328, 22.75600814819336, 23.349641799926758, 23.943275451660156, 24.536909103393555]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 7.0, 9.0, 12.0, 12.0, 20.0, 24.0, 22.0, 33.0, 44.0, 51.0, 64.0, 79.0, 81.0, 70.0, 73.0, 67.0, 55.0, 63.0, 49.0, 34.0, 32.0, 21.0, 17.0, 14.0, 14.0, 4.0, 10.0, 4.0, 3.0, 4.0, 3.0, 0.0, 4.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0], "bins": [-16.789762496948242, -16.36899185180664, -15.948221206665039, -15.527450561523438, -15.10667896270752, -14.685908317565918, -14.265137672424316, -13.844367027282715, -13.423595428466797, -13.002824783325195, -12.582054138183594, -12.161283493041992, -11.740511894226074, -11.319741249084473, -10.898970603942871, -10.47819995880127, -10.057429313659668, -9.636658668518066, -9.215888023376465, -8.795116424560547, -8.374345779418945, -7.953575134277344, -7.532804489135742, -7.112033843994141, -6.691262722015381, -6.270492076873779, -5.8497209548950195, -5.428950309753418, -5.008179664611816, -4.587408542633057, -4.166637897491455, -3.7458670139312744, -3.3250961303710938, -2.904325246810913, -2.4835543632507324, -2.062783718109131, -1.6420128345489502, -1.2212419509887695, -0.800471305847168, -0.3797004222869873, 0.04107046127319336, 0.46184128522872925, 0.8826121091842651, 1.3033828735351562, 1.724153757095337, 2.1449246406555176, 2.565695285797119, 2.9864661693573, 3.4072370529174805, 3.828007936477661, 4.248778820037842, 4.669549465179443, 5.090320587158203, 5.511091232299805, 5.931861877441406, 6.352632522583008, 6.773403644561768, 7.194174289703369, 7.614945411682129, 8.03571605682373, 8.456486701965332, 8.87725830078125, 9.298028945922852, 9.718799591064453, 10.139570236206055]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 3.0, 2.0, 4.0, 11.0, 3.0, 14.0, 13.0, 30.0, 30.0, 51.0, 77.0, 98.0, 162.0, 269.0, 444.0, 869.0, 1748.0, 4078.0, 12778.0, 158175.0, 842236.0, 17907.0, 5057.0, 2142.0, 1000.0, 531.0, 301.0, 184.0, 113.0, 75.0, 45.0, 28.0, 20.0, 17.0, 14.0, 3.0, 9.0, 8.0, 4.0, 4.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.6591796875, -1.6021881103515625, -1.545196533203125, -1.4882049560546875, -1.43121337890625, -1.3742218017578125, -1.317230224609375, -1.2602386474609375, -1.2032470703125, -1.1462554931640625, -1.089263916015625, -1.0322723388671875, -0.97528076171875, -0.9182891845703125, -0.861297607421875, -0.8043060302734375, -0.747314453125, -0.6903228759765625, -0.633331298828125, -0.5763397216796875, -0.51934814453125, -0.4623565673828125, -0.405364990234375, -0.3483734130859375, -0.2913818359375, -0.2343902587890625, -0.177398681640625, -0.1204071044921875, -0.06341552734375, -0.0064239501953125, 0.050567626953125, 0.1075592041015625, 0.16455078125, 0.2215423583984375, 0.278533935546875, 0.3355255126953125, 0.39251708984375, 0.4495086669921875, 0.506500244140625, 0.5634918212890625, 0.6204833984375, 0.6774749755859375, 
0.734466552734375, 0.7914581298828125, 0.84844970703125, 0.9054412841796875, 0.962432861328125, 1.0194244384765625, 1.076416015625, 1.1334075927734375, 1.190399169921875, 1.2473907470703125, 1.30438232421875, 1.3613739013671875, 1.418365478515625, 1.4753570556640625, 1.5323486328125, 1.5893402099609375, 1.646331787109375, 1.7033233642578125, 1.76031494140625, 1.8173065185546875, 1.874298095703125, 1.9312896728515625, 1.98828125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 2.0, 4.0, 3.0, 11.0, 18.0, 17.0, 20.0, 28.0, 49.0, 51.0, 64.0, 80.0, 119.0, 104.0, 78.0, 88.0, 69.0, 55.0, 37.0, 25.0, 21.0, 17.0, 14.0, 8.0, 3.0, 3.0, 5.0, 4.0, 2.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.5390625, -7.34234619140625, -7.1456298828125, -6.94891357421875, -6.752197265625, -6.55548095703125, -6.3587646484375, -6.16204833984375, -5.96533203125, -5.76861572265625, -5.5718994140625, -5.37518310546875, -5.178466796875, -4.98175048828125, -4.7850341796875, -4.58831787109375, -4.3916015625, -4.19488525390625, -3.9981689453125, -3.80145263671875, -3.604736328125, -3.40802001953125, -3.2113037109375, -3.01458740234375, -2.81787109375, -2.62115478515625, -2.4244384765625, -2.22772216796875, -2.031005859375, -1.83428955078125, -1.6375732421875, -1.44085693359375, -1.244140625, -1.04742431640625, -0.8507080078125, -0.65399169921875, -0.457275390625, -0.26055908203125, -0.0638427734375, 0.13287353515625, 0.32958984375, 0.52630615234375, 0.7230224609375, 0.91973876953125, 1.116455078125, 1.31317138671875, 1.5098876953125, 1.70660400390625, 1.9033203125, 2.10003662109375, 2.2967529296875, 2.49346923828125, 2.690185546875, 2.88690185546875, 3.0836181640625, 3.28033447265625, 3.47705078125, 3.67376708984375, 3.8704833984375, 4.06719970703125, 4.263916015625, 4.46063232421875, 4.6573486328125, 4.85406494140625, 5.05078125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 2.0, 3.0, 5.0, 4.0, 12.0, 8.0, 17.0, 14.0, 20.0, 36.0, 40.0, 51.0, 65.0, 71.0, 98.0, 126.0, 180.0, 307.0, 858.0, 3238.0, 24613.0, 724790.0, 275894.0, 14179.0, 2248.0, 651.0, 266.0, 213.0, 108.0, 87.0, 83.0, 71.0, 47.0, 35.0, 23.0, 22.0, 21.0, 13.0, 11.0, 7.0, 10.0, 5.0, 5.0, 1.0, 0.0, 4.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.1982421875, -0.1922130584716797, -0.18618392944335938, -0.18015480041503906, -0.17412567138671875, -0.16809654235839844, -0.16206741333007812, -0.1560382843017578, -0.1500091552734375, -0.1439800262451172, -0.13795089721679688, -0.13192176818847656, -0.12589263916015625, -0.11986351013183594, -0.11383438110351562, -0.10780525207519531, -0.101776123046875, -0.09574699401855469, -0.08971786499023438, -0.08368873596191406, -0.07765960693359375, -0.07163047790527344, -0.06560134887695312, -0.05957221984863281, -0.0535430908203125, -0.04751396179199219, -0.041484832763671875, -0.03545570373535156, -0.02942657470703125, -0.023397445678710938, -0.017368316650390625, -0.011339187622070312, -0.00531005859375, 0.0007190704345703125, 0.006748199462890625, 0.012777328491210938, 0.01880645751953125, 0.024835586547851562, 0.030864715576171875, 0.03689384460449219, 0.0429229736328125, 0.04895210266113281, 0.054981231689453125, 0.06101036071777344, 0.06703948974609375, 0.07306861877441406, 0.07909774780273438, 
0.08512687683105469, 0.091156005859375, 0.09718513488769531, 0.10321426391601562, 0.10924339294433594, 0.11527252197265625, 0.12130165100097656, 0.12733078002929688, 0.1333599090576172, 0.1393890380859375, 0.1454181671142578, 0.15144729614257812, 0.15747642517089844, 0.16350555419921875, 0.16953468322753906, 0.17556381225585938, 0.1815929412841797, 0.1876220703125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 4.0, 4.0, 5.0, 6.0, 10.0, 8.0, 8.0, 18.0, 17.0, 16.0, 25.0, 24.0, 30.0, 31.0, 33.0, 44.0, 37.0, 51.0, 41.0, 62.0, 46.0, 45.0, 45.0, 40.0, 44.0, 44.0, 38.0, 28.0, 23.0, 54.0, 15.0, 23.0, 16.0, 9.0, 16.0, 15.0, 8.0, 5.0, 7.0, 6.0, 4.0, 1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0], "bins": [-5.75, -5.5743408203125, -5.398681640625, -5.2230224609375, -5.04736328125, -4.8717041015625, -4.696044921875, -4.5203857421875, -4.3447265625, -4.1690673828125, -3.993408203125, -3.8177490234375, -3.64208984375, -3.4664306640625, -3.290771484375, -3.1151123046875, -2.939453125, -2.7637939453125, -2.588134765625, -2.4124755859375, -2.23681640625, -2.0611572265625, -1.885498046875, -1.7098388671875, -1.5341796875, -1.3585205078125, -1.182861328125, -1.0072021484375, -0.83154296875, -0.6558837890625, -0.480224609375, -0.3045654296875, -0.12890625, 0.0467529296875, 0.222412109375, 0.3980712890625, 0.57373046875, 0.7493896484375, 0.925048828125, 1.1007080078125, 1.2763671875, 1.4520263671875, 1.627685546875, 1.8033447265625, 1.97900390625, 2.1546630859375, 2.330322265625, 2.5059814453125, 2.681640625, 2.8572998046875, 3.032958984375, 3.2086181640625, 3.38427734375, 3.5599365234375, 3.735595703125, 3.9112548828125, 4.0869140625, 4.2625732421875, 4.438232421875, 4.6138916015625, 4.78955078125, 4.9652099609375, 5.140869140625, 5.3165283203125, 5.4921875]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 0.0, 6.0, 4.0, 7.0, 13.0, 18.0, 19.0, 22.0, 28.0, 37.0, 51.0, 79.0, 145.0, 323.0, 595.0, 1106.0, 2450.0, 5428.0, 12499.0, 32151.0, 112810.0, 709809.0, 114927.0, 32771.0, 12768.0, 5635.0, 2486.0, 1146.0, 544.0, 278.0, 142.0, 95.0, 52.0, 38.0, 15.0, 12.0, 11.0, 10.0, 8.0, 9.0, 3.0, 2.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0028247833251953125, -0.0027404725551605225, -0.0026561617851257324, -0.0025718510150909424, -0.0024875402450561523, -0.0024032294750213623, -0.0023189187049865723, -0.0022346079349517822, -0.002150297164916992, -0.002065986394882202, -0.001981675624847412, -0.001897364854812622, -0.001813054084777832, -0.001728743314743042, -0.001644432544708252, -0.001560121774673462, -0.0014758110046386719, -0.0013915002346038818, -0.0013071894645690918, -0.0012228786945343018, -0.0011385679244995117, -0.0010542571544647217, -0.0009699463844299316, -0.0008856356143951416, -0.0008013248443603516, -0.0007170140743255615, -0.0006327033042907715, -0.0005483925342559814, -0.0004640817642211914, -0.00037977099418640137, -0.00029546022415161133, -0.0002111494541168213, -0.00012683868408203125, -4.252791404724121e-05, 4.178285598754883e-05, 0.00012609362602233887, 0.0002104043960571289, 0.00029471516609191895, 0.000379025936126709, 0.000463336706161499, 0.0005476474761962891, 0.0006319582462310791, 0.0007162690162658691, 0.0008005797863006592, 0.0008848905563354492, 0.0009692013263702393, 0.0010535120964050293, 0.0011378228664398193, 
0.0012221336364746094, 0.0013064444065093994, 0.0013907551765441895, 0.0014750659465789795, 0.0015593767166137695, 0.0016436874866485596, 0.0017279982566833496, 0.0018123090267181396, 0.0018966197967529297, 0.0019809305667877197, 0.0020652413368225098, 0.0021495521068573, 0.00223386287689209, 0.00231817364692688, 0.00240248441696167, 0.00248679518699646, 0.00257110595703125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 1.0, 6.0, 14.0, 31.0, 58.0, 156.0, 479.0, 160.0, 53.0, 21.0, 13.0, 8.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001430511474609375, -0.0001383572816848755, -0.00013366341590881348, -0.00012896955013275146, -0.00012427568435668945, -0.00011958181858062744, -0.00011488795280456543, -0.00011019408702850342, -0.0001055002212524414, -0.0001008063554763794, -9.611248970031738e-05, -9.141862392425537e-05, -8.672475814819336e-05, -8.203089237213135e-05, -7.733702659606934e-05, -7.264316082000732e-05, -6.794929504394531e-05, -6.32554292678833e-05, -5.856156349182129e-05, -5.386769771575928e-05, -4.9173831939697266e-05, -4.4479966163635254e-05, -3.978610038757324e-05, -3.509223461151123e-05, -3.039836883544922e-05, -2.5704503059387207e-05, -2.1010637283325195e-05, -1.6316771507263184e-05, -1.1622905731201172e-05, -6.92903995513916e-06, -2.2351741790771484e-06, 2.4586915969848633e-06, 7.152557373046875e-06, 1.1846423149108887e-05, 1.65402889251709e-05, 2.123415470123291e-05, 2.5928020477294922e-05, 3.0621886253356934e-05, 3.5315752029418945e-05, 4.000961780548096e-05, 4.470348358154297e-05, 4.939734935760498e-05, 5.409121513366699e-05, 5.8785080909729004e-05, 6.347894668579102e-05, 6.817281246185303e-05, 7.286667823791504e-05, 7.756054401397705e-05, 8.225440979003906e-05, 8.694827556610107e-05, 9.164214134216309e-05, 9.63360071182251e-05, 0.00010102987289428711, 0.00010572373867034912, 0.00011041760444641113, 0.00011511147022247314, 0.00011980533599853516, 0.00012449920177459717, 0.00012919306755065918, 0.0001338869333267212, 0.0001385807991027832, 0.00014327466487884521, 0.00014796853065490723, 0.00015266239643096924, 0.00015735626220703125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 1.0, 1.0, 5.0, 1.0, 1.0, 5.0, 4.0, 2.0, 5.0, 6.0, 13.0, 8.0, 14.0, 26.0, 25.0, 69.0, 118.0, 919696.0, 128196.0, 148.0, 56.0, 34.0, 26.0, 14.0, 15.0, 10.0, 14.0, 7.0, 9.0, 4.0, 3.0, 3.0, 3.0, 3.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.024139404296875, -0.02338433265686035, -0.022629261016845703, -0.021874189376831055, -0.021119117736816406, -0.020364046096801758, -0.01960897445678711, -0.01885390281677246, -0.018098831176757812, -0.017343759536743164, -0.016588687896728516, -0.015833616256713867, -0.015078544616699219, -0.01432347297668457, -0.013568401336669922, -0.012813329696655273, -0.012058258056640625, -0.011303186416625977, -0.010548114776611328, -0.00979304313659668, -0.009037971496582031, -0.008282899856567383, -0.007527828216552734, -0.006772756576538086, -0.0060176849365234375, -0.005262613296508789, -0.004507541656494141, -0.003752470016479492, -0.0029973983764648438, -0.0022423267364501953, -0.0014872550964355469, 
-0.0007321834564208984, 2.288818359375e-05, 0.0007779598236083984, 0.0015330314636230469, 0.0022881031036376953, 0.0030431747436523438, 0.003798246383666992, 0.004553318023681641, 0.005308389663696289, 0.0060634613037109375, 0.006818532943725586, 0.007573604583740234, 0.008328676223754883, 0.009083747863769531, 0.00983881950378418, 0.010593891143798828, 0.011348962783813477, 0.012104034423828125, 0.012859106063842773, 0.013614177703857422, 0.01436924934387207, 0.015124320983886719, 0.015879392623901367, 0.016634464263916016, 0.017389535903930664, 0.018144607543945312, 0.01889967918395996, 0.01965475082397461, 0.020409822463989258, 0.021164894104003906, 0.021919965744018555, 0.022675037384033203, 0.02343010902404785, 0.0241851806640625]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 7.0, 1.0, 3.0, 1.0, 1.0, 5.0, 2.0, 7.0, 2.0, 9.0, 16.0, 6.0, 19.0, 21.0, 29.0, 49.0, 81.0, 243.0, 212.0, 90.0, 45.0, 33.0, 23.0, 17.0, 9.0, 20.0, 8.0, 14.0, 8.0, 1.0, 6.0, 3.0, 3.0, 2.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.0007367134094238281, -0.0007136762142181396, -0.0006906390190124512, -0.0006676018238067627, -0.0006445646286010742, -0.0006215274333953857, -0.0005984902381896973, -0.0005754530429840088, -0.0005524158477783203, -0.0005293786525726318, -0.0005063414573669434, -0.0004833042621612549, -0.0004602670669555664, -0.00043722987174987793, -0.00041419267654418945, -0.000391155481338501, -0.0003681182861328125, -0.000345081090927124, -0.00032204389572143555, -0.00029900670051574707, -0.0002759695053100586, -0.0002529323101043701, -0.00022989511489868164, -0.00020685791969299316, -0.0001838207244873047, -0.0001607835292816162, -0.00013774633407592773, -0.00011470913887023926, -9.167194366455078e-05, -6.86347484588623e-05, -4.559755325317383e-05, -2.256035804748535e-05, 4.76837158203125e-07, 2.35140323638916e-05, 4.655122756958008e-05, 6.958842277526855e-05, 9.262561798095703e-05, 0.00011566281318664551, 0.00013870000839233398, 0.00016173720359802246, 0.00018477439880371094, 0.00020781159400939941, 0.0002308487892150879, 0.00025388598442077637, 0.00027692317962646484, 0.0002999603748321533, 0.0003229975700378418, 0.0003460347652435303, 0.00036907196044921875, 0.0003921091556549072, 0.0004151463508605957, 0.0004381835460662842, 0.00046122074127197266, 0.00048425793647766113, 0.0005072951316833496, 0.0005303323268890381, 0.0005533695220947266, 0.000576406717300415, 0.0005994439125061035, 0.000622481107711792, 0.0006455183029174805, 0.0006685554981231689, 0.0006915926933288574, 0.0007146298885345459, 0.0007376670837402344]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 20.0, 167.0, 750.0, 65.0, 11.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.859306335449219, -9.064530372619629, -8.269754409790039, -7.474978446960449, -6.680202484130859, -5.8854265213012695, -5.09065055847168, -4.29587459564209, -3.5010986328125, -2.70632266998291, -1.9115467071533203, -1.1167707443237305, -0.3219947814941406, 0.4727811813354492, 1.267557144165039, 2.062333106994629, 2.8571090698242188, 3.6518850326538086, 
4.446660995483398, 5.241436958312988, 6.036212921142578, 6.830988883972168, 7.625764846801758, 8.420540809631348, 9.215316772460938, 10.010092735290527, 10.804868698120117, 11.599644660949707, 12.394420623779297, 13.189196586608887, 13.983972549438477, 14.778748512268066, 15.573524475097656, 16.368301391601562, 17.163076400756836, 17.95785140991211, 18.752628326416016, 19.547405242919922, 20.342180252075195, 21.13695526123047, 21.931732177734375, 22.72650909423828, 23.521284103393555, 24.316059112548828, 25.110836029052734, 25.90561294555664, 26.700387954711914, 27.495162963867188, 28.289939880371094, 29.084716796875, 29.879491806030273, 30.674266815185547, 31.469043731689453, 32.26382064819336, 33.05859375, 33.853370666503906, 34.64814758300781, 35.44292449951172, 36.237701416015625, 37.032474517822266, 37.82725143432617, 38.62202835083008, 39.41680145263672, 40.211578369140625, 41.00635528564453]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 3.0, 8.0, 12.0, 18.0, 22.0, 31.0, 24.0, 54.0, 68.0, 83.0, 111.0, 98.0, 101.0, 85.0, 79.0, 56.0, 34.0, 33.0, 22.0, 17.0, 7.0, 9.0, 8.0, 3.0, 4.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0], "bins": [-7.981085300445557, -7.78444766998291, -7.587810039520264, -7.391172409057617, -7.194534778594971, -6.997897148132324, -6.801259994506836, -6.604621887207031, -6.407984733581543, -6.2113471031188965, -6.01470947265625, -5.8180718421936035, -5.621434211730957, -5.4247965812683105, -5.228158950805664, -5.031521797180176, -4.834883689880371, -4.638246059417725, -4.441608428955078, -4.244970798492432, -4.048333168029785, -3.8516955375671387, -3.6550581455230713, -3.458420515060425, -3.2617828845977783, -3.065145254135132, -2.8685076236724854, -2.671870231628418, -2.4752326011657715, -2.278594970703125, -2.0819573402404785, -1.885319709777832, -1.6886820793151855, -1.492044448852539, -1.2954068183898926, -1.0987693071365356, -0.9021316766738892, -0.7054940462112427, -0.5088565349578857, -0.31221890449523926, -0.11558127403259277, 0.08105632662773132, 0.2776939272880554, 0.47433149814605713, 0.6709691286087036, 0.8676067590713501, 1.064244270324707, 1.2608819007873535, 1.45751953125, 1.6541571617126465, 1.850794792175293, 2.0474324226379395, 2.244070053100586, 2.4407076835632324, 2.6373450756073, 2.8339827060699463, 3.0306203365325928, 3.2272579669952393, 3.4238955974578857, 3.620532989501953, 3.8171706199645996, 4.013808250427246, 4.210445880889893, 4.407083511352539, 4.6037211418151855]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 9.0, 16.0, 18.0, 18.0, 27.0, 37.0, 65.0, 122.0, 170.0, 331.0, 649.0, 1546.0, 5474.0, 38124.0, 871602.0, 116584.0, 9601.0, 2318.0, 872.0, 388.0, 195.0, 113.0, 79.0, 64.0, 46.0, 15.0, 21.0, 8.0, 8.0, 4.0, 9.0, 3.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 1.0, 0.0, 2.0, 0.0, 3.0], "bins": [-6.61328125, -6.42791748046875, -6.2425537109375, -6.05718994140625, -5.871826171875, -5.68646240234375, -5.5010986328125, -5.31573486328125, -5.13037109375, -4.94500732421875, -4.7596435546875, -4.57427978515625, -4.388916015625, -4.20355224609375, -4.0181884765625, -3.83282470703125, -3.6474609375, -3.46209716796875, -3.2767333984375, -3.09136962890625, -2.906005859375, 
-2.72064208984375, -2.5352783203125, -2.34991455078125, -2.16455078125, -1.97918701171875, -1.7938232421875, -1.60845947265625, -1.423095703125, -1.23773193359375, -1.0523681640625, -0.86700439453125, -0.681640625, -0.49627685546875, -0.3109130859375, -0.12554931640625, 0.059814453125, 0.24517822265625, 0.4305419921875, 0.61590576171875, 0.80126953125, 0.98663330078125, 1.1719970703125, 1.35736083984375, 1.542724609375, 1.72808837890625, 1.9134521484375, 2.09881591796875, 2.2841796875, 2.46954345703125, 2.6549072265625, 2.84027099609375, 3.025634765625, 3.21099853515625, 3.3963623046875, 3.58172607421875, 3.76708984375, 3.95245361328125, 4.1378173828125, 4.32318115234375, 4.508544921875, 4.69390869140625, 4.8792724609375, 5.06463623046875, 5.25]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 5.0, 4.0, 5.0, 8.0, 7.0, 15.0, 14.0, 10.0, 32.0, 29.0, 42.0, 50.0, 68.0, 73.0, 97.0, 89.0, 73.0, 67.0, 61.0, 63.0, 49.0, 24.0, 22.0, 26.0, 21.0, 10.0, 6.0, 6.0, 6.0, 1.0, 4.0, 6.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.1796875, -3.092742919921875, -3.00579833984375, -2.918853759765625, -2.8319091796875, -2.744964599609375, -2.65802001953125, -2.571075439453125, -2.484130859375, -2.397186279296875, -2.31024169921875, -2.223297119140625, -2.1363525390625, -2.049407958984375, -1.96246337890625, -1.875518798828125, -1.78857421875, -1.701629638671875, -1.61468505859375, -1.527740478515625, -1.4407958984375, -1.353851318359375, -1.26690673828125, -1.179962158203125, -1.093017578125, -1.006072998046875, -0.91912841796875, -0.832183837890625, -0.7452392578125, -0.658294677734375, -0.57135009765625, -0.484405517578125, -0.3974609375, -0.310516357421875, -0.22357177734375, -0.136627197265625, -0.0496826171875, 0.037261962890625, 0.12420654296875, 0.211151123046875, 0.298095703125, 0.385040283203125, 0.47198486328125, 0.558929443359375, 0.6458740234375, 0.732818603515625, 0.81976318359375, 0.906707763671875, 0.99365234375, 1.080596923828125, 1.16754150390625, 1.254486083984375, 1.3414306640625, 1.428375244140625, 1.51531982421875, 1.602264404296875, 1.689208984375, 1.776153564453125, 1.86309814453125, 1.950042724609375, 2.0369873046875, 2.123931884765625, 2.21087646484375, 2.297821044921875, 2.384765625]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 4.0, 4.0, 2.0, 6.0, 3.0, 6.0, 5.0, 9.0, 7.0, 13.0, 11.0, 18.0, 23.0, 39.0, 71.0, 99.0, 295.0, 14044.0, 1033173.0, 341.0, 140.0, 60.0, 41.0, 23.0, 11.0, 13.0, 8.0, 12.0, 9.0, 8.0, 11.0, 7.0, 3.0, 9.0, 4.0, 1.0, 3.0, 3.0, 2.0, 3.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-98.875, -95.79296875, -92.7109375, -89.62890625, -86.546875, -83.46484375, -80.3828125, -77.30078125, -74.21875, -71.13671875, -68.0546875, -64.97265625, -61.890625, -58.80859375, -55.7265625, -52.64453125, -49.5625, -46.48046875, -43.3984375, -40.31640625, -37.234375, -34.15234375, -31.0703125, -27.98828125, -24.90625, -21.82421875, -18.7421875, -15.66015625, -12.578125, -9.49609375, -6.4140625, -3.33203125, -0.25, 2.83203125, 5.9140625, 8.99609375, 12.078125, 15.16015625, 18.2421875, 21.32421875, 24.40625, 27.48828125, 30.5703125, 33.65234375, 36.734375, 39.81640625, 42.8984375, 45.98046875, 49.0625, 52.14453125, 55.2265625, 58.30859375, 61.390625, 64.47265625, 
67.5546875, 70.63671875, 73.71875, 76.80078125, 79.8828125, 82.96484375, 86.046875, 89.12890625, 92.2109375, 95.29296875, 98.375]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 4.0, 3.0, 0.0, 3.0, 9.0, 4.0, 9.0, 9.0, 9.0, 21.0, 14.0, 25.0, 19.0, 19.0, 37.0, 34.0, 55.0, 73.0, 88.0, 105.0, 91.0, 68.0, 49.0, 51.0, 36.0, 39.0, 17.0, 20.0, 16.0, 15.0, 10.0, 11.0, 11.0, 7.0, 5.0, 5.0, 7.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-8.90625, -8.633544921875, -8.36083984375, -8.088134765625, -7.8154296875, -7.542724609375, -7.27001953125, -6.997314453125, -6.724609375, -6.451904296875, -6.17919921875, -5.906494140625, -5.6337890625, -5.361083984375, -5.08837890625, -4.815673828125, -4.54296875, -4.270263671875, -3.99755859375, -3.724853515625, -3.4521484375, -3.179443359375, -2.90673828125, -2.634033203125, -2.361328125, -2.088623046875, -1.81591796875, -1.543212890625, -1.2705078125, -0.997802734375, -0.72509765625, -0.452392578125, -0.1796875, 0.093017578125, 0.36572265625, 0.638427734375, 0.9111328125, 1.183837890625, 1.45654296875, 1.729248046875, 2.001953125, 2.274658203125, 2.54736328125, 2.820068359375, 3.0927734375, 3.365478515625, 3.63818359375, 3.910888671875, 4.18359375, 4.456298828125, 4.72900390625, 5.001708984375, 5.2744140625, 5.547119140625, 5.81982421875, 6.092529296875, 6.365234375, 6.637939453125, 6.91064453125, 7.183349609375, 7.4560546875, 7.728759765625, 8.00146484375, 8.274169921875, 8.546875]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 6.0, 10.0, 31.0, 117.0, 1048271.0, 90.0, 17.0, 9.0, 11.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-192.75, -187.306640625, -181.86328125, -176.419921875, -170.9765625, -165.533203125, -160.08984375, -154.646484375, -149.203125, -143.759765625, -138.31640625, -132.873046875, -127.4296875, -121.986328125, -116.54296875, -111.099609375, -105.65625, -100.212890625, -94.76953125, -89.326171875, -83.8828125, -78.439453125, -72.99609375, -67.552734375, -62.109375, -56.666015625, -51.22265625, -45.779296875, -40.3359375, -34.892578125, -29.44921875, -24.005859375, -18.5625, -13.119140625, -7.67578125, -2.232421875, 3.2109375, 8.654296875, 14.09765625, 19.541015625, 24.984375, 30.427734375, 35.87109375, 41.314453125, 46.7578125, 52.201171875, 57.64453125, 63.087890625, 68.53125, 73.974609375, 79.41796875, 84.861328125, 90.3046875, 95.748046875, 101.19140625, 106.634765625, 112.078125, 117.521484375, 122.96484375, 128.408203125, 133.8515625, 139.294921875, 144.73828125, 150.181640625, 155.625]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 8.0, 6.0, 13.0, 11.0, 22.0, 75.0, 686.0, 104.0, 33.0, 16.0, 8.0, 7.0, 8.0, 3.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001953125, -0.0019025802612304688, -0.0018520355224609375, -0.0018014907836914062, -0.001750946044921875, -0.0017004013061523438, -0.0016498565673828125, 
-0.0015993118286132812, -0.00154876708984375, -0.0014982223510742188, -0.0014476776123046875, -0.0013971328735351562, -0.001346588134765625, -0.0012960433959960938, -0.0012454986572265625, -0.0011949539184570312, -0.0011444091796875, -0.0010938644409179688, -0.0010433197021484375, -0.0009927749633789062, -0.000942230224609375, -0.0008916854858398438, -0.0008411407470703125, -0.0007905960083007812, -0.00074005126953125, -0.0006895065307617188, -0.0006389617919921875, -0.0005884170532226562, -0.000537872314453125, -0.00048732757568359375, -0.0004367828369140625, -0.00038623809814453125, -0.000335693359375, -0.00028514862060546875, -0.0002346038818359375, -0.00018405914306640625, -0.000133514404296875, -8.296966552734375e-05, -3.24249267578125e-05, 1.811981201171875e-05, 6.866455078125e-05, 0.00011920928955078125, 0.0001697540283203125, 0.00022029876708984375, 0.000270843505859375, 0.00032138824462890625, 0.0003719329833984375, 0.00042247772216796875, 0.0004730224609375, 0.0005235671997070312, 0.0005741119384765625, 0.0006246566772460938, 0.000675201416015625, 0.0007257461547851562, 0.0007762908935546875, 0.0008268356323242188, 0.00087738037109375, 0.0009279251098632812, 0.0009784698486328125, 0.0010290145874023438, 0.001079559326171875, 0.0011301040649414062, 0.0011806488037109375, 0.0012311935424804688, 0.00128173828125]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 9.0, 7.0, 10.0, 26.0, 101.0, 1048236.0, 95.0, 34.0, 19.0, 10.0, 3.0, 4.0, 3.0, 0.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-152.75, -146.98046875, -141.2109375, -135.44140625, -129.671875, -123.90234375, -118.1328125, -112.36328125, -106.59375, -100.82421875, -95.0546875, -89.28515625, -83.515625, -77.74609375, -71.9765625, -66.20703125, -60.4375, -54.66796875, -48.8984375, -43.12890625, -37.359375, -31.58984375, -25.8203125, -20.05078125, -14.28125, -8.51171875, -2.7421875, 3.02734375, 8.796875, 14.56640625, 20.3359375, 26.10546875, 31.875, 37.64453125, 43.4140625, 49.18359375, 54.953125, 60.72265625, 66.4921875, 72.26171875, 78.03125, 83.80078125, 89.5703125, 95.33984375, 101.109375, 106.87890625, 112.6484375, 118.41796875, 124.1875, 129.95703125, 135.7265625, 141.49609375, 147.265625, 153.03515625, 158.8046875, 164.57421875, 170.34375, 176.11328125, 181.8828125, 187.65234375, 193.421875, 199.19140625, 204.9609375, 210.73046875, 216.5]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 9.0, 6.0, 9.0, 24.0, 70.0, 785.0, 46.0, 22.0, 13.0, 11.0, 2.0, 4.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.96484375, -6.69940185546875, -6.4339599609375, -6.16851806640625, -5.903076171875, -5.63763427734375, -5.3721923828125, -5.10675048828125, -4.84130859375, -4.57586669921875, -4.3104248046875, -4.04498291015625, -3.779541015625, -3.51409912109375, -3.2486572265625, -2.98321533203125, -2.7177734375, -2.45233154296875, -2.1868896484375, -1.92144775390625, -1.656005859375, -1.39056396484375, -1.1251220703125, -0.85968017578125, -0.59423828125, -0.32879638671875, -0.0633544921875, 
0.20208740234375, 0.467529296875, 0.73297119140625, 0.9984130859375, 1.26385498046875, 1.529296875, 1.79473876953125, 2.0601806640625, 2.32562255859375, 2.591064453125, 2.85650634765625, 3.1219482421875, 3.38739013671875, 3.65283203125, 3.91827392578125, 4.1837158203125, 4.44915771484375, 4.714599609375, 4.98004150390625, 5.2454833984375, 5.51092529296875, 5.7763671875, 6.04180908203125, 6.3072509765625, 6.57269287109375, 6.838134765625, 7.10357666015625, 7.3690185546875, 7.63446044921875, 7.89990234375, 8.16534423828125, 8.4307861328125, 8.69622802734375, 8.961669921875, 9.22711181640625, 9.4925537109375, 9.75799560546875, 10.0234375]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1020.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-11.562519073486328, 8.62278938293457, 28.80809783935547, 48.993404388427734, 69.1787109375, 89.36402893066406, 109.54933166503906, 129.73464965820312, 149.91995239257812, 170.10525512695312, 190.2905731201172, 210.4758758544922, 230.66119384765625, 250.84649658203125, 271.03179931640625, 291.21710205078125, 311.40240478515625, 331.58770751953125, 351.77301025390625, 371.95831298828125, 392.1436462402344, 412.3289489746094, 432.5142517089844, 452.6995849609375, 472.8848876953125, 493.0701904296875, 513.2554931640625, 533.4407958984375, 553.6260986328125, 573.8114013671875, 593.9967041015625, 614.1820678710938, 634.3673706054688, 654.5526733398438, 674.7379760742188, 694.9232788085938, 715.1085815429688, 735.2939453125, 755.479248046875, 775.66455078125, 795.849853515625, 816.03515625, 836.220458984375, 856.40576171875, 876.591064453125, 896.7763671875, 916.961669921875, 937.1470336914062, 957.332275390625, 977.517578125, 997.702880859375, 1017.88818359375, 1038.073486328125, 1058.2587890625, 1078.444091796875, 1098.62939453125, 1118.8148193359375, 1139.0001220703125, 1159.1854248046875, 1179.3707275390625, 1199.5560302734375, 1219.7413330078125, 1239.9266357421875, 1260.1119384765625, 1280.2972412109375]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 6.0, 5.0, 9.0, 15.0, 36.0, 57.0, 80.0, 120.0, 124.0, 144.0, 116.0, 109.0, 71.0, 53.0, 36.0, 16.0, 11.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.874516487121582, -6.022015571594238, -5.1695146560668945, -4.317014217376709, -3.4645133018493652, -2.6120123863220215, -1.759511947631836, -0.9070110321044922, -0.05451011657714844, 0.7979906797409058, 1.65049147605896, 2.5029921531677246, 3.3554930686950684, 4.207993984222412, 5.060494422912598, 5.912995338439941, 6.765496253967285, 7.617997169494629, 8.470498085021973, 9.322998046875, 10.175498962402344, 11.027999877929688, 11.880500793457031, 12.733001708984375, 13.585502624511719, 14.438003540039062, 15.290504455566406, 16.14300537109375, 16.995506286621094, 17.848007202148438, 18.70050811767578, 19.553009033203125, 20.40550994873047, 21.258010864257812, 22.110511779785156, 22.9630126953125, 23.815513610839844, 24.668014526367188, 25.52051544189453, 26.373016357421875, 
27.22551727294922, 28.078018188476562, 28.930519104003906, 29.78302001953125, 30.635520935058594, 31.488021850585938, 32.34052276611328, 33.193023681640625, 34.04552459716797, 34.89802551269531, 35.750526428222656, 36.60302734375, 37.455528259277344, 38.30802917480469, 39.16053009033203, 40.013031005859375, 40.86552810668945, 41.7180290222168, 42.57052993774414, 43.423030853271484, 44.27553176879883, 45.12803268432617, 45.980533599853516, 46.83303451538086, 47.6855354309082]}, "gradients/decoder.model.decoder.layers.10.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 7.0, 8.0, 11.0, 22.0, 27.0, 62.0, 113.0, 184.0, 310.0, 528.0, 1537.0, 4181227.0, 8397.0, 812.0, 408.0, 245.0, 138.0, 79.0, 74.0, 29.0, 25.0, 10.0, 11.0, 7.0, 2.0, 4.0, 0.0, 3.0, 0.0, 2.0], "bins": [-46.21875, -45.209228515625, -44.19970703125, -43.190185546875, -42.1806640625, -41.171142578125, -40.16162109375, -39.152099609375, -38.142578125, -37.133056640625, -36.12353515625, -35.114013671875, -34.1044921875, -33.094970703125, -32.08544921875, -31.075927734375, -30.06640625, -29.056884765625, -28.04736328125, -27.037841796875, -26.0283203125, -25.018798828125, -24.00927734375, -22.999755859375, -21.990234375, -20.980712890625, -19.97119140625, -18.961669921875, -17.9521484375, -16.942626953125, -15.93310546875, -14.923583984375, -13.9140625, -12.904541015625, -11.89501953125, -10.885498046875, -9.8759765625, -8.866455078125, -7.85693359375, -6.847412109375, -5.837890625, -4.828369140625, -3.81884765625, -2.809326171875, -1.7998046875, -0.790283203125, 0.21923828125, 1.228759765625, 2.23828125, 3.247802734375, 4.25732421875, 5.266845703125, 6.2763671875, 7.285888671875, 8.29541015625, 9.304931640625, 10.314453125, 11.323974609375, 12.33349609375, 13.343017578125, 14.3525390625, 15.362060546875, 16.37158203125, 17.381103515625, 18.390625]}, "gradients/decoder.model.decoder.layers.10.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 11.0, 8.0, 11.0, 22.0, 28.0, 37.0, 42.0, 48.0, 60.0, 78.0, 78.0, 76.0, 66.0, 72.0, 72.0, 74.0, 42.0, 39.0, 32.0, 29.0, 18.0, 11.0, 9.0, 10.0, 11.0, 4.0, 3.0, 2.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.00390625, -2.92498779296875, -2.8460693359375, -2.76715087890625, -2.688232421875, -2.60931396484375, -2.5303955078125, -2.45147705078125, -2.37255859375, -2.29364013671875, -2.2147216796875, -2.13580322265625, -2.056884765625, -1.97796630859375, -1.8990478515625, -1.82012939453125, -1.7412109375, -1.66229248046875, -1.5833740234375, -1.50445556640625, -1.425537109375, -1.34661865234375, -1.2677001953125, -1.18878173828125, -1.10986328125, -1.03094482421875, -0.9520263671875, -0.87310791015625, -0.794189453125, -0.71527099609375, -0.6363525390625, -0.55743408203125, -0.478515625, -0.39959716796875, -0.3206787109375, -0.24176025390625, -0.162841796875, -0.08392333984375, -0.0050048828125, 0.07391357421875, 0.15283203125, 0.23175048828125, 0.3106689453125, 0.38958740234375, 0.468505859375, 0.54742431640625, 0.6263427734375, 0.70526123046875, 0.7841796875, 0.86309814453125, 0.9420166015625, 1.02093505859375, 1.099853515625, 1.17877197265625, 1.2576904296875, 1.33660888671875, 1.41552734375, 1.49444580078125, 1.5733642578125, 1.65228271484375, 1.731201171875, 1.81011962890625, 1.8890380859375, 
1.96795654296875, 2.046875]}, "gradients/decoder.model.decoder.layers.10.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 5.0, 8.0, 2.0, 6.0, 8.0, 20.0, 26.0, 1137.0, 4192907.0, 121.0, 16.0, 12.0, 1.0, 4.0, 3.0, 0.0, 4.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-192.75, -187.8408203125, -182.931640625, -178.0224609375, -173.11328125, -168.2041015625, -163.294921875, -158.3857421875, -153.4765625, -148.5673828125, -143.658203125, -138.7490234375, -133.83984375, -128.9306640625, -124.021484375, -119.1123046875, -114.203125, -109.2939453125, -104.384765625, -99.4755859375, -94.56640625, -89.6572265625, -84.748046875, -79.8388671875, -74.9296875, -70.0205078125, -65.111328125, -60.2021484375, -55.29296875, -50.3837890625, -45.474609375, -40.5654296875, -35.65625, -30.7470703125, -25.837890625, -20.9287109375, -16.01953125, -11.1103515625, -6.201171875, -1.2919921875, 3.6171875, 8.5263671875, 13.435546875, 18.3447265625, 23.25390625, 28.1630859375, 33.072265625, 37.9814453125, 42.890625, 47.7998046875, 52.708984375, 57.6181640625, 62.52734375, 67.4365234375, 72.345703125, 77.2548828125, 82.1640625, 87.0732421875, 91.982421875, 96.8916015625, 101.80078125, 106.7099609375, 111.619140625, 116.5283203125, 121.4375]}, "gradients/decoder.model.decoder.layers.10.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 3.0, 3.0, 10.0, 9.0, 35.0, 3929.0, 63.0, 11.0, 6.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.0390625, -4.91143798828125, -4.7838134765625, -4.65618896484375, -4.528564453125, -4.40093994140625, -4.2733154296875, -4.14569091796875, -4.01806640625, -3.89044189453125, -3.7628173828125, -3.63519287109375, -3.507568359375, -3.37994384765625, -3.2523193359375, -3.12469482421875, -2.9970703125, -2.86944580078125, -2.7418212890625, -2.61419677734375, -2.486572265625, -2.35894775390625, -2.2313232421875, -2.10369873046875, -1.97607421875, -1.84844970703125, -1.7208251953125, -1.59320068359375, -1.465576171875, -1.33795166015625, -1.2103271484375, -1.08270263671875, -0.955078125, -0.82745361328125, -0.6998291015625, -0.57220458984375, -0.444580078125, -0.31695556640625, -0.1893310546875, -0.06170654296875, 0.06591796875, 0.19354248046875, 0.3211669921875, 0.44879150390625, 0.576416015625, 0.70404052734375, 0.8316650390625, 0.95928955078125, 1.0869140625, 1.21453857421875, 1.3421630859375, 1.46978759765625, 1.597412109375, 1.72503662109375, 1.8526611328125, 1.98028564453125, 2.10791015625, 2.23553466796875, 2.3631591796875, 2.49078369140625, 2.618408203125, 2.74603271484375, 2.8736572265625, 3.00128173828125, 3.12890625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 4.0, 2.0, 9.0, 33.0, 115.0, 577.0, 256.0, 16.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.922637939453125, -3.3467812538146973, -2.7709243297576904, -2.1950674057006836, -1.6192107200622559, 
-1.0433540344238281, -0.4674971103668213, 0.10835981369018555, 0.6842164993286133, 1.2600733041763306, 1.8359301090240479, 2.4117870330810547, 2.9876437187194824, 3.56350040435791, 4.139357566833496, 4.715214252471924, 5.291070938110352, 5.866927623748779, 6.442784309387207, 7.018641471862793, 7.594498157501221, 8.170354843139648, 8.746212005615234, 9.32206916809082, 9.89792537689209, 10.473782539367676, 11.049638748168945, 11.625495910644531, 12.201353073120117, 12.777209281921387, 13.353066444396973, 13.928922653198242, 14.504779815673828, 15.080636978149414, 15.656493186950684, 16.232349395751953, 16.80820655822754, 17.384063720703125, 17.95992088317871, 18.535778045654297, 19.11163330078125, 19.687490463256836, 20.263347625732422, 20.839202880859375, 21.41506004333496, 21.990917205810547, 22.566774368286133, 23.14263153076172, 23.718488693237305, 24.29434585571289, 24.870203018188477, 25.44605827331543, 26.021915435791016, 26.5977725982666, 27.173629760742188, 27.749486923217773, 28.32534408569336, 28.901201248168945, 29.47705841064453, 30.052913665771484, 30.62877082824707, 31.204627990722656, 31.780485153198242, 32.35634231567383, 32.93219757080078]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 10.0, 7.0, 14.0, 17.0, 19.0, 28.0, 41.0, 62.0, 47.0, 81.0, 80.0, 104.0, 86.0, 70.0, 57.0, 60.0, 53.0, 48.0, 28.0, 23.0, 22.0, 11.0, 15.0, 8.0, 5.0, 3.0, 3.0, 3.0, 2.0, 0.0, 2.0, 1.0], "bins": [-5.614140510559082, -5.487889289855957, -5.361637592315674, -5.235386371612549, -5.109135150909424, -4.982883453369141, -4.856632232666016, -4.730381011962891, -4.604129314422607, -4.477878093719482, -4.351626396179199, -4.225375175476074, -4.099123954772949, -3.972872257232666, -3.846621036529541, -3.720369577407837, -3.594118356704712, -3.467866897583008, -3.341615676879883, -3.2153642177581787, -3.0891127586364746, -2.9628615379333496, -2.8366100788116455, -2.7103586196899414, -2.5841073989868164, -2.4578559398651123, -2.3316047191619873, -2.205353260040283, -2.079101800918579, -1.9528504610061646, -1.82659912109375, -1.700347661972046, -1.574096441268921, -1.4478451013565063, -1.3215936422348022, -1.1953423023223877, -1.0690908432006836, -0.942839503288269, -0.8165881633758545, -0.6903367638587952, -0.5640853643417358, -0.4378339648246765, -0.3115825951099396, -0.18533122539520264, -0.05907982587814331, 0.06717157363891602, 0.19342291355133057, 0.3196743130683899, 0.4459257125854492, 0.5721771121025085, 0.6984285116195679, 0.8246798515319824, 0.9509312510490417, 1.077182650566101, 1.2034339904785156, 1.3296854496002197, 1.4559367895126343, 1.5821881294250488, 1.708439588546753, 1.8346909284591675, 1.960942268371582, 2.087193727493286, 2.2134451866149902, 2.3396964073181152, 2.4659478664398193]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 7.0, 3.0, 4.0, 11.0, 19.0, 28.0, 52.0, 84.0, 170.0, 366.0, 807.0, 2049.0, 7076.0, 97485.0, 919531.0, 15500.0, 3170.0, 1128.0, 537.0, 228.0, 123.0, 71.0, 32.0, 22.0, 13.0, 7.0, 4.0, 4.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.5, -1.456146240234375, -1.41229248046875, -1.368438720703125, -1.3245849609375, -1.280731201171875, 
-1.23687744140625, -1.193023681640625, -1.149169921875, -1.105316162109375, -1.06146240234375, -1.017608642578125, -0.9737548828125, -0.929901123046875, -0.88604736328125, -0.842193603515625, -0.79833984375, -0.754486083984375, -0.71063232421875, -0.666778564453125, -0.6229248046875, -0.579071044921875, -0.53521728515625, -0.491363525390625, -0.447509765625, -0.403656005859375, -0.35980224609375, -0.315948486328125, -0.2720947265625, -0.228240966796875, -0.18438720703125, -0.140533447265625, -0.0966796875, -0.052825927734375, -0.00897216796875, 0.034881591796875, 0.0787353515625, 0.122589111328125, 0.16644287109375, 0.210296630859375, 0.254150390625, 0.298004150390625, 0.34185791015625, 0.385711669921875, 0.4295654296875, 0.473419189453125, 0.51727294921875, 0.561126708984375, 0.60498046875, 0.648834228515625, 0.69268798828125, 0.736541748046875, 0.7803955078125, 0.824249267578125, 0.86810302734375, 0.911956787109375, 0.955810546875, 0.999664306640625, 1.04351806640625, 1.087371826171875, 1.1312255859375, 1.175079345703125, 1.21893310546875, 1.262786865234375, 1.306640625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 5.0, 3.0, 4.0, 24.0, 20.0, 31.0, 45.0, 59.0, 92.0, 121.0, 139.0, 117.0, 86.0, 77.0, 63.0, 43.0, 26.0, 23.0, 12.0, 8.0, 5.0, 4.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.68359375, -4.574371337890625, -4.46514892578125, -4.355926513671875, -4.2467041015625, -4.137481689453125, -4.02825927734375, -3.919036865234375, -3.809814453125, -3.700592041015625, -3.59136962890625, -3.482147216796875, -3.3729248046875, -3.263702392578125, -3.15447998046875, -3.045257568359375, -2.93603515625, -2.826812744140625, -2.71759033203125, -2.608367919921875, -2.4991455078125, -2.389923095703125, -2.28070068359375, -2.171478271484375, -2.062255859375, -1.953033447265625, -1.84381103515625, -1.734588623046875, -1.6253662109375, -1.516143798828125, -1.40692138671875, -1.297698974609375, -1.1884765625, -1.079254150390625, -0.97003173828125, -0.860809326171875, -0.7515869140625, -0.642364501953125, -0.53314208984375, -0.423919677734375, -0.314697265625, -0.205474853515625, -0.09625244140625, 0.012969970703125, 0.1221923828125, 0.231414794921875, 0.34063720703125, 0.449859619140625, 0.55908203125, 0.668304443359375, 0.77752685546875, 0.886749267578125, 0.9959716796875, 1.105194091796875, 1.21441650390625, 1.323638916015625, 1.432861328125, 1.542083740234375, 1.65130615234375, 1.760528564453125, 1.8697509765625, 1.978973388671875, 2.08819580078125, 2.197418212890625, 2.306640625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 9.0, 8.0, 9.0, 7.0, 19.0, 21.0, 28.0, 29.0, 38.0, 44.0, 55.0, 62.0, 86.0, 95.0, 112.0, 184.0, 311.0, 754.0, 2412.0, 12687.0, 162830.0, 821130.0, 40021.0, 5083.0, 1229.0, 425.0, 236.0, 140.0, 96.0, 80.0, 64.0, 55.0, 42.0, 36.0, 26.0, 18.0, 15.0, 13.0, 13.0, 6.0, 5.0, 9.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0], "bins": [-0.102294921875, -0.09912395477294922, -0.09595298767089844, -0.09278202056884766, -0.08961105346679688, -0.0864400863647461, -0.08326911926269531, -0.08009815216064453, -0.07692718505859375, -0.07375621795654297, -0.07058525085449219, -0.0674142837524414, -0.06424331665039062, 
-0.061072349548339844, -0.05790138244628906, -0.05473041534423828, -0.0515594482421875, -0.04838848114013672, -0.04521751403808594, -0.042046546936035156, -0.038875579833984375, -0.035704612731933594, -0.03253364562988281, -0.02936267852783203, -0.02619171142578125, -0.02302074432373047, -0.019849777221679688, -0.016678810119628906, -0.013507843017578125, -0.010336875915527344, -0.0071659088134765625, -0.003994941711425781, -0.000823974609375, 0.0023469924926757812, 0.0055179595947265625, 0.008688926696777344, 0.011859893798828125, 0.015030860900878906, 0.018201828002929688, 0.02137279510498047, 0.02454376220703125, 0.02771472930908203, 0.030885696411132812, 0.034056663513183594, 0.037227630615234375, 0.040398597717285156, 0.04356956481933594, 0.04674053192138672, 0.0499114990234375, 0.05308246612548828, 0.05625343322753906, 0.059424400329589844, 0.06259536743164062, 0.0657663345336914, 0.06893730163574219, 0.07210826873779297, 0.07527923583984375, 0.07845020294189453, 0.08162117004394531, 0.0847921371459961, 0.08796310424804688, 0.09113407135009766, 0.09430503845214844, 0.09747600555419922, 0.10064697265625]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 5.0, 1.0, 4.0, 3.0, 9.0, 7.0, 6.0, 8.0, 13.0, 17.0, 13.0, 19.0, 18.0, 23.0, 15.0, 25.0, 34.0, 34.0, 30.0, 41.0, 37.0, 60.0, 45.0, 45.0, 46.0, 48.0, 50.0, 32.0, 39.0, 36.0, 35.0, 37.0, 25.0, 25.0, 23.0, 15.0, 17.0, 15.0, 9.0, 8.0, 13.0, 11.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0], "bins": [-3.0234375, -2.9298095703125, -2.836181640625, -2.7425537109375, -2.64892578125, -2.5552978515625, -2.461669921875, -2.3680419921875, -2.2744140625, -2.1807861328125, -2.087158203125, -1.9935302734375, -1.89990234375, -1.8062744140625, -1.712646484375, -1.6190185546875, -1.525390625, -1.4317626953125, -1.338134765625, -1.2445068359375, -1.15087890625, -1.0572509765625, -0.963623046875, -0.8699951171875, -0.7763671875, -0.6827392578125, -0.589111328125, -0.4954833984375, -0.40185546875, -0.3082275390625, -0.214599609375, -0.1209716796875, -0.02734375, 0.0662841796875, 0.159912109375, 0.2535400390625, 0.34716796875, 0.4407958984375, 0.534423828125, 0.6280517578125, 0.7216796875, 0.8153076171875, 0.908935546875, 1.0025634765625, 1.09619140625, 1.1898193359375, 1.283447265625, 1.3770751953125, 1.470703125, 1.5643310546875, 1.657958984375, 1.7515869140625, 1.84521484375, 1.9388427734375, 2.032470703125, 2.1260986328125, 2.2197265625, 2.3133544921875, 2.406982421875, 2.5006103515625, 2.59423828125, 2.6878662109375, 2.781494140625, 2.8751220703125, 2.96875]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 3.0, 1.0, 12.0, 10.0, 18.0, 10.0, 36.0, 21.0, 43.0, 88.0, 114.0, 152.0, 188.0, 257.0, 404.0, 590.0, 888.0, 1386.0, 2019.0, 3286.0, 5317.0, 9481.0, 17464.0, 36314.0, 91391.0, 562631.0, 201582.0, 56610.0, 25629.0, 12884.0, 7333.0, 4381.0, 2738.0, 1751.0, 1146.0, 796.0, 462.0, 343.0, 225.0, 154.0, 120.0, 84.0, 55.0, 53.0, 18.0, 17.0, 23.0, 14.0, 8.0, 8.0, 2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 2.0], "bins": [-0.002422332763671875, -0.0023484230041503906, -0.0022745132446289062, -0.002200603485107422, -0.0021266937255859375, -0.002052783966064453, -0.0019788742065429688, -0.0019049644470214844, -0.0018310546875, -0.0017571449279785156, -0.0016832351684570312, -0.0016093254089355469, -0.0015354156494140625, -0.0014615058898925781, 
[wandb run log: per-parameter gradient histograms ("values" and "bins" arrays) for decoder.model.decoder.layers 8-10, covering the self_attn and encoder_attn q/k/v/out_proj weights and biases, the self_attn, encoder_attn, and final layer norms, and fc1/fc2; raw numeric histogram data omitted.]
-0.25150924921035767, -0.2398776412010193, -0.2282460480928421, -0.21661444008350372, -0.20498284697532654, -0.19335123896598816, -0.18171963095664978, -0.1700880229473114, -0.15845641493797302, -0.14682480692863464, -0.13519321382045746, -0.12356160581111908, -0.1119299978017807, -0.10029838979244232, -0.08866678178310394, -0.07703518122434616, -0.06540357321500778, -0.0537719652056694, -0.04214036464691162, -0.030508756637573242, -0.018877148628234863, -0.007245542481541634, 0.004386063665151596, 0.016017667949199677, 0.027649275958538055, 0.039280883967876434, 0.050912484526634216, 0.0625440925359726, 0.07417570054531097, 0.08580730855464935, 0.09743891656398773, 0.10907051712274551, 0.12070212513208389, 0.13233372569084167, 0.14396533370018005, 0.15559694170951843, 0.1672285497188568, 0.1788601577281952, 0.19049176573753357, 0.20212337374687195, 0.21375498175621033, 0.2253865897655487, 0.2370181828737259, 0.24864979088306427, 0.26028138399124146, 0.27191299200057983, 0.2835446000099182, 0.2951762080192566, 0.30680781602859497, 0.31843942403793335, 0.33007103204727173, 0.3417026400566101, 0.3533342480659485]}, "gradients/decoder.model.decoder.layers.8.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 6.0, 9.0, 7.0, 15.0, 14.0, 23.0, 35.0, 58.0, 94.0, 138.0, 184.0, 260.0, 411.0, 625.0, 1026.0, 1648.0, 2704.0, 4594.0, 8077.0, 15243.0, 29615.0, 69233.0, 3957915.0, 50304.0, 24010.0, 12125.0, 6482.0, 3691.0, 2119.0, 1279.0, 816.0, 529.0, 332.0, 199.0, 151.0, 107.0, 61.0, 43.0, 34.0, 21.0, 21.0, 7.0, 9.0, 8.0, 0.0, 4.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.336181640625, -0.3266639709472656, -0.31714630126953125, -0.3076286315917969, -0.2981109619140625, -0.2885932922363281, -0.27907562255859375, -0.2695579528808594, -0.260040283203125, -0.2505226135253906, -0.24100494384765625, -0.23148727416992188, -0.2219696044921875, -0.21245193481445312, -0.20293426513671875, -0.19341659545898438, -0.18389892578125, -0.17438125610351562, -0.16486358642578125, -0.15534591674804688, -0.1458282470703125, -0.13631057739257812, -0.12679290771484375, -0.11727523803710938, -0.107757568359375, -0.09823989868164062, -0.08872222900390625, -0.07920455932617188, -0.0696868896484375, -0.060169219970703125, -0.05065155029296875, -0.041133880615234375, -0.0316162109375, -0.022098541259765625, -0.01258087158203125, -0.003063201904296875, 0.0064544677734375, 0.015972137451171875, 0.02548980712890625, 0.035007476806640625, 0.044525146484375, 0.054042816162109375, 0.06356048583984375, 0.07307815551757812, 0.0825958251953125, 0.09211349487304688, 0.10163116455078125, 0.11114883422851562, 0.12066650390625, 0.13018417358398438, 0.13970184326171875, 0.14921951293945312, 0.1587371826171875, 0.16825485229492188, 0.17777252197265625, 0.18729019165039062, 0.196807861328125, 0.20632553100585938, 0.21584320068359375, 0.22536087036132812, 0.2348785400390625, 0.24439620971679688, 0.25391387939453125, 0.2634315490722656, 0.27294921875]}, "gradients/decoder.model.decoder.layers.8.fc2.bias": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 3.0, 1.0, 6.0, 2.0, 4.0, 2.0, 3.0, 4.0, 6.0, 9.0, 8.0, 12.0, 10.0, 12.0, 18.0, 26.0, 25.0, 26.0, 18.0, 23.0, 39.0, 35.0, 40.0, 52.0, 48.0, 56.0, 53.0, 56.0, 35.0, 51.0, 35.0, 33.0, 45.0, 29.0, 29.0, 32.0, 18.0, 25.0, 15.0, 12.0, 8.0, 7.0, 9.0, 9.0, 5.0, 2.0, 3.0, 4.0, 7.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0130157470703125, -0.012572288513183594, -0.012128829956054688, -0.011685371398925781, 
-0.011241912841796875, -0.010798454284667969, -0.010354995727539062, -0.009911537170410156, -0.00946807861328125, -0.009024620056152344, -0.008581161499023438, -0.008137702941894531, -0.007694244384765625, -0.007250785827636719, -0.0068073272705078125, -0.006363868713378906, -0.00592041015625, -0.005476951599121094, -0.0050334930419921875, -0.004590034484863281, -0.004146575927734375, -0.0037031173706054688, -0.0032596588134765625, -0.0028162002563476562, -0.00237274169921875, -0.0019292831420898438, -0.0014858245849609375, -0.0010423660278320312, -0.000598907470703125, -0.00015544891357421875, 0.0002880096435546875, 0.0007314682006835938, 0.0011749267578125, 0.0016183853149414062, 0.0020618438720703125, 0.0025053024291992188, 0.002948760986328125, 0.0033922195434570312, 0.0038356781005859375, 0.004279136657714844, 0.00472259521484375, 0.005166053771972656, 0.0056095123291015625, 0.006052970886230469, 0.006496429443359375, 0.006939888000488281, 0.0073833465576171875, 0.007826805114746094, 0.008270263671875, 0.008713722229003906, 0.009157180786132812, 0.009600639343261719, 0.010044097900390625, 0.010487556457519531, 0.010931015014648438, 0.011374473571777344, 0.01181793212890625, 0.012261390686035156, 0.012704849243164062, 0.013148307800292969, 0.013591766357421875, 0.014035224914550781, 0.014478683471679688, 0.014922142028808594, 0.0153656005859375]}, "gradients/decoder.model.decoder.layers.8.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 9.0, 2.0, 5.0, 12.0, 17.0, 17.0, 25.0, 50.0, 62.0, 107.0, 204.0, 382.0, 697.0, 1430.0, 3329.0, 8343.0, 23964.0, 129586.0, 3967929.0, 37925.0, 11606.0, 4524.0, 2058.0, 950.0, 472.0, 236.0, 120.0, 74.0, 54.0, 22.0, 13.0, 16.0, 7.0, 13.0, 4.0, 6.0, 6.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.18701171875, -0.1812000274658203, -0.17538833618164062, -0.16957664489746094, -0.16376495361328125, -0.15795326232910156, -0.15214157104492188, -0.1463298797607422, -0.1405181884765625, -0.1347064971923828, -0.12889480590820312, -0.12308311462402344, -0.11727142333984375, -0.11145973205566406, -0.10564804077148438, -0.09983634948730469, -0.094024658203125, -0.08821296691894531, -0.08240127563476562, -0.07658958435058594, -0.07077789306640625, -0.06496620178222656, -0.059154510498046875, -0.05334281921386719, -0.0475311279296875, -0.04171943664550781, -0.035907745361328125, -0.030096054077148438, -0.02428436279296875, -0.018472671508789062, -0.012660980224609375, -0.0068492889404296875, -0.00103759765625, 0.0047740936279296875, 0.010585784912109375, 0.016397476196289062, 0.02220916748046875, 0.028020858764648438, 0.033832550048828125, 0.03964424133300781, 0.0454559326171875, 0.05126762390136719, 0.057079315185546875, 0.06289100646972656, 0.06870269775390625, 0.07451438903808594, 0.08032608032226562, 0.08613777160644531, 0.091949462890625, 0.09776115417480469, 0.10357284545898438, 0.10938453674316406, 0.11519622802734375, 0.12100791931152344, 0.12681961059570312, 0.1326313018798828, 0.1384429931640625, 0.1442546844482422, 0.15006637573242188, 0.15587806701660156, 0.16168975830078125, 0.16750144958496094, 0.17331314086914062, 0.1791248321533203, 0.1849365234375]}, "gradients/decoder.model.decoder.layers.8.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 6.0, 3.0, 3.0, 6.0, 2.0, 3.0, 3.0, 5.0, 3.0, 5.0, 4.0, 12.0, 10.0, 15.0, 13.0, 13.0, 20.0, 12.0, 21.0, 69.0, 2557.0, 1065.0, 66.0, 28.0, 13.0, 16.0, 13.0, 15.0, 11.0, 10.0, 7.0, 
9.0, 5.0, 13.0, 3.0, 5.0, 6.0, 5.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.03912353515625, -0.037774085998535156, -0.03642463684082031, -0.03507518768310547, -0.033725738525390625, -0.03237628936767578, -0.031026840209960938, -0.029677391052246094, -0.02832794189453125, -0.026978492736816406, -0.025629043579101562, -0.02427959442138672, -0.022930145263671875, -0.02158069610595703, -0.020231246948242188, -0.018881797790527344, -0.0175323486328125, -0.016182899475097656, -0.014833450317382812, -0.013484001159667969, -0.012134552001953125, -0.010785102844238281, -0.009435653686523438, -0.008086204528808594, -0.00673675537109375, -0.005387306213378906, -0.0040378570556640625, -0.0026884078979492188, -0.001338958740234375, 1.049041748046875e-05, 0.0013599395751953125, 0.0027093887329101562, 0.004058837890625, 0.005408287048339844, 0.0067577362060546875, 0.008107185363769531, 0.009456634521484375, 0.010806083679199219, 0.012155532836914062, 0.013504981994628906, 0.01485443115234375, 0.016203880310058594, 0.017553329467773438, 0.01890277862548828, 0.020252227783203125, 0.02160167694091797, 0.022951126098632812, 0.024300575256347656, 0.0256500244140625, 0.026999473571777344, 0.028348922729492188, 0.02969837188720703, 0.031047821044921875, 0.03239727020263672, 0.03374671936035156, 0.035096168518066406, 0.03644561767578125, 0.037795066833496094, 0.03914451599121094, 0.04049396514892578, 0.041843414306640625, 0.04319286346435547, 0.04454231262207031, 0.045891761779785156, 0.0472412109375]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 10.0, 8.0, 6.0, 16.0, 29.0, 28.0, 43.0, 83.0, 181.0, 274.0, 121.0, 73.0, 46.0, 20.0, 20.0, 16.0, 11.0, 6.0, 8.0, 3.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.22078731656074524, -0.2132483720779419, -0.20570941269397736, -0.198170468211174, -0.19063150882720947, -0.18309256434440613, -0.1755536049604416, -0.16801466047763824, -0.1604757010936737, -0.15293675661087036, -0.14539779722690582, -0.13785885274410248, -0.13031989336013794, -0.1227809488773346, -0.11524198949337006, -0.10770304501056671, -0.10016409307718277, -0.09262514114379883, -0.08508618921041489, -0.07754723727703094, -0.070008285343647, -0.06246933713555336, -0.05493038520216942, -0.04739143326878548, -0.039852481335401535, -0.03231352940201759, -0.02477457746863365, -0.01723562739789486, -0.009696675464510918, -0.0021577253937721252, 0.005381226539611816, 0.012920178472995758, 0.0204591304063797, 0.02799808233976364, 0.03553703427314758, 0.043075986206531525, 0.050614938139915466, 0.05815388634800911, 0.06569284200668335, 0.0732317864894867, 0.08077074587345123, 0.08830969780683517, 0.09584864974021912, 0.10338760167360306, 0.110926553606987, 0.11846549808979034, 0.12600445747375488, 0.13354340195655823, 0.14108234643936157, 0.14862129092216492, 0.15616025030612946, 0.1636991947889328, 0.17123815417289734, 0.17877709865570068, 0.18631605803966522, 0.19385500252246857, 0.2013939619064331, 0.20893290638923645, 0.216471865773201, 0.22401081025600433, 0.23154976963996887, 0.23908871412277222, 0.24662767350673676, 0.2541666328907013, 0.26170557737350464]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 
2.0, 1.0, 2.0, 0.0, 2.0, 5.0, 3.0, 7.0, 3.0, 8.0, 16.0, 11.0, 7.0, 30.0, 16.0, 27.0, 18.0, 28.0, 31.0, 32.0, 36.0, 32.0, 33.0, 42.0, 54.0, 46.0, 45.0, 40.0, 49.0, 41.0, 39.0, 33.0, 39.0, 32.0, 23.0, 24.0, 23.0, 31.0, 19.0, 20.0, 12.0, 11.0, 7.0, 5.0, 4.0, 5.0, 5.0, 3.0, 4.0, 5.0, 1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 0.0, 1.0], "bins": [-0.08188922703266144, -0.07932403683662415, -0.07675884664058685, -0.07419365644454956, -0.07162846624851227, -0.06906327605247498, -0.06649808585643768, -0.06393289566040039, -0.0613677054643631, -0.058802515268325806, -0.05623732507228851, -0.05367213487625122, -0.05110694468021393, -0.048541754484176636, -0.04597656428813934, -0.04341137409210205, -0.04084618389606476, -0.038280993700027466, -0.03571580350399017, -0.03315061330795288, -0.03058542311191559, -0.028020232915878296, -0.025455042719841003, -0.02288985252380371, -0.02032466232776642, -0.017759472131729126, -0.015194281935691833, -0.012629091739654541, -0.010063901543617249, -0.007498711347579956, -0.004933521151542664, -0.002368330955505371, 0.00019685178995132446, 0.002762041985988617, 0.005327232182025909, 0.007892422378063202, 0.010457612574100494, 0.013022802770137787, 0.01558799296617508, 0.018153183162212372, 0.020718373358249664, 0.023283563554286957, 0.02584875375032425, 0.028413943946361542, 0.030979134142398834, 0.03354432433843613, 0.03610951453447342, 0.03867470473051071, 0.041239894926548004, 0.0438050851225853, 0.04637027531862259, 0.04893546551465988, 0.051500655710697174, 0.05406584590673447, 0.05663103610277176, 0.05919622629880905, 0.061761416494846344, 0.06432660669088364, 0.06689179688692093, 0.06945698708295822, 0.07202217727899551, 0.0745873674750328, 0.0771525576710701, 0.07971774786710739, 0.08228293806314468]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 4.0, 3.0, 6.0, 7.0, 18.0, 21.0, 28.0, 49.0, 60.0, 110.0, 174.0, 333.0, 661.0, 1308.0, 3370.0, 11663.0, 78429.0, 814054.0, 115981.0, 15130.0, 3942.0, 1626.0, 695.0, 347.0, 203.0, 147.0, 60.0, 40.0, 26.0, 19.0, 13.0, 10.0, 8.0, 5.0, 5.0, 4.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0204010009765625, -0.01963353157043457, -0.01886606216430664, -0.01809859275817871, -0.01733112335205078, -0.01656365394592285, -0.015796184539794922, -0.015028715133666992, -0.014261245727539062, -0.013493776321411133, -0.012726306915283203, -0.011958837509155273, -0.011191368103027344, -0.010423898696899414, -0.009656429290771484, -0.008888959884643555, -0.008121490478515625, -0.007354021072387695, -0.006586551666259766, -0.005819082260131836, -0.005051612854003906, -0.0042841434478759766, -0.003516674041748047, -0.002749204635620117, -0.0019817352294921875, -0.0012142658233642578, -0.0004467964172363281, 0.00032067298889160156, 0.0010881423950195312, 0.001855611801147461, 0.0026230812072753906, 0.0033905506134033203, 0.00415802001953125, 0.00492548942565918, 0.005692958831787109, 0.006460428237915039, 0.007227897644042969, 0.007995367050170898, 0.008762836456298828, 0.009530305862426758, 0.010297775268554688, 0.011065244674682617, 0.011832714080810547, 0.012600183486938477, 0.013367652893066406, 0.014135122299194336, 0.014902591705322266, 0.015670061111450195, 0.016437530517578125, 0.017204999923706055, 0.017972469329833984, 0.018739938735961914, 0.019507408142089844, 0.020274877548217773, 0.021042346954345703, 0.021809816360473633, 0.022577285766601562, 0.023344755172729492, 
0.024112224578857422, 0.02487969398498535, 0.02564716339111328, 0.02641463279724121, 0.02718210220336914, 0.02794957160949707, 0.028717041015625]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 0.0, 11.0, 7.0, 14.0, 19.0, 22.0, 19.0, 28.0, 41.0, 51.0, 51.0, 53.0, 84.0, 70.0, 74.0, 81.0, 75.0, 58.0, 41.0, 33.0, 39.0, 42.0, 32.0, 20.0, 11.0, 9.0, 0.0, 8.0, 5.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05255126953125, -0.050560951232910156, -0.04857063293457031, -0.04658031463623047, -0.044589996337890625, -0.04259967803955078, -0.04060935974121094, -0.038619041442871094, -0.03662872314453125, -0.034638404846191406, -0.03264808654785156, -0.03065776824951172, -0.028667449951171875, -0.02667713165283203, -0.024686813354492188, -0.022696495056152344, -0.0207061767578125, -0.018715858459472656, -0.016725540161132812, -0.014735221862792969, -0.012744903564453125, -0.010754585266113281, -0.008764266967773438, -0.006773948669433594, -0.00478363037109375, -0.0027933120727539062, -0.0008029937744140625, 0.0011873245239257812, 0.003177642822265625, 0.005167961120605469, 0.0071582794189453125, 0.009148597717285156, 0.011138916015625, 0.013129234313964844, 0.015119552612304688, 0.01710987091064453, 0.019100189208984375, 0.02109050750732422, 0.023080825805664062, 0.025071144104003906, 0.02706146240234375, 0.029051780700683594, 0.031042098999023438, 0.03303241729736328, 0.035022735595703125, 0.03701305389404297, 0.03900337219238281, 0.040993690490722656, 0.0429840087890625, 0.044974327087402344, 0.04696464538574219, 0.04895496368408203, 0.050945281982421875, 0.05293560028076172, 0.05492591857910156, 0.056916236877441406, 0.05890655517578125, 0.060896873474121094, 0.06288719177246094, 0.06487751007080078, 0.06686782836914062, 0.06885814666748047, 0.07084846496582031, 0.07283878326416016, 0.0748291015625]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 3.0, 5.0, 6.0, 6.0, 8.0, 12.0, 19.0, 26.0, 28.0, 32.0, 36.0, 54.0, 68.0, 75.0, 97.0, 134.0, 213.0, 377.0, 1036.0, 4519.0, 41417.0, 876065.0, 113154.0, 8272.0, 1548.0, 472.0, 222.0, 147.0, 100.0, 97.0, 65.0, 59.0, 39.0, 46.0, 27.0, 20.0, 19.0, 7.0, 12.0, 6.0, 8.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0032787322998046875, -0.0031509697437286377, -0.003023207187652588, -0.002895444631576538, -0.0027676820755004883, -0.0026399195194244385, -0.0025121569633483887, -0.002384394407272339, -0.002256631851196289, -0.0021288692951202393, -0.0020011067390441895, -0.0018733441829681396, -0.0017455816268920898, -0.00161781907081604, -0.0014900565147399902, -0.0013622939586639404, -0.0012345314025878906, -0.0011067688465118408, -0.000979006290435791, -0.0008512437343597412, -0.0007234811782836914, -0.0005957186222076416, -0.0004679560661315918, -0.000340193510055542, -0.0002124309539794922, -8.466839790344238e-05, 4.309415817260742e-05, 0.00017085671424865723, 0.00029861927032470703, 0.00042638182640075684, 0.0005541443824768066, 0.0006819069385528564, 0.0008096694946289062, 0.0009374320507049561, 0.0010651946067810059, 0.0011929571628570557, 0.0013207197189331055, 0.0014484822750091553, 0.001576244831085205, 0.0017040073871612549, 0.0018317699432373047, 0.0019595324993133545, 0.0020872950553894043, 0.002215057611465454, 
0.002342820167541504, 0.0024705827236175537, 0.0025983452796936035, 0.0027261078357696533, 0.002853870391845703, 0.002981632947921753, 0.0031093955039978027, 0.0032371580600738525, 0.0033649206161499023, 0.003492683172225952, 0.003620445728302002, 0.0037482082843780518, 0.0038759708404541016, 0.004003733396530151, 0.004131495952606201, 0.004259258508682251, 0.004387021064758301, 0.004514783620834351, 0.0046425461769104, 0.00477030873298645, 0.0048980712890625]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 4.0, 0.0, 6.0, 3.0, 5.0, 6.0, 5.0, 10.0, 11.0, 16.0, 15.0, 12.0, 19.0, 19.0, 27.0, 35.0, 39.0, 38.0, 42.0, 45.0, 48.0, 57.0, 42.0, 50.0, 49.0, 43.0, 43.0, 44.0, 36.0, 32.0, 31.0, 32.0, 25.0, 24.0, 21.0, 13.0, 11.0, 17.0, 7.0, 7.0, 9.0, 7.0, 7.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09405517578125, -0.09035015106201172, -0.08664512634277344, -0.08294010162353516, -0.07923507690429688, -0.0755300521850586, -0.07182502746582031, -0.06812000274658203, -0.06441497802734375, -0.06070995330810547, -0.05700492858886719, -0.053299903869628906, -0.049594879150390625, -0.045889854431152344, -0.04218482971191406, -0.03847980499267578, -0.0347747802734375, -0.03106975555419922, -0.027364730834960938, -0.023659706115722656, -0.019954681396484375, -0.016249656677246094, -0.012544631958007812, -0.008839607238769531, -0.00513458251953125, -0.0014295578002929688, 0.0022754669189453125, 0.005980491638183594, 0.009685516357421875, 0.013390541076660156, 0.017095565795898438, 0.02080059051513672, 0.024505615234375, 0.02821063995361328, 0.03191566467285156, 0.035620689392089844, 0.039325714111328125, 0.043030738830566406, 0.04673576354980469, 0.05044078826904297, 0.05414581298828125, 0.05785083770751953, 0.06155586242675781, 0.0652608871459961, 0.06896591186523438, 0.07267093658447266, 0.07637596130371094, 0.08008098602294922, 0.0837860107421875, 0.08749103546142578, 0.09119606018066406, 0.09490108489990234, 0.09860610961914062, 0.1023111343383789, 0.10601615905761719, 0.10972118377685547, 0.11342620849609375, 0.11713123321533203, 0.12083625793457031, 0.1245412826538086, 0.12824630737304688, 0.13195133209228516, 0.13565635681152344, 0.13936138153076172, 0.14306640625]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 2.0, 0.0, 5.0, 3.0, 8.0, 7.0, 20.0, 16.0, 14.0, 38.0, 42.0, 61.0, 78.0, 109.0, 162.0, 230.0, 264.0, 337.0, 492.0, 701.0, 1145.0, 1884.0, 3200.0, 6009.0, 12761.0, 32463.0, 108272.0, 600805.0, 195186.0, 49423.0, 17084.0, 7759.0, 3747.0, 2071.0, 1292.0, 874.0, 528.0, 389.0, 275.0, 208.0, 146.0, 115.0, 97.0, 67.0, 60.0, 32.0, 25.0, 20.0, 15.0, 4.0, 9.0, 6.0, 5.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.507469177246094e-05, -5.3279101848602295e-05, -5.148351192474365e-05, -4.968792200088501e-05, -4.789233207702637e-05, -4.6096742153167725e-05, -4.430115222930908e-05, -4.250556230545044e-05, -4.07099723815918e-05, -3.8914382457733154e-05, -3.711879253387451e-05, -3.532320261001587e-05, -3.3527612686157227e-05, -3.1732022762298584e-05, -2.993643283843994e-05, -2.81408429145813e-05, -2.6345252990722656e-05, -2.4549663066864014e-05, -2.275407314300537e-05, -2.095848321914673e-05, -1.9162893295288086e-05, -1.7367303371429443e-05, -1.55717134475708e-05, -1.3776123523712158e-05, -1.1980533599853516e-05, -1.0184943675994873e-05, -8.38935375213623e-06, -6.593763828277588e-06, 
-4.798173904418945e-06, -3.0025839805603027e-06, -1.2069940567016602e-06, 5.885958671569824e-07, 2.384185791015625e-06, 4.179775714874268e-06, 5.97536563873291e-06, 7.770955562591553e-06, 9.566545486450195e-06, 1.1362135410308838e-05, 1.315772533416748e-05, 1.4953315258026123e-05, 1.6748905181884766e-05, 1.8544495105743408e-05, 2.034008502960205e-05, 2.2135674953460693e-05, 2.3931264877319336e-05, 2.572685480117798e-05, 2.752244472503662e-05, 2.9318034648895264e-05, 3.1113624572753906e-05, 3.290921449661255e-05, 3.470480442047119e-05, 3.6500394344329834e-05, 3.8295984268188477e-05, 4.009157419204712e-05, 4.188716411590576e-05, 4.3682754039764404e-05, 4.547834396362305e-05, 4.727393388748169e-05, 4.906952381134033e-05, 5.0865113735198975e-05, 5.266070365905762e-05, 5.445629358291626e-05, 5.62518835067749e-05, 5.8047473430633545e-05, 5.984306335449219e-05]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 7.0, 1.0, 3.0, 1.0, 4.0, 2.0, 4.0, 17.0, 14.0, 39.0, 45.0, 142.0, 159.0, 291.0, 117.0, 74.0, 33.0, 24.0, 14.0, 10.0, 7.0, 2.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.3974647521972656e-06, -3.25031578540802e-06, -3.1031668186187744e-06, -2.956017851829529e-06, -2.808868885040283e-06, -2.6617199182510376e-06, -2.514570951461792e-06, -2.3674219846725464e-06, -2.2202730178833008e-06, -2.073124051094055e-06, -1.9259750843048096e-06, -1.778826117515564e-06, -1.6316771507263184e-06, -1.4845281839370728e-06, -1.3373792171478271e-06, -1.1902302503585815e-06, -1.043081283569336e-06, -8.959323167800903e-07, -7.487833499908447e-07, -6.016343832015991e-07, -4.544854164123535e-07, -3.073364496231079e-07, -1.601874828338623e-07, -1.30385160446167e-08, 1.341104507446289e-07, 2.812594175338745e-07, 4.284083843231201e-07, 5.755573511123657e-07, 7.227063179016113e-07, 8.698552846908569e-07, 1.0170042514801025e-06, 1.1641532182693481e-06, 1.3113021850585938e-06, 1.4584511518478394e-06, 1.605600118637085e-06, 1.7527490854263306e-06, 1.8998980522155762e-06, 2.0470470190048218e-06, 2.1941959857940674e-06, 2.341344952583313e-06, 2.4884939193725586e-06, 2.635642886161804e-06, 2.78279185295105e-06, 2.9299408197402954e-06, 3.077089786529541e-06, 3.2242387533187866e-06, 3.3713877201080322e-06, 3.518536686897278e-06, 3.6656856536865234e-06, 3.812834620475769e-06, 3.959983587265015e-06, 4.10713255405426e-06, 4.254281520843506e-06, 4.4014304876327515e-06, 4.548579454421997e-06, 4.695728421211243e-06, 4.842877388000488e-06, 4.990026354789734e-06, 5.1371753215789795e-06, 5.284324288368225e-06, 5.431473255157471e-06, 5.578622221946716e-06, 5.725771188735962e-06, 5.8729201555252075e-06, 6.020069122314453e-06]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 3.0, 1.0, 4.0, 2.0, 4.0, 7.0, 7.0, 7.0, 6.0, 18.0, 28.0, 36.0, 53.0, 62.0, 96.0, 118.0, 164.0, 230.0, 371.0, 586.0, 915.0, 1529.0, 2783.0, 5453.0, 13313.0, 41423.0, 240507.0, 641495.0, 64209.0, 19208.0, 7496.0, 3403.0, 1927.0, 1108.0, 630.0, 415.0, 291.0, 184.0, 126.0, 96.0, 76.0, 48.0, 41.0, 25.0, 14.0, 11.0, 14.0, 5.0, 6.0, 8.0, 1.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.0008296966552734e-05, -4.835054278373718e-05, -4.669278860092163e-05, -4.503503441810608e-05, -4.337728023529053e-05, -4.1719526052474976e-05, -4.0061771869659424e-05, 
-3.840401768684387e-05, -3.674626350402832e-05, -3.508850932121277e-05, -3.343075513839722e-05, -3.1773000955581665e-05, -3.0115246772766113e-05, -2.845749258995056e-05, -2.679973840713501e-05, -2.5141984224319458e-05, -2.3484230041503906e-05, -2.1826475858688354e-05, -2.0168721675872803e-05, -1.851096749305725e-05, -1.68532133102417e-05, -1.5195459127426147e-05, -1.3537704944610596e-05, -1.1879950761795044e-05, -1.0222196578979492e-05, -8.56444239616394e-06, -6.906688213348389e-06, -5.248934030532837e-06, -3.591179847717285e-06, -1.9334256649017334e-06, -2.7567148208618164e-07, 1.3820827007293701e-06, 3.039836883544922e-06, 4.697591066360474e-06, 6.355345249176025e-06, 8.013099431991577e-06, 9.670853614807129e-06, 1.132860779762268e-05, 1.2986361980438232e-05, 1.4644116163253784e-05, 1.6301870346069336e-05, 1.7959624528884888e-05, 1.961737871170044e-05, 2.127513289451599e-05, 2.2932887077331543e-05, 2.4590641260147095e-05, 2.6248395442962646e-05, 2.7906149625778198e-05, 2.956390380859375e-05, 3.12216579914093e-05, 3.2879412174224854e-05, 3.4537166357040405e-05, 3.619492053985596e-05, 3.785267472267151e-05, 3.951042890548706e-05, 4.116818308830261e-05, 4.2825937271118164e-05, 4.4483691453933716e-05, 4.614144563674927e-05, 4.779919981956482e-05, 4.945695400238037e-05, 5.111470818519592e-05, 5.2772462368011475e-05, 5.4430216550827026e-05, 5.608797073364258e-05]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 6.0, 4.0, 8.0, 2.0, 9.0, 15.0, 20.0, 25.0, 57.0, 63.0, 68.0, 111.0, 160.0, 128.0, 75.0, 79.0, 48.0, 36.0, 21.0, 19.0, 12.0, 15.0, 4.0, 3.0, 5.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.3113021850585938e-05, -1.2673437595367432e-05, -1.2233853340148926e-05, -1.179426908493042e-05, -1.1354684829711914e-05, -1.0915100574493408e-05, -1.0475516319274902e-05, -1.0035932064056396e-05, -9.59634780883789e-06, -9.156763553619385e-06, -8.717179298400879e-06, -8.277595043182373e-06, -7.838010787963867e-06, -7.398426532745361e-06, -6.9588422775268555e-06, -6.51925802230835e-06, -6.079673767089844e-06, -5.640089511871338e-06, -5.200505256652832e-06, -4.760921001434326e-06, -4.32133674621582e-06, -3.8817524909973145e-06, -3.4421682357788086e-06, -3.0025839805603027e-06, -2.562999725341797e-06, -2.123415470123291e-06, -1.6838312149047852e-06, -1.2442469596862793e-06, -8.046627044677734e-07, -3.650784492492676e-07, 7.450580596923828e-08, 5.140900611877441e-07, 9.5367431640625e-07, 1.3932585716247559e-06, 1.8328428268432617e-06, 2.2724270820617676e-06, 2.7120113372802734e-06, 3.1515955924987793e-06, 3.591179847717285e-06, 4.030764102935791e-06, 4.470348358154297e-06, 4.909932613372803e-06, 5.349516868591309e-06, 5.7891011238098145e-06, 6.22868537902832e-06, 6.668269634246826e-06, 7.107853889465332e-06, 7.547438144683838e-06, 7.987022399902344e-06, 8.42660665512085e-06, 8.866190910339355e-06, 9.305775165557861e-06, 9.745359420776367e-06, 1.0184943675994873e-05, 1.0624527931213379e-05, 1.1064112186431885e-05, 1.150369644165039e-05, 1.1943280696868896e-05, 1.2382864952087402e-05, 1.2822449207305908e-05, 1.3262033462524414e-05, 1.370161771774292e-05, 1.4141201972961426e-05, 1.4580786228179932e-05, 1.5020370483398438e-05]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 
2.0, 1.0, 4.0, 8.0, 3.0, 6.0, 7.0, 11.0, 17.0, 29.0, 40.0, 86.0, 168.0, 304.0, 129.0, 56.0, 51.0, 28.0, 19.0, 11.0, 11.0, 7.0, 3.0, 3.0, 2.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.09038817137479782, -0.08683321624994278, -0.08327826857566833, -0.0797233134508133, -0.07616835832595825, -0.07261340320110321, -0.06905845552682877, -0.06550350040197372, -0.06194854900240898, -0.05839359760284424, -0.0548386424779892, -0.051283691078424454, -0.04772873967885971, -0.04417378455400467, -0.040618833154439926, -0.03706388175487518, -0.03350892663002014, -0.02995397336781025, -0.026399020105600357, -0.022844068706035614, -0.019289115443825722, -0.01573416218161583, -0.012179210782051086, -0.008624257519841194, -0.005069304257631302, -0.001514351461082697, 0.002040601335465908, 0.005595553666353226, 0.009150506928563118, 0.01270546019077301, 0.016260411590337753, 0.019815364852547646, 0.02337031066417694, 0.026925263926386833, 0.030480217188596725, 0.03403516858816147, 0.03759012371301651, 0.04114507511258125, 0.044700026512145996, 0.04825498163700104, 0.05180993303656578, 0.055364884436130524, 0.058919839560985565, 0.06247479096055031, 0.06602974236011505, 0.06958469748497009, 0.07313965260982513, 0.07669460028409958, 0.08024955540895462, 0.08380451053380966, 0.0873594582080841, 0.09091441333293915, 0.09446936845779419, 0.09802432358264923, 0.10157927125692368, 0.10513422638177872, 0.10868917405605316, 0.1122441291809082, 0.11579907685518265, 0.11935403198003769, 0.12290898710489273, 0.12646393477916718, 0.13001888990402222, 0.13357384502887726, 0.1371288001537323]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 3.0, 6.0, 8.0, 9.0, 13.0, 11.0, 11.0, 21.0, 19.0, 27.0, 23.0, 35.0, 34.0, 32.0, 43.0, 35.0, 40.0, 44.0, 56.0, 48.0, 47.0, 56.0, 46.0, 34.0, 37.0, 44.0, 29.0, 26.0, 22.0, 26.0, 27.0, 21.0, 12.0, 14.0, 12.0, 3.0, 8.0, 4.0, 7.0, 2.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.04068756476044655, -0.039444565773010254, -0.03820156678557396, -0.036958564072847366, -0.03571556508541107, -0.03447256609797478, -0.03322956711053848, -0.03198656812310219, -0.030743567273020744, -0.02950056828558445, -0.028257567435503006, -0.02701456844806671, -0.025771569460630417, -0.024528568610548973, -0.02328556962311268, -0.022042568773031235, -0.02079956978559494, -0.019556570798158646, -0.018313569948077202, -0.017070570960640907, -0.015827570110559464, -0.014584571123123169, -0.013341572135686874, -0.012098572216928005, -0.010855572298169136, -0.009612572379410267, -0.008369572460651398, -0.007126573473215103, -0.005883573554456234, -0.004640573635697365, -0.003397574182599783, -0.002154574729502201, -0.0009115748107433319, 0.0003314248751848936, 0.0015744245611131191, 0.0028174242470413446, 0.00406042393296957, 0.005303423851728439, 0.006546423304826021, 0.007789422757923603, 0.009032422676682472, 0.010275422595441341, 0.01151842251420021, 0.012761421501636505, 0.014004421420395374, 0.015247421339154243, 0.016490420326590538, 0.017733421176671982, 0.018976420164108276, 0.02021941915154457, 0.021462420001626015, 0.02270541898906231, 0.023948419839143753, 0.025191418826580048, 0.026434417814016342, 0.027677416801452637, 0.02892041765153408, 0.030163416638970375, 0.03140641748905182, 0.03264941647648811, 0.03389241546392441, 0.035135418176651, 
0.036378417164087296, 0.03762141615152359, 0.038864415138959885]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 5.0, 18.0, 13.0, 17.0, 25.0, 42.0, 58.0, 97.0, 116.0, 166.0, 278.0, 364.0, 565.0, 825.0, 1241.0, 1865.0, 2869.0, 4717.0, 7424.0, 12259.0, 21289.0, 39869.0, 86297.0, 350285.0, 339335.0, 85330.0, 39312.0, 21178.0, 12313.0, 7191.0, 4517.0, 2959.0, 1884.0, 1327.0, 820.0, 527.0, 380.0, 221.0, 170.0, 126.0, 72.0, 55.0, 42.0, 29.0, 24.0, 9.0, 9.0, 9.0, 8.0, 3.0, 2.0, 1.0, 2.0, 2.0, 2.0], "bins": [-0.0258636474609375, -0.025079727172851562, -0.024295806884765625, -0.023511886596679688, -0.02272796630859375, -0.021944046020507812, -0.021160125732421875, -0.020376205444335938, -0.01959228515625, -0.018808364868164062, -0.018024444580078125, -0.017240524291992188, -0.01645660400390625, -0.015672683715820312, -0.014888763427734375, -0.014104843139648438, -0.0133209228515625, -0.012537002563476562, -0.011753082275390625, -0.010969161987304688, -0.01018524169921875, -0.009401321411132812, -0.008617401123046875, -0.007833480834960938, -0.007049560546875, -0.0062656402587890625, -0.005481719970703125, -0.0046977996826171875, -0.00391387939453125, -0.0031299591064453125, -0.002346038818359375, -0.0015621185302734375, -0.0007781982421875, 5.7220458984375e-06, 0.000789642333984375, 0.0015735626220703125, 0.00235748291015625, 0.0031414031982421875, 0.003925323486328125, 0.0047092437744140625, 0.0054931640625, 0.0062770843505859375, 0.007061004638671875, 0.007844924926757812, 0.00862884521484375, 0.009412765502929688, 0.010196685791015625, 0.010980606079101562, 0.0117645263671875, 0.012548446655273438, 0.013332366943359375, 0.014116287231445312, 0.01490020751953125, 0.015684127807617188, 0.016468048095703125, 0.017251968383789062, 0.018035888671875, 0.018819808959960938, 0.019603729248046875, 0.020387649536132812, 0.02117156982421875, 0.021955490112304688, 0.022739410400390625, 0.023523330688476562, 0.0243072509765625]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 4.0, 3.0, 4.0, 6.0, 5.0, 12.0, 10.0, 9.0, 12.0, 10.0, 26.0, 14.0, 30.0, 34.0, 33.0, 49.0, 56.0, 51.0, 50.0, 58.0, 51.0, 54.0, 56.0, 49.0, 55.0, 45.0, 31.0, 30.0, 27.0, 23.0, 25.0, 20.0, 11.0, 13.0, 11.0, 8.0, 8.0, 3.0, 6.0, 3.0, 1.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0023174285888671875, -0.0022341012954711914, -0.0021507740020751953, -0.0020674467086791992, -0.001984119415283203, -0.001900792121887207, -0.001817464828491211, -0.0017341375350952148, -0.0016508102416992188, -0.0015674829483032227, -0.0014841556549072266, -0.0014008283615112305, -0.0013175010681152344, -0.0012341737747192383, -0.0011508464813232422, -0.001067519187927246, -0.00098419189453125, -0.0009008646011352539, -0.0008175373077392578, -0.0007342100143432617, -0.0006508827209472656, -0.0005675554275512695, -0.00048422813415527344, -0.00040090084075927734, -0.00031757354736328125, -0.00023424625396728516, -0.00015091896057128906, -6.759166717529297e-05, 1.5735626220703125e-05, 9.906291961669922e-05, 0.0001823902130126953, 0.0002657175064086914, 0.0003490447998046875, 0.0004323720932006836, 0.0005156993865966797, 0.0005990266799926758, 0.0006823539733886719, 0.000765681266784668, 0.0008490085601806641, 0.0009323358535766602, 0.0010156631469726562, 0.0010989904403686523, 0.0011823177337646484, 0.0012656450271606445, 
0.0013489723205566406, 0.0014322996139526367, 0.0015156269073486328, 0.001598954200744629, 0.001682281494140625, 0.001765608787536621, 0.0018489360809326172, 0.0019322633743286133, 0.0020155906677246094, 0.0020989179611206055, 0.0021822452545166016, 0.0022655725479125977, 0.0023488998413085938, 0.00243222713470459, 0.002515554428100586, 0.002598881721496582, 0.002682209014892578, 0.0027655363082885742, 0.0028488636016845703, 0.0029321908950805664, 0.0030155181884765625]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 5.0, 3.0, 5.0, 8.0, 15.0, 31.0, 33.0, 40.0, 71.0, 83.0, 134.0, 200.0, 233.0, 373.0, 558.0, 773.0, 1080.0, 1647.0, 2391.0, 3495.0, 5152.0, 7635.0, 11644.0, 18103.0, 29332.0, 51138.0, 102580.0, 358525.0, 246379.0, 86730.0, 44838.0, 26473.0, 16396.0, 10598.0, 7065.0, 4750.0, 3182.0, 2158.0, 1445.0, 1006.0, 660.0, 466.0, 351.0, 244.0, 160.0, 130.0, 88.0, 61.0, 25.0, 27.0, 15.0, 15.0, 3.0, 8.0, 2.0, 0.0, 2.0, 3.0], "bins": [-0.01605224609375, -0.015578627586364746, -0.015105009078979492, -0.014631390571594238, -0.014157772064208984, -0.01368415355682373, -0.013210535049438477, -0.012736916542053223, -0.012263298034667969, -0.011789679527282715, -0.011316061019897461, -0.010842442512512207, -0.010368824005126953, -0.0098952054977417, -0.009421586990356445, -0.008947968482971191, -0.008474349975585938, -0.008000731468200684, -0.00752711296081543, -0.007053494453430176, -0.006579875946044922, -0.006106257438659668, -0.005632638931274414, -0.00515902042388916, -0.004685401916503906, -0.004211783409118652, -0.0037381649017333984, -0.0032645463943481445, -0.0027909278869628906, -0.0023173093795776367, -0.0018436908721923828, -0.001370072364807129, -0.000896453857421875, -0.0004228353500366211, 5.078315734863281e-05, 0.0005244016647338867, 0.0009980201721191406, 0.0014716386795043945, 0.0019452571868896484, 0.0024188756942749023, 0.0028924942016601562, 0.00336611270904541, 0.003839731216430664, 0.004313349723815918, 0.004786968231201172, 0.005260586738586426, 0.00573420524597168, 0.006207823753356934, 0.0066814422607421875, 0.007155060768127441, 0.007628679275512695, 0.00810229778289795, 0.008575916290283203, 0.009049534797668457, 0.009523153305053711, 0.009996771812438965, 0.010470390319824219, 0.010944008827209473, 0.011417627334594727, 0.01189124584197998, 0.012364864349365234, 0.012838482856750488, 0.013312101364135742, 0.013785719871520996, 0.01425933837890625]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 4.0, 5.0, 5.0, 7.0, 1.0, 8.0, 5.0, 11.0, 9.0, 14.0, 17.0, 18.0, 20.0, 19.0, 25.0, 25.0, 22.0, 33.0, 34.0, 30.0, 42.0, 52.0, 51.0, 52.0, 47.0, 50.0, 52.0, 49.0, 37.0, 31.0, 39.0, 20.0, 28.0, 21.0, 21.0, 18.0, 15.0, 11.0, 13.0, 15.0, 9.0, 4.0, 6.0, 5.0, 2.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01018524169921875, -0.00986790657043457, -0.00955057144165039, -0.009233236312866211, -0.008915901184082031, -0.008598566055297852, -0.008281230926513672, -0.007963895797729492, -0.0076465606689453125, -0.007329225540161133, -0.007011890411376953, -0.0066945552825927734, -0.006377220153808594, -0.006059885025024414, -0.005742549896240234, -0.005425214767456055, -0.005107879638671875, -0.004790544509887695, -0.004473209381103516, -0.004155874252319336, -0.0038385391235351562, -0.0035212039947509766, -0.003203868865966797, -0.002886533737182617, -0.0025691986083984375, 
-0.002251863479614258, -0.0019345283508300781, -0.0016171932220458984, -0.0012998580932617188, -0.000982522964477539, -0.0006651878356933594, -0.0003478527069091797, -3.0517578125e-05, 0.0002868175506591797, 0.0006041526794433594, 0.0009214878082275391, 0.0012388229370117188, 0.0015561580657958984, 0.0018734931945800781, 0.002190828323364258, 0.0025081634521484375, 0.002825498580932617, 0.003142833709716797, 0.0034601688385009766, 0.0037775039672851562, 0.004094839096069336, 0.004412174224853516, 0.004729509353637695, 0.005046844482421875, 0.005364179611206055, 0.005681514739990234, 0.005998849868774414, 0.006316184997558594, 0.0066335201263427734, 0.006950855255126953, 0.007268190383911133, 0.0075855255126953125, 0.007902860641479492, 0.008220195770263672, 0.008537530899047852, 0.008854866027832031, 0.009172201156616211, 0.00948953628540039, 0.00980687141418457, 0.01012420654296875]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 5.0, 8.0, 15.0, 8.0, 18.0, 22.0, 26.0, 44.0, 29.0, 68.0, 128.0, 179.0, 337.0, 540.0, 1038.0, 2027.0, 5331.0, 20808.0, 943503.0, 59512.0, 8652.0, 3134.0, 1352.0, 691.0, 375.0, 208.0, 142.0, 107.0, 72.0, 49.0, 42.0, 30.0, 14.0, 12.0, 6.0, 6.0, 3.0, 3.0, 4.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0301971435546875, -0.029066801071166992, -0.027936458587646484, -0.026806116104125977, -0.02567577362060547, -0.02454543113708496, -0.023415088653564453, -0.022284746170043945, -0.021154403686523438, -0.02002406120300293, -0.018893718719482422, -0.017763376235961914, -0.016633033752441406, -0.015502691268920898, -0.01437234878540039, -0.013242006301879883, -0.012111663818359375, -0.010981321334838867, -0.00985097885131836, -0.008720636367797852, -0.007590293884277344, -0.006459951400756836, -0.005329608917236328, -0.00419926643371582, -0.0030689239501953125, -0.0019385814666748047, -0.0008082389831542969, 0.00032210350036621094, 0.0014524459838867188, 0.0025827884674072266, 0.0037131309509277344, 0.004843473434448242, 0.00597381591796875, 0.007104158401489258, 0.008234500885009766, 0.009364843368530273, 0.010495185852050781, 0.011625528335571289, 0.012755870819091797, 0.013886213302612305, 0.015016555786132812, 0.01614689826965332, 0.017277240753173828, 0.018407583236694336, 0.019537925720214844, 0.02066826820373535, 0.02179861068725586, 0.022928953170776367, 0.024059295654296875, 0.025189638137817383, 0.02631998062133789, 0.0274503231048584, 0.028580665588378906, 0.029711008071899414, 0.030841350555419922, 0.03197169303894043, 0.03310203552246094, 0.034232378005981445, 0.03536272048950195, 0.03649306297302246, 0.03762340545654297, 0.03875374794006348, 0.039884090423583984, 0.04101443290710449, 0.042144775390625]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 6.0, 8.0, 6.0, 7.0, 4.0, 13.0, 15.0, 37.0, 42.0, 62.0, 393.0, 256.0, 59.0, 20.0, 21.0, 13.0, 10.0, 6.0, 6.0, 3.0, 3.0, 4.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.0251998901367188e-05, -9.966082870960236e-06, -9.680166840553284e-06, -9.394250810146332e-06, -9.10833477973938e-06, -8.822418749332428e-06, -8.536502718925476e-06, -8.250586688518524e-06, -7.964670658111572e-06, -7.67875462770462e-06, -7.3928385972976685e-06, 
-7.1069225668907166e-06, -6.821006536483765e-06, -6.535090506076813e-06, -6.249174475669861e-06, -5.963258445262909e-06, -5.677342414855957e-06, -5.391426384449005e-06, -5.105510354042053e-06, -4.819594323635101e-06, -4.533678293228149e-06, -4.2477622628211975e-06, -3.961846232414246e-06, -3.6759302020072937e-06, -3.390014171600342e-06, -3.10409814119339e-06, -2.818182110786438e-06, -2.532266080379486e-06, -2.246350049972534e-06, -1.9604340195655823e-06, -1.6745179891586304e-06, -1.3886019587516785e-06, -1.1026859283447266e-06, -8.167698979377747e-07, -5.308538675308228e-07, -2.4493783712387085e-07, 4.0978193283081055e-08, 3.2689422369003296e-07, 6.128102540969849e-07, 8.987262845039368e-07, 1.1846423149108887e-06, 1.4705583453178406e-06, 1.7564743757247925e-06, 2.0423904061317444e-06, 2.3283064365386963e-06, 2.614222466945648e-06, 2.9001384973526e-06, 3.186054527759552e-06, 3.471970558166504e-06, 3.757886588573456e-06, 4.043802618980408e-06, 4.32971864938736e-06, 4.6156346797943115e-06, 4.9015507102012634e-06, 5.187466740608215e-06, 5.473382771015167e-06, 5.759298801422119e-06, 6.045214831829071e-06, 6.331130862236023e-06, 6.617046892642975e-06, 6.902962923049927e-06, 7.188878953456879e-06, 7.4747949838638306e-06, 7.760711014270782e-06, 8.046627044677734e-06]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 5.0, 4.0, 8.0, 11.0, 15.0, 16.0, 25.0, 29.0, 38.0, 58.0, 88.0, 110.0, 156.0, 222.0, 313.0, 487.0, 783.0, 1237.0, 2004.0, 3675.0, 6864.0, 14350.0, 36622.0, 186193.0, 707491.0, 50899.0, 18078.0, 8139.0, 4303.0, 2373.0, 1369.0, 881.0, 570.0, 327.0, 238.0, 165.0, 103.0, 85.0, 68.0, 42.0, 33.0, 25.0, 20.0, 13.0, 9.0, 7.0, 4.0, 5.0, 3.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00606536865234375, -0.005882322788238525, -0.005699276924133301, -0.005516231060028076, -0.0053331851959228516, -0.005150139331817627, -0.004967093467712402, -0.004784047603607178, -0.004601001739501953, -0.0044179558753967285, -0.004234910011291504, -0.004051864147186279, -0.0038688182830810547, -0.00368577241897583, -0.0035027265548706055, -0.003319680690765381, -0.0031366348266601562, -0.0029535889625549316, -0.002770543098449707, -0.0025874972343444824, -0.002404451370239258, -0.002221405506134033, -0.0020383596420288086, -0.001855313777923584, -0.0016722679138183594, -0.0014892220497131348, -0.0013061761856079102, -0.0011231303215026855, -0.0009400844573974609, -0.0007570385932922363, -0.0005739927291870117, -0.0003909468650817871, -0.0002079010009765625, -2.485513687133789e-05, 0.00015819072723388672, 0.00034123659133911133, 0.0005242824554443359, 0.0007073283195495605, 0.0008903741836547852, 0.0010734200477600098, 0.0012564659118652344, 0.001439511775970459, 0.0016225576400756836, 0.0018056035041809082, 0.001988649368286133, 0.0021716952323913574, 0.002354741096496582, 0.0025377869606018066, 0.0027208328247070312, 0.002903878688812256, 0.0030869245529174805, 0.003269970417022705, 0.0034530162811279297, 0.0036360621452331543, 0.003819108009338379, 0.0040021538734436035, 0.004185199737548828, 0.004368245601654053, 0.004551291465759277, 0.004734337329864502, 0.0049173831939697266, 0.005100429058074951, 0.005283474922180176, 0.0054665207862854, 0.005649566650390625]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 7.0, 4.0, 7.0, 12.0, 13.0, 
[wandb-summary gradient-histogram dump, abridged: each entry is a 64-bin histogram with "values" (counts per bin) and "bins" (bin edges) for one parameter's gradients. This portion covers decoder.model.decoder.layers.7 and layers.6 — the weights and biases of self_attn and encoder_attn q_proj/k_proj/v_proj/out_proj, fc1, fc2, self_attn_layer_norm, encoder_attn_layer_norm, and final_layer_norm. The numeric dump continues for the remaining decoder layers.]
3.7476420402526855e-06, 4.1835010051727295e-06, 4.6193599700927734e-06, 5.055218935012817e-06, 5.491077899932861e-06, 5.926936864852905e-06, 6.362795829772949e-06, 6.798654794692993e-06, 7.234513759613037e-06, 7.670372724533081e-06, 8.106231689453125e-06, 8.542090654373169e-06, 8.977949619293213e-06, 9.413808584213257e-06, 9.8496675491333e-06, 1.0285526514053345e-05, 1.0721385478973389e-05, 1.1157244443893433e-05, 1.1593103408813477e-05, 1.202896237373352e-05, 1.2464821338653564e-05, 1.2900680303573608e-05, 1.3336539268493652e-05, 1.3772398233413696e-05, 1.420825719833374e-05, 1.4644116163253784e-05, 1.5079975128173828e-05]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 7.0, 2.0, 4.0, 5.0, 6.0, 9.0, 12.0, 10.0, 18.0, 16.0, 13.0, 14.0, 16.0, 29.0, 32.0, 26.0, 31.0, 28.0, 33.0, 48.0, 36.0, 58.0, 39.0, 41.0, 32.0, 44.0, 45.0, 34.0, 26.0, 34.0, 37.0, 37.0, 30.0, 33.0, 20.0, 15.0, 17.0, 15.0, 13.0, 6.0, 9.0, 10.0, 6.0, 5.0, 2.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.0003685951232910156, -0.0003559477627277374, -0.00034330040216445923, -0.00033065304160118103, -0.00031800568103790283, -0.00030535832047462463, -0.00029271095991134644, -0.00028006359934806824, -0.00026741623878479004, -0.00025476887822151184, -0.00024212151765823364, -0.00022947415709495544, -0.00021682679653167725, -0.00020417943596839905, -0.00019153207540512085, -0.00017888471484184265, -0.00016623735427856445, -0.00015358999371528625, -0.00014094263315200806, -0.00012829527258872986, -0.00011564791202545166, -0.00010300055146217346, -9.035319089889526e-05, -7.770583033561707e-05, -6.505846977233887e-05, -5.241110920906067e-05, -3.976374864578247e-05, -2.7116388082504272e-05, -1.4469027519226074e-05, -1.821666955947876e-06, 1.0825693607330322e-05, 2.347305417060852e-05, 3.612041473388672e-05, 4.876777529716492e-05, 6.141513586044312e-05, 7.406249642372131e-05, 8.670985698699951e-05, 9.935721755027771e-05, 0.00011200457811355591, 0.0001246519386768341, 0.0001372992992401123, 0.0001499466598033905, 0.0001625940203666687, 0.0001752413809299469, 0.0001878887414932251, 0.0002005361020565033, 0.0002131834626197815, 0.0002258308231830597, 0.0002384781837463379, 0.0002511255443096161, 0.0002637729048728943, 0.0002764202654361725, 0.0002890676259994507, 0.0003017149865627289, 0.0003143623471260071, 0.0003270097076892853, 0.0003396570682525635, 0.0003523044288158417, 0.0003649517893791199, 0.00037759914994239807, 0.00039024651050567627, 0.00040289387106895447, 0.00041554123163223267, 0.00042818859219551086, 0.00044083595275878906]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 54.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9147.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1029915.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9392.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0], "bins": [-1.7881393432617188e-07, -1.7415732145309448e-07, -1.695007085800171e-07, -1.648440957069397e-07, -1.601874828338623e-07, -1.555308699607849e-07, -1.5087425708770752e-07, -1.4621764421463013e-07, -1.4156103134155273e-07, -1.3690441846847534e-07, -1.3224780559539795e-07, -1.2759119272232056e-07, -1.2293457984924316e-07, -1.1827796697616577e-07, -1.1362135410308838e-07, -1.0896474123001099e-07, -1.043081283569336e-07, -9.96515154838562e-08, 
-9.499490261077881e-08, -9.033828973770142e-08, -8.568167686462402e-08, -8.102506399154663e-08, -7.636845111846924e-08, -7.171183824539185e-08, -6.705522537231445e-08, -6.239861249923706e-08, -5.774199962615967e-08, -5.3085386753082275e-08, -4.842877388000488e-08, -4.377216100692749e-08, -3.91155481338501e-08, -3.4458935260772705e-08, -2.9802322387695312e-08, -2.514570951461792e-08, -2.0489096641540527e-08, -1.5832483768463135e-08, -1.1175870895385742e-08, -6.51925802230835e-09, -1.862645149230957e-09, 2.7939677238464355e-09, 7.450580596923828e-09, 1.210719347000122e-08, 1.6763806343078613e-08, 2.1420419216156006e-08, 2.60770320892334e-08, 3.073364496231079e-08, 3.5390257835388184e-08, 4.0046870708465576e-08, 4.470348358154297e-08, 4.936009645462036e-08, 5.4016709327697754e-08, 5.8673322200775146e-08, 6.332993507385254e-08, 6.798654794692993e-08, 7.264316082000732e-08, 7.729977369308472e-08, 8.195638656616211e-08, 8.66129994392395e-08, 9.12696123123169e-08, 9.592622518539429e-08, 1.0058283805847168e-07, 1.0523945093154907e-07, 1.0989606380462646e-07, 1.1455267667770386e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 2.0, 0.0, 9.0, 0.0, 19.0, 31.0, 0.0, 38.0, 0.0, 58.0, 0.0, 82.0, 0.0, 94.0, 0.0, 92.0, 118.0, 0.0, 102.0, 0.0, 112.0, 0.0, 65.0, 0.0, 64.0, 0.0, 55.0, 28.0, 0.0, 14.0, 0.0, 11.0, 0.0, 6.0, 0.0, 3.0, 0.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0728836059570312e-06, -1.0402873158454895e-06, -1.0076910257339478e-06, -9.75094735622406e-07, -9.424984455108643e-07, -9.099021553993225e-07, -8.773058652877808e-07, -8.44709575176239e-07, -8.121132850646973e-07, -7.795169949531555e-07, -7.469207048416138e-07, -7.14324414730072e-07, -6.817281246185303e-07, -6.491318345069885e-07, -6.165355443954468e-07, -5.83939254283905e-07, -5.513429641723633e-07, -5.187466740608215e-07, -4.861503839492798e-07, -4.5355409383773804e-07, -4.209578037261963e-07, -3.8836151361465454e-07, -3.557652235031128e-07, -3.2316893339157104e-07, -2.905726432800293e-07, -2.5797635316848755e-07, -2.253800630569458e-07, -1.9278377294540405e-07, -1.601874828338623e-07, -1.2759119272232056e-07, -9.499490261077881e-08, -6.239861249923706e-08, -2.9802322387695312e-08, 2.7939677238464355e-09, 3.5390257835388184e-08, 6.798654794692993e-08, 1.0058283805847168e-07, 1.3317912817001343e-07, 1.6577541828155518e-07, 1.9837170839309692e-07, 2.3096799850463867e-07, 2.635642886161804e-07, 2.9616057872772217e-07, 3.287568688392639e-07, 3.6135315895080566e-07, 3.939494490623474e-07, 4.2654573917388916e-07, 4.591420292854309e-07, 4.917383193969727e-07, 5.243346095085144e-07, 5.569308996200562e-07, 5.895271897315979e-07, 6.221234798431396e-07, 6.547197699546814e-07, 6.873160600662231e-07, 7.199123501777649e-07, 7.525086402893066e-07, 7.851049304008484e-07, 8.177012205123901e-07, 8.502975106239319e-07, 8.828938007354736e-07, 9.154900908470154e-07, 9.480863809585571e-07, 9.806826710700989e-07, 1.0132789611816406e-06]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 15.0, 0.0, 0.0, 0.0, 31.0, 0.0, 0.0, 0.0, 152.0, 0.0, 0.0, 971.0, 0.0, 0.0, 0.0, 6893.0, 0.0, 0.0, 0.0, 1032629.0, 0.0, 0.0, 0.0, 6717.0, 0.0, 0.0, 960.0, 0.0, 0.0, 0.0, 156.0, 0.0, 0.0, 0.0, 24.0, 0.0, 0.0, 0.0, 22.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 1.0], "bins": [-4.172325134277344e-07, -4.0140002965927124e-07, -3.855675458908081e-07, -3.6973506212234497e-07, -3.5390257835388184e-07, -3.380700945854187e-07, -3.2223761081695557e-07, -3.0640512704849243e-07, -2.905726432800293e-07, -2.7474015951156616e-07, -2.5890767574310303e-07, -2.430751919746399e-07, -2.2724270820617676e-07, -2.1141022443771362e-07, -1.955777406692505e-07, -1.7974525690078735e-07, -1.6391277313232422e-07, -1.4808028936386108e-07, -1.3224780559539795e-07, -1.1641532182693481e-07, -1.0058283805847168e-07, -8.475035429000854e-08, -6.891787052154541e-08, -5.3085386753082275e-08, -3.725290298461914e-08, -2.1420419216156006e-08, -5.587935447692871e-09, 1.0244548320770264e-08, 2.60770320892334e-08, 4.190951585769653e-08, 5.774199962615967e-08, 7.35744833946228e-08, 8.940696716308594e-08, 1.0523945093154907e-07, 1.210719347000122e-07, 1.3690441846847534e-07, 1.5273690223693848e-07, 1.685693860054016e-07, 1.8440186977386475e-07, 2.0023435354232788e-07, 2.1606683731079102e-07, 2.3189932107925415e-07, 2.477318048477173e-07, 2.635642886161804e-07, 2.7939677238464355e-07, 2.952292561531067e-07, 3.110617399215698e-07, 3.2689422369003296e-07, 3.427267074584961e-07, 3.5855919122695923e-07, 3.7439167499542236e-07, 3.902241587638855e-07, 4.0605664253234863e-07, 4.2188912630081177e-07, 4.377216100692749e-07, 4.5355409383773804e-07, 4.6938657760620117e-07, 4.852190613746643e-07, 5.010515451431274e-07, 5.168840289115906e-07, 5.327165126800537e-07, 5.485489964485168e-07, 5.6438148021698e-07, 5.802139639854431e-07, 5.960464477539062e-07]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 991.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 14.0], "bins": [-1.1920928955078125e-07, -1.1641532182693481e-07, -1.1362135410308838e-07, -1.1082738637924194e-07, -1.0803341865539551e-07, -1.0523945093154907e-07, -1.0244548320770264e-07, -9.96515154838562e-08, -9.685754776000977e-08, -9.406358003616333e-08, -9.12696123123169e-08, -8.847564458847046e-08, -8.568167686462402e-08, -8.288770914077759e-08, -8.009374141693115e-08, -7.729977369308472e-08, -7.450580596923828e-08, -7.171183824539185e-08, -6.891787052154541e-08, -6.612390279769897e-08, -6.332993507385254e-08, -6.05359673500061e-08, -5.774199962615967e-08, -5.494803190231323e-08, -5.21540641784668e-08, -4.936009645462036e-08, -4.6566128730773926e-08, -4.377216100692749e-08, -4.0978193283081055e-08, -3.818422555923462e-08, -3.5390257835388184e-08, -3.259629011154175e-08, -2.9802322387695312e-08, -2.7008354663848877e-08, -2.421438694000244e-08, -2.1420419216156006e-08, -1.862645149230957e-08, -1.5832483768463135e-08, -1.30385160446167e-08, -1.0244548320770264e-08, -7.450580596923828e-09, -4.6566128730773926e-09, -1.862645149230957e-09, 9.313225746154785e-10, 3.725290298461914e-09, 6.51925802230835e-09, 9.313225746154785e-09, 1.210719347000122e-08, 1.4901161193847656e-08, 1.7695128917694092e-08, 2.0489096641540527e-08, 2.3283064365386963e-08, 2.60770320892334e-08, 2.8870999813079834e-08, 3.166496753692627e-08, 3.4458935260772705e-08, 3.725290298461914e-08, 4.0046870708465576e-08, 4.284083843231201e-08, 4.563480615615845e-08, 4.842877388000488e-08, 5.122274160385132e-08, 5.4016709327697754e-08, 5.681067705154419e-08, 
5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.6.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 6.0, 8.0, 5.0, 18.0, 31.0, 65.0, 125.0, 295.0, 251.0, 107.0, 44.0, 16.0, 11.0, 3.0, 5.0, 2.0, 3.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0006109077367000282, -0.0005916517693549395, -0.0005723958020098507, -0.0005531397764571011, -0.0005338838091120124, -0.0005146278417669237, -0.0004953718744218349, -0.00047611587797291577, -0.0004568598815239966, -0.00043760391417890787, -0.0004183479177299887, -0.0003990919503849, -0.0003798359539359808, -0.0003605799865908921, -0.00034132401924580336, -0.0003220680227968842, -0.00030281205545179546, -0.00028355608810670674, -0.00026430009165778756, -0.00024504412431269884, -0.00022578812786377966, -0.00020653216051869094, -0.000187276178621687, -0.00016802019672468305, -0.0001487642148276791, -0.00012950823293067515, -0.0001102522510336712, -9.099627641262487e-05, -7.174029451562092e-05, -5.248431261861697e-05, -3.3228337997570634e-05, -1.3972356100566685e-05, 5.2836257964372635e-06, 2.453960587445181e-05, 4.3795585952466354e-05, 6.30515642114915e-05, 8.230754610849544e-05, 0.00010156352800549939, 0.00012081950262654573, 0.00014007548452354968, 0.00015933146642055362, 0.00017858744831755757, 0.00019784343021456152, 0.00021709941211156547, 0.0002363553794566542, 0.00025561137590557337, 0.0002748673432506621, 0.0002941233105957508, 0.00031337930704467, 0.0003326352743897587, 0.0003518912708386779, 0.0003711472381837666, 0.0003904032346326858, 0.0004096592019777745, 0.0004289151984266937, 0.0004481711657717824, 0.0004674271331168711, 0.00048668310046195984, 0.0005059390678070486, 0.0005251950933597982, 0.0005444510607048869, 0.0005637070280499756, 0.0005829629953950644, 0.000602219020947814, 0.0006214749882929027]}, "gradients/decoder.model.decoder.layers.6.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 6.0, 4.0, 8.0, 5.0, 13.0, 12.0, 14.0, 12.0, 12.0, 17.0, 26.0, 24.0, 29.0, 32.0, 34.0, 30.0, 36.0, 46.0, 49.0, 40.0, 39.0, 52.0, 50.0, 52.0, 38.0, 45.0, 40.0, 41.0, 25.0, 28.0, 23.0, 25.0, 19.0, 13.0, 16.0, 5.0, 9.0, 10.0, 7.0, 7.0, 4.0, 7.0, 2.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.333225898444653e-05, -9.031982335727662e-05, -8.730739500606433e-05, -8.429495937889442e-05, -8.128253102768213e-05, -7.827009540051222e-05, -7.525766704929993e-05, -7.224523142213002e-05, -6.923280307091773e-05, -6.622036744374782e-05, -6.320793909253553e-05, -6.0195507103344426e-05, -5.7183075114153326e-05, -5.4170643124962226e-05, -5.1158211135771126e-05, -4.814577550860122e-05, -4.513334351941012e-05, -4.212091153021902e-05, -3.910847954102792e-05, -3.609604755183682e-05, -3.308361556264572e-05, -3.007118357345462e-05, -2.7058749765274115e-05, -2.4046317776083015e-05, -2.1033885786891915e-05, -1.8021453797700815e-05, -1.5009021808509715e-05, -1.1996588909823913e-05, -8.984156920632813e-06, -5.971724931441713e-06, -2.9592920327559114e-06, 5.313995643518865e-08, 3.0655719456262887e-06, 6.078003934817389e-06, 9.090435924008489e-06, 1.210286882269429e-05, 1.511530081188539e-05, 1.812773189158179e-05, 2.1140165699762292e-05, 2.4152597688953392e-05, 2.7165029678144492e-05, 3.0177461667335592e-05, 3.3189895475516096e-05, 3.6202327464707196e-05, 
3.9214759453898296e-05, 4.2227191443089396e-05, 4.5239623432280496e-05, 4.8252055421471596e-05, 5.1264487410662696e-05, 5.4276919399853796e-05, 5.7289351389044896e-05, 6.0301783378235996e-05, 6.33142190054059e-05, 6.63266473566182e-05, 6.93390829837881e-05, 7.23515113350004e-05, 7.53639469621703e-05, 7.837638258934021e-05, 8.13888109405525e-05, 8.440124656772241e-05, 8.74136749189347e-05, 9.042611054610461e-05, 9.34385388973169e-05, 9.645097452448681e-05, 9.94634028756991e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 7.0, 4.0, 7.0, 3.0, 3.0, 8.0, 12.0, 18.0, 25.0, 120.0, 2014.0, 870172.0, 174825.0, 1129.0, 87.0, 36.0, 15.0, 20.0, 4.0, 6.0, 4.0, 8.0, 2.0, 3.0, 4.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.001483917236328125, -0.0014325976371765137, -0.0013812780380249023, -0.001329958438873291, -0.0012786388397216797, -0.0012273192405700684, -0.001175999641418457, -0.0011246800422668457, -0.0010733604431152344, -0.001022040843963623, -0.0009707212448120117, -0.0009194016456604004, -0.0008680820465087891, -0.0008167624473571777, -0.0007654428482055664, -0.0007141232490539551, -0.0006628036499023438, -0.0006114840507507324, -0.0005601644515991211, -0.0005088448524475098, -0.00045752525329589844, -0.0004062056541442871, -0.0003548860549926758, -0.00030356645584106445, -0.0002522468566894531, -0.0002009272575378418, -0.00014960765838623047, -9.828805923461914e-05, -4.696846008300781e-05, 4.351139068603516e-06, 5.5670738220214844e-05, 0.00010699033737182617, 0.0001583099365234375, 0.00020962953567504883, 0.00026094913482666016, 0.0003122687339782715, 0.0003635883331298828, 0.00041490793228149414, 0.00046622753143310547, 0.0005175471305847168, 0.0005688667297363281, 0.0006201863288879395, 0.0006715059280395508, 0.0007228255271911621, 0.0007741451263427734, 0.0008254647254943848, 0.0008767843246459961, 0.0009281039237976074, 0.0009794235229492188, 0.00103074312210083, 0.0010820627212524414, 0.0011333823204040527, 0.001184701919555664, 0.0012360215187072754, 0.0012873411178588867, 0.001338660717010498, 0.0013899803161621094, 0.0014412999153137207, 0.001492619514465332, 0.0015439391136169434, 0.0015952587127685547, 0.001646578311920166, 0.0016978979110717773, 0.0017492175102233887, 0.001800537109375]}, "gradients/decoder.model.decoder.layers.6.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 4.0, 7.0, 6.0, 7.0, 7.0, 13.0, 16.0, 29.0, 41.0, 47.0, 68.0, 78.0, 81.0, 112.0, 98.0, 73.0, 68.0, 50.0, 50.0, 26.0, 25.0, 28.0, 13.0, 6.0, 5.0, 10.0, 4.0, 7.0, 3.0, 7.0, 0.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0], "bins": [-2.0802021026611328e-05, -2.02227383852005e-05, -1.9643455743789673e-05, -1.9064173102378845e-05, -1.8484890460968018e-05, -1.790560781955719e-05, -1.7326325178146362e-05, -1.6747042536735535e-05, -1.6167759895324707e-05, -1.558847725391388e-05, -1.5009194612503052e-05, -1.4429911971092224e-05, -1.3850629329681396e-05, -1.3271346688270569e-05, -1.2692064046859741e-05, -1.2112781405448914e-05, -1.1533498764038086e-05, -1.0954216122627258e-05, -1.037493348121643e-05, -9.795650839805603e-06, -9.216368198394775e-06, -8.637085556983948e-06, -8.05780291557312e-06, -7.4785202741622925e-06, -6.899237632751465e-06, -6.319954991340637e-06, 
-5.7406723499298096e-06, -5.161389708518982e-06, -4.582107067108154e-06, -4.002824425697327e-06, -3.423541784286499e-06, -2.8442591428756714e-06, -2.2649765014648438e-06, -1.6856938600540161e-06, -1.1064112186431885e-06, -5.271285772323608e-07, 5.21540641784668e-08, 6.314367055892944e-07, 1.210719347000122e-06, 1.7900019884109497e-06, 2.3692846298217773e-06, 2.948567271232605e-06, 3.5278499126434326e-06, 4.10713255405426e-06, 4.686415195465088e-06, 5.2656978368759155e-06, 5.844980478286743e-06, 6.424263119697571e-06, 7.0035457611083984e-06, 7.582828402519226e-06, 8.162111043930054e-06, 8.741393685340881e-06, 9.320676326751709e-06, 9.899958968162537e-06, 1.0479241609573364e-05, 1.1058524250984192e-05, 1.163780689239502e-05, 1.2217089533805847e-05, 1.2796372175216675e-05, 1.3375654816627502e-05, 1.395493745803833e-05, 1.4534220099449158e-05, 1.5113502740859985e-05, 1.5692785382270813e-05, 1.627206802368164e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 6.0, 5.0, 4.0, 3.0, 12.0, 13.0, 19.0, 16.0, 26.0, 40.0, 53.0, 69.0, 99.0, 148.0, 217.0, 314.0, 432.0, 587.0, 886.0, 1359.0, 2137.0, 3484.0, 6072.0, 10963.0, 23810.0, 59839.0, 220618.0, 510495.0, 125391.0, 41937.0, 17841.0, 8581.0, 4875.0, 2868.0, 1693.0, 1111.0, 706.0, 513.0, 386.0, 247.0, 168.0, 126.0, 90.0, 68.0, 48.0, 42.0, 30.0, 34.0, 20.0, 26.0, 10.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 4.0, 0.0, 1.0, 1.0, 1.0], "bins": [-7.623434066772461e-05, -7.363315671682358e-05, -7.103197276592255e-05, -6.843078881502151e-05, -6.582960486412048e-05, -6.322842091321945e-05, -6.062723696231842e-05, -5.802605301141739e-05, -5.542486906051636e-05, -5.2823685109615326e-05, -5.0222501158714294e-05, -4.762131720781326e-05, -4.502013325691223e-05, -4.24189493060112e-05, -3.981776535511017e-05, -3.721658140420914e-05, -3.4615397453308105e-05, -3.2014213502407074e-05, -2.9413029551506042e-05, -2.681184560060501e-05, -2.421066164970398e-05, -2.1609477698802948e-05, -1.9008293747901917e-05, -1.6407109797000885e-05, -1.3805925846099854e-05, -1.1204741895198822e-05, -8.60355794429779e-06, -6.002373993396759e-06, -3.4011900424957275e-06, -8.00006091594696e-07, 1.8011778593063354e-06, 4.402361810207367e-06, 7.0035457611083984e-06, 9.60472971200943e-06, 1.2205913662910461e-05, 1.4807097613811493e-05, 1.7408281564712524e-05, 2.0009465515613556e-05, 2.2610649466514587e-05, 2.521183341741562e-05, 2.781301736831665e-05, 3.0414201319217682e-05, 3.301538527011871e-05, 3.5616569221019745e-05, 3.8217753171920776e-05, 4.081893712282181e-05, 4.342012107372284e-05, 4.602130502462387e-05, 4.86224889755249e-05, 5.1223672926425934e-05, 5.3824856877326965e-05, 5.6426040828228e-05, 5.902722477912903e-05, 6.162840873003006e-05, 6.422959268093109e-05, 6.683077663183212e-05, 6.943196058273315e-05, 7.203314453363419e-05, 7.463432848453522e-05, 7.723551243543625e-05, 7.983669638633728e-05, 8.243788033723831e-05, 8.503906428813934e-05, 8.764024823904037e-05, 9.02414321899414e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 3.0, 5.0, 6.0, 4.0, 0.0, 12.0, 7.0, 19.0, 20.0, 24.0, 37.0, 55.0, 72.0, 81.0, 81.0, 72.0, 104.0, 84.0, 79.0, 69.0, 43.0, 27.0, 20.0, 16.0, 12.0, 9.0, 8.0, 3.0, 9.0, 5.0, 3.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.398822784423828e-05, -4.239846020936966e-05, -4.080869257450104e-05, -3.9218924939632416e-05, 
-3.7629157304763794e-05, -3.603938966989517e-05, -3.444962203502655e-05, -3.285985440015793e-05, -3.127008676528931e-05, -2.9680319130420685e-05, -2.8090551495552063e-05, -2.650078386068344e-05, -2.491101622581482e-05, -2.3321248590946198e-05, -2.1731480956077576e-05, -2.0141713321208954e-05, -1.8551945686340332e-05, -1.696217805147171e-05, -1.537241041660309e-05, -1.3782642781734467e-05, -1.2192875146865845e-05, -1.0603107511997223e-05, -9.013339877128601e-06, -7.423572242259979e-06, -5.833804607391357e-06, -4.244036972522736e-06, -2.6542693376541138e-06, -1.064501702785492e-06, 5.252659320831299e-07, 2.1150335669517517e-06, 3.7048012018203735e-06, 5.294568836688995e-06, 6.884336471557617e-06, 8.474104106426239e-06, 1.006387174129486e-05, 1.1653639376163483e-05, 1.3243407011032104e-05, 1.4833174645900726e-05, 1.6422942280769348e-05, 1.801270991563797e-05, 1.9602477550506592e-05, 2.1192245185375214e-05, 2.2782012820243835e-05, 2.4371780455112457e-05, 2.596154808998108e-05, 2.75513157248497e-05, 2.9141083359718323e-05, 3.0730850994586945e-05, 3.2320618629455566e-05, 3.391038626432419e-05, 3.550015389919281e-05, 3.708992153406143e-05, 3.8679689168930054e-05, 4.0269456803798676e-05, 4.18592244386673e-05, 4.344899207353592e-05, 4.503875970840454e-05, 4.662852734327316e-05, 4.8218294978141785e-05, 4.9808062613010406e-05, 5.139783024787903e-05, 5.298759788274765e-05, 5.457736551761627e-05, 5.6167133152484894e-05, 5.7756900787353516e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 3.0, 6.0, 4.0, 5.0, 8.0, 16.0, 21.0, 19.0, 27.0, 44.0, 53.0, 60.0, 97.0, 119.0, 178.0, 273.0, 356.0, 591.0, 955.0, 1925.0, 5072.0, 19046.0, 961949.0, 43541.0, 7925.0, 2590.0, 1266.0, 722.0, 475.0, 311.0, 234.0, 169.0, 107.0, 103.0, 76.0, 47.0, 37.0, 31.0, 24.0, 20.0, 12.0, 14.0, 4.0, 6.0, 8.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.0004374980926513672, -0.0004245191812515259, -0.00041154026985168457, -0.00039856135845184326, -0.00038558244705200195, -0.00037260353565216064, -0.00035962462425231934, -0.00034664571285247803, -0.0003336668014526367, -0.0003206878900527954, -0.0003077089786529541, -0.0002947300672531128, -0.0002817511558532715, -0.0002687722444534302, -0.00025579333305358887, -0.00024281442165374756, -0.00022983551025390625, -0.00021685659885406494, -0.00020387768745422363, -0.00019089877605438232, -0.00017791986465454102, -0.0001649409532546997, -0.0001519620418548584, -0.0001389831304550171, -0.00012600421905517578, -0.00011302530765533447, -0.00010004639625549316, -8.706748485565186e-05, -7.408857345581055e-05, -6.110966205596924e-05, -4.813075065612793e-05, -3.515183925628662e-05, -2.2172927856445312e-05, -9.194016456604004e-06, 3.7848949432373047e-06, 1.6763806343078613e-05, 2.9742717742919922e-05, 4.272162914276123e-05, 5.570054054260254e-05, 6.867945194244385e-05, 8.165836334228516e-05, 9.463727474212646e-05, 0.00010761618614196777, 0.00012059509754180908, 0.0001335740089416504, 0.0001465529203414917, 0.000159531831741333, 0.00017251074314117432, 0.00018548965454101562, 0.00019846856594085693, 0.00021144747734069824, 0.00022442638874053955, 0.00023740530014038086, 0.00025038421154022217, 0.0002633631229400635, 0.0002763420343399048, 0.0002893209457397461, 0.0003022998571395874, 0.0003152787685394287, 0.00032825767993927, 0.00034123659133911133, 0.00035421550273895264, 0.00036719441413879395, 0.00038017332553863525, 0.00039315223693847656]}, 
"gradients/decoder.model.decoder.layers.6.self_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 83.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 236.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 363.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 240.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 66.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-2.384185791015625e-07, -2.3096799850463867e-07, -2.2351741790771484e-07, -2.1606683731079102e-07, -2.086162567138672e-07, -2.0116567611694336e-07, -1.9371509552001953e-07, -1.862645149230957e-07, -1.7881393432617188e-07, -1.7136335372924805e-07, -1.6391277313232422e-07, -1.564621925354004e-07, -1.4901161193847656e-07, -1.4156103134155273e-07, -1.341104507446289e-07, -1.2665987014770508e-07, -1.1920928955078125e-07, -1.1175870895385742e-07, -1.043081283569336e-07, -9.685754776000977e-08, -8.940696716308594e-08, -8.195638656616211e-08, -7.450580596923828e-08, -6.705522537231445e-08, -5.960464477539063e-08, -5.21540641784668e-08, -4.470348358154297e-08, -3.725290298461914e-08, -2.9802322387695312e-08, -2.2351741790771484e-08, -1.4901161193847656e-08, -7.450580596923828e-09, 0.0, 7.450580596923828e-09, 1.4901161193847656e-08, 2.2351741790771484e-08, 2.9802322387695312e-08, 3.725290298461914e-08, 4.470348358154297e-08, 5.21540641784668e-08, 5.960464477539063e-08, 6.705522537231445e-08, 7.450580596923828e-08, 8.195638656616211e-08, 8.940696716308594e-08, 9.685754776000977e-08, 1.043081283569336e-07, 1.1175870895385742e-07, 1.1920928955078125e-07, 1.2665987014770508e-07, 1.341104507446289e-07, 1.4156103134155273e-07, 1.4901161193847656e-07, 1.564621925354004e-07, 1.6391277313232422e-07, 1.7136335372924805e-07, 1.7881393432617188e-07, 1.862645149230957e-07, 1.9371509552001953e-07, 2.0116567611694336e-07, 2.086162567138672e-07, 2.1606683731079102e-07, 2.2351741790771484e-07, 2.3096799850463867e-07, 2.384185791015625e-07]}, "gradients/decoder.model.decoder.layers.6.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 6.0, 5.0, 10.0, 15.0, 20.0, 20.0, 25.0, 41.0, 60.0, 59.0, 89.0, 135.0, 158.0, 258.0, 353.0, 483.0, 705.0, 1067.0, 1507.0, 2399.0, 3938.0, 6339.0, 13076.0, 41420.0, 916981.0, 31460.0, 11658.0, 5937.0, 3471.0, 2215.0, 1443.0, 938.0, 642.0, 480.0, 342.0, 213.0, 162.0, 128.0, 91.0, 51.0, 43.0, 34.0, 22.0, 20.0, 12.0, 10.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.00011277198791503906, -0.00010949559509754181, -0.00010621920228004456, -0.0001029428094625473, -9.966641664505005e-05, -9.63900238275528e-05, -9.311363101005554e-05, -8.983723819255829e-05, -8.656084537506104e-05, -8.328445255756378e-05, -8.000805974006653e-05, -7.673166692256927e-05, -7.345527410507202e-05, -7.017888128757477e-05, -6.690248847007751e-05, -6.362609565258026e-05, -6.034970283508301e-05, -5.7073310017585754e-05, -5.37969172000885e-05, -5.052052438259125e-05, -4.7244131565093994e-05, -4.396773874759674e-05, -4.069134593009949e-05, -3.7414953112602234e-05, -3.413856029510498e-05, -3.086216747760773e-05, -2.7585774660110474e-05, -2.430938184261322e-05, -2.1032989025115967e-05, -1.7756596207618713e-05, -1.448020339012146e-05, -1.1203810572624207e-05, -7.927417755126953e-06, -4.6510249376297e-06, -1.3746321201324463e-06, 1.9017606973648071e-06, 5.1781535148620605e-06, 8.454546332359314e-06, 1.1730939149856567e-05, 1.500733196735382e-05, 1.8283724784851074e-05, 2.1560117602348328e-05, 2.483651041984558e-05, 
2.8112903237342834e-05, 3.138929605484009e-05, 3.466568887233734e-05, 3.7942081689834595e-05, 4.121847450733185e-05, 4.44948673248291e-05, 4.7771260142326355e-05, 5.104765295982361e-05, 5.432404577732086e-05, 5.7600438594818115e-05, 6.087683141231537e-05, 6.415322422981262e-05, 6.742961704730988e-05, 7.070600986480713e-05, 7.398240268230438e-05, 7.725879549980164e-05, 8.053518831729889e-05, 8.381158113479614e-05, 8.70879739522934e-05, 9.036436676979065e-05, 9.36407595872879e-05, 9.691715240478516e-05]}, "gradients/decoder.model.decoder.layers.6.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 2.0, 5.0, 0.0, 3.0, 7.0, 6.0, 8.0, 8.0, 11.0, 7.0, 16.0, 41.0, 359.0, 407.0, 49.0, 7.0, 12.0, 8.0, 8.0, 3.0, 6.0, 4.0, 4.0, 3.0, 0.0, 2.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.659196853637695e-05, -7.457658648490906e-05, -7.256120443344116e-05, -7.054582238197327e-05, -6.853044033050537e-05, -6.651505827903748e-05, -6.449967622756958e-05, -6.248429417610168e-05, -6.046891212463379e-05, -5.8453530073165894e-05, -5.6438148021698e-05, -5.44227659702301e-05, -5.240738391876221e-05, -5.039200186729431e-05, -4.8376619815826416e-05, -4.636123776435852e-05, -4.4345855712890625e-05, -4.233047366142273e-05, -4.0315091609954834e-05, -3.829970955848694e-05, -3.628432750701904e-05, -3.426894545555115e-05, -3.225356340408325e-05, -3.0238181352615356e-05, -2.822279930114746e-05, -2.6207417249679565e-05, -2.419203519821167e-05, -2.2176653146743774e-05, -2.016127109527588e-05, -1.8145889043807983e-05, -1.6130506992340088e-05, -1.4115124940872192e-05, -1.2099742889404297e-05, -1.0084360837936401e-05, -8.068978786468506e-06, -6.05359673500061e-06, -4.038214683532715e-06, -2.0228326320648193e-06, -7.450580596923828e-09, 2.0079314708709717e-06, 4.023313522338867e-06, 6.038695573806763e-06, 8.054077625274658e-06, 1.0069459676742554e-05, 1.208484172821045e-05, 1.4100223779678345e-05, 1.611560583114624e-05, 1.8130987882614136e-05, 2.014636993408203e-05, 2.2161751985549927e-05, 2.4177134037017822e-05, 2.6192516088485718e-05, 2.8207898139953613e-05, 3.022328019142151e-05, 3.2238662242889404e-05, 3.42540442943573e-05, 3.6269426345825195e-05, 3.828480839729309e-05, 4.0300190448760986e-05, 4.231557250022888e-05, 4.433095455169678e-05, 4.634633660316467e-05, 4.836171865463257e-05, 5.0377100706100464e-05, 5.239248275756836e-05]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 2.0, 1.0, 0.0, 6.0, 2.0, 9.0, 9.0, 5.0, 6.0, 11.0, 17.0, 36.0, 42.0, 43.0, 76.0, 100.0, 149.0, 139.0, 92.0, 84.0, 32.0, 39.0, 23.0, 20.0, 3.0, 13.0, 7.0, 6.0, 3.0, 1.0, 4.0, 0.0, 5.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0004162068653386086, -0.0004035309248138219, -0.00039085495518520474, -0.00037817901466041803, -0.00036550304503180087, -0.00035282710450701416, -0.000340151134878397, -0.0003274751943536103, -0.00031479925382882357, -0.00030212331330403686, -0.0002894473436754197, -0.000276771403150633, -0.0002640954335220158, -0.0002514194929972291, -0.00023874353792052716, -0.00022606758284382522, -0.00021339162776712328, -0.00020071567269042134, -0.0001880397176137194, -0.00017536376253701746, -0.00016268782201223075, -0.00015001186693552881, -0.00013733591185882688, -0.00012465997133404016, 
-0.00011198400898138061, -9.930805390467867e-05, -8.663210610393435e-05, -7.395615102723241e-05, -6.128019595053047e-05, -4.8604248149786144e-05, -3.5928293073084205e-05, -2.325234527233988e-05, -1.0576390195637941e-05, 2.0995621525798924e-06, 1.4775514500797726e-05, 2.7451467758510262e-05, 4.0127419197233394e-05, 5.2803370635956526e-05, 6.547932571265846e-05, 7.815527351340279e-05, 9.083122859010473e-05, 0.00010350718366680667, 0.00011618313146755099, 0.00012885908654425293, 0.00014153504162095487, 0.00015421098214574158, 0.00016688695177435875, 0.00017956289229914546, 0.0001922388473758474, 0.00020491480245254934, 0.00021759075752925128, 0.00023026671260595322, 0.00024294265313073993, 0.00025561859365552664, 0.0002682945632841438, 0.0002809705038089305, 0.0002936464734375477, 0.0003063224139623344, 0.00031899838359095156, 0.00033167432411573827, 0.00034435029374435544, 0.00035702623426914215, 0.00036970217479392886, 0.00038237814442254603, 0.00039505408494733274]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 6.0, 3.0, 7.0, 6.0, 12.0, 8.0, 8.0, 9.0, 16.0, 17.0, 20.0, 19.0, 16.0, 25.0, 26.0, 30.0, 38.0, 32.0, 37.0, 41.0, 41.0, 38.0, 27.0, 37.0, 28.0, 35.0, 33.0, 30.0, 30.0, 35.0, 30.0, 36.0, 34.0, 23.0, 28.0, 16.0, 15.0, 24.0, 11.0, 10.0, 13.0, 10.0, 9.0, 4.0, 10.0, 5.0, 9.0, 1.0, 1.0, 0.0, 3.0, 2.0, 1.0, 1.0, 2.0], "bins": [-8.431822061538696e-05, -8.163775783032179e-05, -7.895730232121423e-05, -7.627683953614905e-05, -7.35963840270415e-05, -7.091592124197632e-05, -6.823545845691115e-05, -6.555500294780359e-05, -6.287454743869603e-05, -6.019408829160966e-05, -5.751362914452329e-05, -5.483316635945812e-05, -5.215271085035056e-05, -4.9472248065285385e-05, -4.679178891819902e-05, -4.411132977111265e-05, -4.1430866986047477e-05, -3.875040783896111e-05, -3.606994869187474e-05, -3.338948590680957e-05, -3.070903039770201e-05, -2.8028569431626238e-05, -2.5348108465550467e-05, -2.26676493184641e-05, -1.9987190171377733e-05, -1.7306731024291366e-05, -1.4626270967710298e-05, -1.1945810911129229e-05, -9.265351764042862e-06, -6.584892616956495e-06, -3.904431650880724e-06, -1.2239725037943572e-06, 1.4564793673343956e-06, 4.1369389691681135e-06, 6.8173985710018314e-06, 9.4978586275829e-06, 1.2178317774669267e-05, 1.4858776921755634e-05, 1.7539237887831405e-05, 2.0219697034917772e-05, 2.290015618200414e-05, 2.5580615329090506e-05, 2.8261074476176873e-05, 3.094153362326324e-05, 3.3621996408328414e-05, 3.6302451917435974e-05, 3.898291470250115e-05, 4.1663373849587515e-05, 4.434383299667388e-05, 4.702429214376025e-05, 4.9704751290846616e-05, 5.238521407591179e-05, 5.506566958501935e-05, 5.7746132370084524e-05, 6.042659151717089e-05, 6.310705066425726e-05, 6.578750617336482e-05, 6.846796895842999e-05, 7.114842446753755e-05, 7.382888725260273e-05, 7.650934276171029e-05, 7.918980554677546e-05, 8.187026833184063e-05, 8.45507238409482e-05, 8.723118662601337e-05]}, "gradients/decoder.model.decoder.layers.5.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 7.0, 4.0, 3.0, 4.0, 8.0, 11.0, 8.0, 13.0, 14.0, 19.0, 22.0, 36.0, 52.0, 77.0, 97.0, 131.0, 191.0, 237.0, 434.0, 704.0, 1216.0, 2233.0, 4547.0, 10154.0, 33404.0, 4091942.0, 29956.0, 9201.0, 4283.0, 2097.0, 1180.0, 620.0, 430.0, 291.0, 181.0, 130.0, 95.0, 55.0, 41.0, 48.0, 32.0, 21.0, 16.0, 9.0, 6.0, 4.0, 8.0, 6.0, 4.0, 5.0, 2.0, 1.0, 3.0], "bins": [-0.0006055831909179688, -0.000588979572057724, 
-0.0005723759531974792, -0.0005557723343372345, -0.0005391687154769897, -0.000522565096616745, -0.0005059614777565002, -0.0004893578588962555, -0.00047275424003601074, -0.000456150621175766, -0.00043954700231552124, -0.0004229433834552765, -0.00040633976459503174, -0.000389736145734787, -0.00037313252687454224, -0.0003565289080142975, -0.00033992528915405273, -0.000323321670293808, -0.00030671805143356323, -0.0002901144325733185, -0.00027351081371307373, -0.000256907194852829, -0.00024030357599258423, -0.00022369995713233948, -0.00020709633827209473, -0.00019049271941184998, -0.00017388910055160522, -0.00015728548169136047, -0.00014068186283111572, -0.00012407824397087097, -0.00010747462511062622, -9.087100625038147e-05, -7.426738739013672e-05, -5.766376852989197e-05, -4.106014966964722e-05, -2.4456530809402466e-05, -7.852911949157715e-06, 8.750706911087036e-06, 2.5354325771331787e-05, 4.195794463157654e-05, 5.856156349182129e-05, 7.516518235206604e-05, 9.176880121231079e-05, 0.00010837242007255554, 0.0001249760389328003, 0.00014157965779304504, 0.0001581832766532898, 0.00017478689551353455, 0.0001913905143737793, 0.00020799413323402405, 0.0002245977520942688, 0.00024120137095451355, 0.0002578049898147583, 0.00027440860867500305, 0.0002910122275352478, 0.00030761584639549255, 0.0003242194652557373, 0.00034082308411598206, 0.0003574267029762268, 0.00037403032183647156, 0.0003906339406967163, 0.00040723755955696106, 0.0004238411784172058, 0.00044044479727745056, 0.0004570484161376953]}, "gradients/decoder.model.decoder.layers.5.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 6.0, 3.0, 4.0, 6.0, 6.0, 11.0, 12.0, 23.0, 30.0, 61.0, 90.0, 142.0, 163.0, 152.0, 109.0, 73.0, 35.0, 23.0, 12.0, 4.0, 13.0, 5.0, 4.0, 8.0, 4.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.863739013671875e-05, -4.7274865210056305e-05, -4.591234028339386e-05, -4.4549815356731415e-05, -4.318729043006897e-05, -4.1824765503406525e-05, -4.046224057674408e-05, -3.9099715650081635e-05, -3.773719072341919e-05, -3.6374665796756744e-05, -3.50121408700943e-05, -3.3649615943431854e-05, -3.228709101676941e-05, -3.0924566090106964e-05, -2.956204116344452e-05, -2.8199516236782074e-05, -2.683699131011963e-05, -2.5474466383457184e-05, -2.411194145679474e-05, -2.2749416530132294e-05, -2.138689160346985e-05, -2.0024366676807404e-05, -1.866184175014496e-05, -1.7299316823482513e-05, -1.593679189682007e-05, -1.4574266970157623e-05, -1.3211742043495178e-05, -1.1849217116832733e-05, -1.0486692190170288e-05, -9.124167263507843e-06, -7.761642336845398e-06, -6.399117410182953e-06, -5.036592483520508e-06, -3.6740675568580627e-06, -2.3115426301956177e-06, -9.490177035331726e-07, 4.1350722312927246e-07, 1.7760321497917175e-06, 3.1385570764541626e-06, 4.501082003116608e-06, 5.863606929779053e-06, 7.226131856441498e-06, 8.588656783103943e-06, 9.951181709766388e-06, 1.1313706636428833e-05, 1.2676231563091278e-05, 1.4038756489753723e-05, 1.5401281416416168e-05, 1.6763806343078613e-05, 1.812633126974106e-05, 1.9488856196403503e-05, 2.085138112306595e-05, 2.2213906049728394e-05, 2.357643097639084e-05, 2.4938955903053284e-05, 2.630148082971573e-05, 2.7664005756378174e-05, 2.902653068304062e-05, 3.0389055609703064e-05, 3.175158053636551e-05, 3.3114105463027954e-05, 3.44766303896904e-05, 3.5839155316352844e-05, 3.720168024301529e-05, 3.8564205169677734e-05]}, 
"gradients/decoder.model.decoder.layers.5.fc1.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 2.0, 3.0, 1.0, 2.0, 7.0, 2.0, 7.0, 7.0, 19.0, 21.0, 38.0, 64.0, 137.0, 303.0, 600.0, 1582.0, 6609.0, 956309.0, 3218753.0, 6981.0, 1675.0, 609.0, 290.0, 124.0, 58.0, 34.0, 22.0, 6.0, 7.0, 8.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.0004057884216308594, -0.0003895610570907593, -0.0003733336925506592, -0.0003571063280105591, -0.000340878963470459, -0.0003246515989303589, -0.0003084242343902588, -0.0002921968698501587, -0.0002759695053100586, -0.0002597421407699585, -0.0002435147762298584, -0.0002272874116897583, -0.0002110600471496582, -0.0001948326826095581, -0.000178605318069458, -0.0001623779535293579, -0.0001461505889892578, -0.00012992322444915771, -0.00011369585990905762, -9.746849536895752e-05, -8.124113082885742e-05, -6.501376628875732e-05, -4.8786401748657227e-05, -3.255903720855713e-05, -1.633167266845703e-05, -1.043081283569336e-07, 1.6123056411743164e-05, 3.235042095184326e-05, 4.857778549194336e-05, 6.480515003204346e-05, 8.103251457214355e-05, 9.725987911224365e-05, 0.00011348724365234375, 0.00012971460819244385, 0.00014594197273254395, 0.00016216933727264404, 0.00017839670181274414, 0.00019462406635284424, 0.00021085143089294434, 0.00022707879543304443, 0.00024330615997314453, 0.00025953352451324463, 0.0002757608890533447, 0.0002919882535934448, 0.0003082156181335449, 0.000324442982673645, 0.0003406703472137451, 0.0003568977117538452, 0.0003731250762939453, 0.0003893524408340454, 0.0004055798053741455, 0.0004218071699142456, 0.0004380345344543457, 0.0004542618989944458, 0.0004704892635345459, 0.000486716628074646, 0.0005029439926147461, 0.0005191713571548462, 0.0005353987216949463, 0.0005516260862350464, 0.0005678534507751465, 0.0005840808153152466, 0.0006003081798553467, 0.0006165355443954468, 0.0006327629089355469]}, "gradients/decoder.model.decoder.layers.5.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 2.0, 2.0, 4.0, 9.0, 13.0, 26.0, 37.0, 85.0, 3238.0, 563.0, 44.0, 20.0, 16.0, 9.0, 6.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.213520050048828e-05, -7.928814738988876e-05, -7.644109427928925e-05, -7.359404116868973e-05, -7.074698805809021e-05, -6.789993494749069e-05, -6.505288183689117e-05, -6.220582872629166e-05, -5.935877561569214e-05, -5.651172250509262e-05, -5.36646693944931e-05, -5.0817616283893585e-05, -4.797056317329407e-05, -4.512351006269455e-05, -4.227645695209503e-05, -3.9429403841495514e-05, -3.6582350730895996e-05, -3.373529762029648e-05, -3.088824450969696e-05, -2.8041191399097443e-05, -2.5194138288497925e-05, -2.2347085177898407e-05, -1.950003206729889e-05, -1.665297895669937e-05, -1.3805925846099854e-05, -1.0958872735500336e-05, -8.111819624900818e-06, -5.2647665143013e-06, -2.4177134037017822e-06, 4.293397068977356e-07, 3.2763928174972534e-06, 6.123445928096771e-06, 8.970499038696289e-06, 1.1817552149295807e-05, 1.4664605259895325e-05, 1.7511658370494843e-05, 2.035871148109436e-05, 2.3205764591693878e-05, 2.6052817702293396e-05, 2.8899870812892914e-05, 3.174692392349243e-05, 3.459397703409195e-05, 3.744103014469147e-05, 4.0288083255290985e-05, 4.31351363658905e-05, 4.598218947649002e-05, 4.882924258708954e-05, 
5.1676295697689056e-05, 5.4523348808288574e-05, 5.737040191888809e-05, 6.021745502948761e-05, 6.306450814008713e-05, 6.591156125068665e-05, 6.875861436128616e-05, 7.160566747188568e-05, 7.44527205824852e-05, 7.729977369308472e-05, 8.014682680368423e-05, 8.299387991428375e-05, 8.584093302488327e-05, 8.868798613548279e-05, 9.15350392460823e-05, 9.438209235668182e-05, 9.722914546728134e-05, 0.00010007619857788086]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 2.0, 4.0, 5.0, 7.0, 8.0, 18.0, 20.0, 26.0, 53.0, 137.0, 319.0, 219.0, 97.0, 28.0, 24.0, 18.0, 10.0, 5.0, 6.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017920378013513982, -0.00017164761084131896, -0.0001640914415474981, -0.00015653527225367725, -0.0001489791029598564, -0.00014142293366603553, -0.00013386676437221467, -0.0001263105805264786, -0.00011875441123265773, -0.00011119824193883687, -0.00010364207264501601, -9.608590335119516e-05, -8.852972678141668e-05, -8.097355748759583e-05, -7.341738819377497e-05, -6.58612116239965e-05, -5.830504960613325e-05, -5.0748880312312394e-05, -4.319270738051273e-05, -3.563653808669187e-05, -2.808036697388161e-05, -2.0524195861071348e-05, -1.296802656725049e-05, -5.411853635450825e-06, 2.144315658370033e-06, 9.700486771180294e-06, 1.7256657883990556e-05, 2.4812827177811414e-05, 3.236899647163227e-05, 3.992516940343194e-05, 4.7481338697252795e-05, 5.503751162905246e-05, 6.259368092287332e-05, 7.014985021669418e-05, 7.770601951051503e-05, 8.526218880433589e-05, 9.281836537411436e-05, 0.00010037453466793522, 0.00010793070396175608, 0.00011548688053153455, 0.0001230430498253554, 0.00013059921911917627, 0.00013815538841299713, 0.00014571155770681798, 0.00015326772700063884, 0.0001608238962944597, 0.00016838006558828056, 0.00017593624943401664, 0.00018349240417592227, 0.00019104857346974313, 0.000198604742763564, 0.00020616091205738485, 0.0002137170813512057, 0.00022127325064502656, 0.00022882941993884742, 0.0002363856037845835, 0.00024394177307840437, 0.00025149795692414045, 0.0002590541262179613, 0.00026661029551178217, 0.00027416646480560303, 0.0002817226340994239, 0.00028927880339324474, 0.0002968349726870656, 0.00030439114198088646]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 5.0, 8.0, 10.0, 17.0, 21.0, 26.0, 27.0, 19.0, 36.0, 42.0, 44.0, 48.0, 50.0, 51.0, 46.0, 47.0, 61.0, 54.0, 60.0, 42.0, 52.0, 35.0, 38.0, 27.0, 30.0, 18.0, 17.0, 17.0, 19.0, 10.0, 8.0, 7.0, 3.0, 4.0, 3.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.2117313316557556e-05, -5.038960443926044e-05, -4.866189192398451e-05, -4.6934183046687394e-05, -4.520647053141147e-05, -4.347876165411435e-05, -4.175105277681723e-05, -4.002334026154131e-05, -3.829563138424419e-05, -3.656792250694707e-05, -3.4840209991671145e-05, -3.311250111437403e-05, -3.138479223707691e-05, -2.9657079721800983e-05, -2.7929370844503865e-05, -2.6201660148217343e-05, -2.447394945193082e-05, -2.27462387556443e-05, -2.1018528059357777e-05, -1.929081918206066e-05, -1.7563108485774137e-05, -1.5835397789487615e-05, -1.4107688002695795e-05, -1.2379978215903975e-05, -1.0652267519617453e-05, -8.924556823330931e-06, -7.196847036539111e-06, 
-5.46913679499994e-06, -3.7414265534607694e-06, -2.0137158571742475e-06, -2.860060703824274e-07, 1.4417037164093927e-06, 3.1694144126959145e-06, 4.8971246542350855e-06, 6.6248348957742564e-06, 8.352544682566077e-06, 1.0080255378852598e-05, 1.180796607513912e-05, 1.353567586193094e-05, 1.526338564872276e-05, 1.6991096345009282e-05, 1.8718807041295804e-05, 2.0446517737582326e-05, 2.2174226614879444e-05, 2.3901937311165966e-05, 2.5629648007452488e-05, 2.7357356884749606e-05, 2.9085067581036128e-05, 3.081277827732265e-05, 3.254048715461977e-05, 3.4268199669895694e-05, 3.599590854719281e-05, 3.772362106246874e-05, 3.9451329939765856e-05, 4.1179038817062974e-05, 4.29067513323389e-05, 4.463446020963602e-05, 4.6362169086933136e-05, 4.808988160220906e-05, 4.981759047950618e-05, 5.15452993568033e-05, 5.3273011872079223e-05, 5.500072074937634e-05, 5.672842962667346e-05, 5.8456142141949385e-05]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 5.0, 6.0, 7.0, 17.0, 15.0, 20.0, 43.0, 54.0, 104.0, 169.0, 312.0, 763.0, 1732.0, 6756.0, 32493.0, 661718.0, 320099.0, 17488.0, 4305.0, 1228.0, 565.0, 284.0, 160.0, 70.0, 57.0, 26.0, 19.0, 11.0, 7.0, 9.0, 3.0, 4.0, 2.0, 3.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.4424324035644531e-05, -1.40434131026268e-05, -1.366250216960907e-05, -1.3281591236591339e-05, -1.2900680303573608e-05, -1.2519769370555878e-05, -1.2138858437538147e-05, -1.1757947504520416e-05, -1.1377036571502686e-05, -1.0996125638484955e-05, -1.0615214705467224e-05, -1.0234303772449493e-05, -9.853392839431763e-06, -9.472481906414032e-06, -9.091570973396301e-06, -8.71066004037857e-06, -8.32974910736084e-06, -7.948838174343109e-06, -7.567927241325378e-06, -7.187016308307648e-06, -6.806105375289917e-06, -6.425194442272186e-06, -6.0442835092544556e-06, -5.663372576236725e-06, -5.282461643218994e-06, -4.9015507102012634e-06, -4.520639777183533e-06, -4.139728844165802e-06, -3.7588179111480713e-06, -3.3779069781303406e-06, -2.99699604511261e-06, -2.616085112094879e-06, -2.2351741790771484e-06, -1.8542632460594177e-06, -1.473352313041687e-06, -1.0924413800239563e-06, -7.115304470062256e-07, -3.3061951398849487e-07, 5.029141902923584e-08, 4.3120235204696655e-07, 8.121132850646973e-07, 1.193024218082428e-06, 1.5739351511001587e-06, 1.9548460841178894e-06, 2.33575701713562e-06, 2.716667950153351e-06, 3.0975788831710815e-06, 3.4784898161888123e-06, 3.859400749206543e-06, 4.240311682224274e-06, 4.621222615242004e-06, 5.002133548259735e-06, 5.383044481277466e-06, 5.7639554142951965e-06, 6.144866347312927e-06, 6.525777280330658e-06, 6.906688213348389e-06, 7.287599146366119e-06, 7.66851007938385e-06, 8.04942101240158e-06, 8.430331945419312e-06, 8.811242878437042e-06, 9.192153811454773e-06, 9.573064744472504e-06, 9.953975677490234e-06]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 3.0, 2.0, 6.0, 6.0, 11.0, 21.0, 16.0, 24.0, 41.0, 42.0, 56.0, 64.0, 84.0, 67.0, 69.0, 80.0, 84.0, 60.0, 60.0, 50.0, 30.0, 26.0, 21.0, 15.0, 9.0, 13.0, 12.0, 4.0, 6.0, 2.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.744529724121094e-05, -4.559475928544998e-05, -4.3744221329689026e-05, -4.189368337392807e-05, -4.0043145418167114e-05, -3.819260746240616e-05, 
-3.63420695066452e-05, -3.449153155088425e-05, -3.264099359512329e-05, -3.0790455639362335e-05, -2.893991768360138e-05, -2.7089379727840424e-05, -2.5238841772079468e-05, -2.3388303816318512e-05, -2.1537765860557556e-05, -1.96872279047966e-05, -1.7836689949035645e-05, -1.598615199327469e-05, -1.4135614037513733e-05, -1.2285076081752777e-05, -1.0434538125991821e-05, -8.584000170230865e-06, -6.73346221446991e-06, -4.882924258708954e-06, -3.032386302947998e-06, -1.1818483471870422e-06, 6.686896085739136e-07, 2.5192275643348694e-06, 4.369765520095825e-06, 6.220303475856781e-06, 8.070841431617737e-06, 9.921379387378693e-06, 1.1771917343139648e-05, 1.3622455298900604e-05, 1.547299325466156e-05, 1.7323531210422516e-05, 1.917406916618347e-05, 2.1024607121944427e-05, 2.2875145077705383e-05, 2.472568303346634e-05, 2.6576220989227295e-05, 2.842675894498825e-05, 3.0277296900749207e-05, 3.212783485651016e-05, 3.397837281227112e-05, 3.5828910768032074e-05, 3.767944872379303e-05, 3.9529986679553986e-05, 4.138052463531494e-05, 4.32310625910759e-05, 4.508160054683685e-05, 4.693213850259781e-05, 4.8782676458358765e-05, 5.063321441411972e-05, 5.2483752369880676e-05, 5.433429032564163e-05, 5.618482828140259e-05, 5.8035366237163544e-05, 5.98859041929245e-05, 6.173644214868546e-05, 6.358698010444641e-05, 6.543751806020737e-05, 6.728805601596832e-05, 6.913859397172928e-05, 7.098913192749023e-05]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 0.0, 2.0, 3.0, 0.0, 4.0, 9.0, 10.0, 3.0, 11.0, 16.0, 16.0, 22.0, 38.0, 9.0, 46.0, 62.0, 69.0, 47.0, 103.0, 149.0, 123.0, 444.0, 1414.0, 2173.0, 27979.0, 854145.0, 150824.0, 6027.0, 3079.0, 750.0, 194.0, 210.0, 158.0, 102.0, 33.0, 77.0, 45.0, 17.0, 35.0, 29.0, 17.0, 19.0, 19.0, 4.0, 4.0, 5.0, 7.0, 2.0, 4.0, 5.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.9206275939941406e-06, -2.8191134333610535e-06, -2.7175992727279663e-06, -2.616085112094879e-06, -2.514570951461792e-06, -2.413056790828705e-06, -2.3115426301956177e-06, -2.2100284695625305e-06, -2.1085143089294434e-06, -2.007000148296356e-06, -1.905485987663269e-06, -1.8039718270301819e-06, -1.7024576663970947e-06, -1.6009435057640076e-06, -1.4994293451309204e-06, -1.3979151844978333e-06, -1.296401023864746e-06, -1.194886863231659e-06, -1.0933727025985718e-06, -9.918585419654846e-07, -8.903443813323975e-07, -7.888302206993103e-07, -6.873160600662231e-07, -5.85801899433136e-07, -4.842877388000488e-07, -3.8277357816696167e-07, -2.812594175338745e-07, -1.7974525690078735e-07, -7.82310962677002e-08, 2.3283064365386963e-08, 1.2479722499847412e-07, 2.2631138563156128e-07, 3.2782554626464844e-07, 4.293397068977356e-07, 5.308538675308228e-07, 6.323680281639099e-07, 7.338821887969971e-07, 8.353963494300842e-07, 9.369105100631714e-07, 1.0384246706962585e-06, 1.1399388313293457e-06, 1.2414529919624329e-06, 1.34296715259552e-06, 1.4444813132286072e-06, 1.5459954738616943e-06, 1.6475096344947815e-06, 1.7490237951278687e-06, 1.8505379557609558e-06, 1.952052116394043e-06, 2.05356627702713e-06, 2.1550804376602173e-06, 2.2565945982933044e-06, 2.3581087589263916e-06, 2.4596229195594788e-06, 2.561137080192566e-06, 2.662651240825653e-06, 2.7641654014587402e-06, 2.8656795620918274e-06, 2.9671937227249146e-06, 3.0687078833580017e-06, 3.170222043991089e-06, 3.271736204624176e-06, 3.373250365257263e-06, 3.4747645258903503e-06, 3.5762786865234375e-06]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.bias": {"_type": "histogram", "values": 
[3.0, 2.0, 0.0, 1.0, 3.0, 1.0, 4.0, 5.0, 7.0, 4.0, 3.0, 10.0, 9.0, 8.0, 13.0, 14.0, 17.0, 20.0, 18.0, 21.0, 29.0, 31.0, 38.0, 49.0, 43.0, 44.0, 51.0, 53.0, 40.0, 45.0, 46.0, 35.0, 41.0, 45.0, 36.0, 44.0, 29.0, 22.0, 23.0, 16.0, 15.0, 11.0, 16.0, 11.0, 7.0, 7.0, 4.0, 6.0, 5.0, 5.0, 1.0, 2.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.195638656616211e-05, -7.906462997198105e-05, -7.617287337779999e-05, -7.328111678361893e-05, -7.038936018943787e-05, -6.74976035952568e-05, -6.460584700107574e-05, -6.171409040689468e-05, -5.882233381271362e-05, -5.593057721853256e-05, -5.30388206243515e-05, -5.014706403017044e-05, -4.725530743598938e-05, -4.436355084180832e-05, -4.147179424762726e-05, -3.85800376534462e-05, -3.568828105926514e-05, -3.2796524465084076e-05, -2.9904767870903015e-05, -2.7013011276721954e-05, -2.4121254682540894e-05, -2.1229498088359833e-05, -1.8337741494178772e-05, -1.544598489999771e-05, -1.255422830581665e-05, -9.66247171163559e-06, -6.770715117454529e-06, -3.878958523273468e-06, -9.872019290924072e-07, 1.9045546650886536e-06, 4.796311259269714e-06, 7.688067853450775e-06, 1.0579824447631836e-05, 1.3471581041812897e-05, 1.6363337635993958e-05, 1.925509423017502e-05, 2.214685082435608e-05, 2.503860741853714e-05, 2.79303640127182e-05, 3.082212060689926e-05, 3.371387720108032e-05, 3.660563379526138e-05, 3.9497390389442444e-05, 4.2389146983623505e-05, 4.5280903577804565e-05, 4.8172660171985626e-05, 5.106441676616669e-05, 5.395617336034775e-05, 5.684792995452881e-05, 5.973968654870987e-05, 6.263144314289093e-05, 6.552319973707199e-05, 6.841495633125305e-05, 7.130671292543411e-05, 7.419846951961517e-05, 7.709022611379623e-05, 7.99819827079773e-05, 8.287373930215836e-05, 8.576549589633942e-05, 8.865725249052048e-05, 9.154900908470154e-05, 9.44407656788826e-05, 9.733252227306366e-05, 0.00010022427886724472, 0.00010311603546142578]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [28.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2530.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1043576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2422.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0], "bins": [-1.1920928955078125e-07, -1.1548399925231934e-07, -1.1175870895385742e-07, -1.0803341865539551e-07, -1.043081283569336e-07, -1.0058283805847168e-07, -9.685754776000977e-08, -9.313225746154785e-08, -8.940696716308594e-08, -8.568167686462402e-08, -8.195638656616211e-08, -7.82310962677002e-08, -7.450580596923828e-08, -7.078051567077637e-08, -6.705522537231445e-08, -6.332993507385254e-08, -5.960464477539063e-08, -5.587935447692871e-08, -5.21540641784668e-08, -4.842877388000488e-08, -4.470348358154297e-08, -4.0978193283081055e-08, -3.725290298461914e-08, -3.3527612686157227e-08, -2.9802322387695312e-08, -2.60770320892334e-08, -2.2351741790771484e-08, -1.862645149230957e-08, -1.4901161193847656e-08, -1.1175870895385742e-08, -7.450580596923828e-09, -3.725290298461914e-09, 0.0, 3.725290298461914e-09, 7.450580596923828e-09, 1.1175870895385742e-08, 1.4901161193847656e-08, 1.862645149230957e-08, 2.2351741790771484e-08, 2.60770320892334e-08, 2.9802322387695312e-08, 3.3527612686157227e-08, 3.725290298461914e-08, 4.0978193283081055e-08, 4.470348358154297e-08, 4.842877388000488e-08, 5.21540641784668e-08, 5.587935447692871e-08, 5.960464477539063e-08, 6.332993507385254e-08, 6.705522537231445e-08, 
7.078051567077637e-08, 7.450580596923828e-08, 7.82310962677002e-08, 8.195638656616211e-08, 8.568167686462402e-08, 8.940696716308594e-08, 9.313225746154785e-08, 9.685754776000977e-08, 1.0058283805847168e-07, 1.043081283569336e-07, 1.0803341865539551e-07, 1.1175870895385742e-07, 1.1548399925231934e-07, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 16.0, 0.0, 26.0, 0.0, 0.0, 44.0, 0.0, 78.0, 0.0, 100.0, 0.0, 0.0, 148.0, 0.0, 174.0, 0.0, 141.0, 0.0, 0.0, 120.0, 0.0, 79.0, 0.0, 0.0, 35.0, 0.0, 20.0, 0.0, 13.0, 0.0, 0.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-8.940696716308594e-07, -8.689239621162415e-07, -8.437782526016235e-07, -8.186325430870056e-07, -7.934868335723877e-07, -7.683411240577698e-07, -7.431954145431519e-07, -7.180497050285339e-07, -6.92903995513916e-07, -6.677582859992981e-07, -6.426125764846802e-07, -6.174668669700623e-07, -5.923211574554443e-07, -5.671754479408264e-07, -5.420297384262085e-07, -5.168840289115906e-07, -4.917383193969727e-07, -4.6659260988235474e-07, -4.414469003677368e-07, -4.163011908531189e-07, -3.91155481338501e-07, -3.6600977182388306e-07, -3.4086406230926514e-07, -3.157183527946472e-07, -2.905726432800293e-07, -2.654269337654114e-07, -2.4028122425079346e-07, -2.1513551473617554e-07, -1.8998980522155762e-07, -1.648440957069397e-07, -1.3969838619232178e-07, -1.1455267667770386e-07, -8.940696716308594e-08, -6.426125764846802e-08, -3.91155481338501e-08, -1.3969838619232178e-08, 1.1175870895385742e-08, 3.632158041000366e-08, 6.146728992462158e-08, 8.66129994392395e-08, 1.1175870895385742e-07, 1.3690441846847534e-07, 1.6205012798309326e-07, 1.8719583749771118e-07, 2.123415470123291e-07, 2.3748725652694702e-07, 2.6263296604156494e-07, 2.8777867555618286e-07, 3.129243850708008e-07, 3.380700945854187e-07, 3.632158041000366e-07, 3.8836151361465454e-07, 4.1350722312927246e-07, 4.386529326438904e-07, 4.637986421585083e-07, 4.889443516731262e-07, 5.140900611877441e-07, 5.392357707023621e-07, 5.6438148021698e-07, 5.895271897315979e-07, 6.146728992462158e-07, 6.398186087608337e-07, 6.649643182754517e-07, 6.901100277900696e-07, 7.152557373046875e-07]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 7.0, 5.0, 8.0, 29.0, 29.0, 55.0, 201.0, 418.0, 150.0, 58.0, 21.0, 14.0, 5.0, 7.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00011221566091990098, -0.00010609726450638846, -9.997886081691831e-05, -9.386046440340579e-05, -8.774206798989326e-05, -8.162366430042312e-05, -7.550526788691059e-05, -6.938686419744045e-05, -6.326846778392792e-05, -5.715006773243658e-05, -5.103166768094525e-05, -4.491327126743272e-05, -3.8794871215941384e-05, -3.267647116445005e-05, -2.655807475093752e-05, -2.0439674699446186e-05, -1.4321274647954851e-05, -8.202875505958218e-06, -2.084476363961585e-06, 4.033921868540347e-06, 1.0152321920031682e-05, 1.6270721971523017e-05, 2.2389118385035545e-05, 2.850751843652688e-05, 3.4625918488018215e-05, 4.074431853950955e-05, 4.6862718591000885e-05, 5.298111500451341e-05, 5.909951505600475e-05, 6.521791510749608e-05, 7.133631152100861e-05, 7.745470793452114e-05, 8.357310434803367e-05, 8.96915007615462e-05, 9.580990445101634e-05, 0.00010192830086452886, 0.000108046704553999, 0.00011416510096751153, 0.00012028349738102406, 0.0001264018937945366, 0.00013252030475996435, 0.00013863870117347687, 0.0001447570975869894, 0.00015087550855241716, 0.0001569939049659297, 0.00016311230137944221, 0.00016923069779295474, 0.00017534909420646727, 0.0001814674906199798, 0.00018758588703349233, 0.00019370428344700485, 0.00019982267986051738, 0.00020594109082594514, 0.00021205948723945767, 0.0002181778836529702, 0.00022429628006648272, 0.00023041467647999525, 0.00023653307289350778, 0.0002426514693070203, 0.00024876988027244806, 0.00025488826213404536, 0.0002610066730994731, 0.0002671250840649009, 0.0002732434659264982, 0.00027936187689192593]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 9.0, 11.0, 15.0, 28.0, 32.0, 36.0, 44.0, 53.0, 68.0, 65.0, 59.0, 72.0, 77.0, 76.0, 66.0, 64.0, 55.0, 38.0, 39.0, 27.0, 18.0, 13.0, 21.0, 10.0, 2.0, 5.0, 5.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.373942283564247e-05, -4.238243491272442e-05, -4.102544335182756e-05, -3.966845542890951e-05, -3.831146750599146e-05, -3.695447958307341e-05, -3.559748802217655e-05, -3.42405000992585e-05, -3.288350853836164e-05, -3.152652061544359e-05, -3.0169530873536132e-05, -2.8812541131628677e-05, -2.7455553208710626e-05, -2.609856346680317e-05, -2.4741573724895716e-05, -2.3384585801977664e-05, -2.2027597879059613e-05, -2.0670608137152158e-05, -1.9313620214234106e-05, -1.795663047232665e-05, -1.65996425494086e-05, -1.5242652807501145e-05, -1.388566306559369e-05, -1.2528674233180936e-05, -1.1171685400768183e-05, -9.81469656835543e-06, -8.457707735942677e-06, -7.100717994035222e-06, -5.743729161622468e-06, -4.386740329209715e-06, -3.02975058730226e-06, -1.6727617548895068e-06, -3.157729224767536e-07, 
1.041216137309675e-06, 2.398205197096104e-06, 3.755194484256208e-06, 5.112183316668961e-06, 6.4691721490817145e-06, 7.82616189098917e-06, 9.183150723401923e-06, 1.0540139555814676e-05, 1.189712838822743e-05, 1.3254117220640182e-05, 1.4611106962547638e-05, 1.5968096704455093e-05, 1.7325084627373144e-05, 1.86820743692806e-05, 2.0039064111188054e-05, 2.1396052034106106e-05, 2.275304177601356e-05, 2.4110029698931612e-05, 2.5467019440839067e-05, 2.682400736375712e-05, 2.8180997105664574e-05, 2.953798684757203e-05, 3.089497477049008e-05, 3.225196269340813e-05, 3.360895061632618e-05, 3.496594217722304e-05, 3.632293010014109e-05, 3.7679918023059145e-05, 3.9036909583956e-05, 4.0393897506874055e-05, 4.1750885429792106e-05, 4.3107876990688965e-05]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 3.0, 4.0, 2.0, 3.0, 11.0, 6.0, 7.0, 13.0, 11.0, 10.0, 8.0, 19.0, 13.0, 22.0, 31.0, 40.0, 48.0, 98.0, 243.0, 662.0, 2241.0, 11517.0, 155437.0, 835984.0, 35450.0, 4648.0, 1181.0, 433.0, 146.0, 58.0, 45.0, 26.0, 25.0, 18.0, 23.0, 7.0, 9.0, 4.0, 10.0, 6.0, 9.0, 5.0, 7.0, 5.0, 2.0, 3.0, 4.0, 3.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.000179290771484375, -0.00017389841377735138, -0.00016850605607032776, -0.00016311369836330414, -0.00015772134065628052, -0.0001523289829492569, -0.00014693662524223328, -0.00014154426753520966, -0.00013615190982818604, -0.00013075955212116241, -0.0001253671944141388, -0.00011997483670711517, -0.00011458247900009155, -0.00010919012129306793, -0.00010379776358604431, -9.840540587902069e-05, -9.301304817199707e-05, -8.762069046497345e-05, -8.222833275794983e-05, -7.683597505092621e-05, -7.144361734390259e-05, -6.605125963687897e-05, -6.065890192985535e-05, -5.5266544222831726e-05, -4.9874186515808105e-05, -4.4481828808784485e-05, -3.9089471101760864e-05, -3.3697113394737244e-05, -2.8304755687713623e-05, -2.2912397980690002e-05, -1.7520040273666382e-05, -1.2127682566642761e-05, -6.735324859619141e-06, -1.34296715259552e-06, 4.049390554428101e-06, 9.441748261451721e-06, 1.4834105968475342e-05, 2.0226463675498962e-05, 2.5618821382522583e-05, 3.1011179089546204e-05, 3.6403536796569824e-05, 4.1795894503593445e-05, 4.7188252210617065e-05, 5.2580609917640686e-05, 5.797296762466431e-05, 6.336532533168793e-05, 6.875768303871155e-05, 7.415004074573517e-05, 7.954239845275879e-05, 8.493475615978241e-05, 9.032711386680603e-05, 9.571947157382965e-05, 0.00010111182928085327, 0.00010650418698787689, 0.00011189654469490051, 0.00011728890240192413, 0.00012268126010894775, 0.00012807361781597137, 0.000133465975522995, 0.00013885833323001862, 0.00014425069093704224, 0.00014964304864406586, 0.00015503540635108948, 0.0001604277640581131, 0.00016582012176513672]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 3.0, 4.0, 6.0, 14.0, 30.0, 89.0, 129.0, 212.0, 212.0, 157.0, 81.0, 38.0, 18.0, 10.0, 8.0, 1.0, 0.0, 7.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4836273193359375e-06, -4.886649549007416e-06, -4.289671778678894e-06, -3.6926940083503723e-06, -3.0957162380218506e-06, -2.498738467693329e-06, -1.9017606973648071e-06, -1.3047829270362854e-06, -7.078051567077637e-07, -1.1082738637924194e-07, 4.861503839492798e-07, 1.0831281542778015e-06, 
1.6801059246063232e-06, 2.277083694934845e-06, 2.8740614652633667e-06, 3.4710392355918884e-06, 4.06801700592041e-06, 4.664994776248932e-06, 5.261972546577454e-06, 5.858950316905975e-06, 6.455928087234497e-06, 7.052905857563019e-06, 7.64988362789154e-06, 8.246861398220062e-06, 8.843839168548584e-06, 9.440816938877106e-06, 1.0037794709205627e-05, 1.063477247953415e-05, 1.1231750249862671e-05, 1.1828728020191193e-05, 1.2425705790519714e-05, 1.3022683560848236e-05, 1.3619661331176758e-05, 1.421663910150528e-05, 1.4813616871833801e-05, 1.5410594642162323e-05, 1.6007572412490845e-05, 1.6604550182819366e-05, 1.7201527953147888e-05, 1.779850572347641e-05, 1.839548349380493e-05, 1.8992461264133453e-05, 1.9589439034461975e-05, 2.0186416804790497e-05, 2.078339457511902e-05, 2.138037234544754e-05, 2.1977350115776062e-05, 2.2574327886104584e-05, 2.3171305656433105e-05, 2.3768283426761627e-05, 2.436526119709015e-05, 2.496223896741867e-05, 2.5559216737747192e-05, 2.6156194508075714e-05, 2.6753172278404236e-05, 2.7350150048732758e-05, 2.794712781906128e-05, 2.85441055893898e-05, 2.9141083359718323e-05, 2.9738061130046844e-05, 3.0335038900375366e-05, 3.093201667070389e-05, 3.152899444103241e-05, 3.212597221136093e-05, 3.272294998168945e-05]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 3.0, 6.0, 2.0, 11.0, 19.0, 16.0, 24.0, 52.0, 59.0, 69.0, 111.0, 166.0, 221.0, 345.0, 590.0, 997.0, 1621.0, 2904.0, 5217.0, 9837.0, 18521.0, 38856.0, 93507.0, 318709.0, 367628.0, 103677.0, 41972.0, 20102.0, 10221.0, 5466.0, 3029.0, 1788.0, 1021.0, 613.0, 384.0, 249.0, 175.0, 101.0, 68.0, 60.0, 40.0, 38.0, 27.0, 11.0, 9.0, 11.0, 5.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7940998077392578e-05, -1.7344020307064056e-05, -1.6747042536735535e-05, -1.6150064766407013e-05, -1.555308699607849e-05, -1.495610922574997e-05, -1.4359131455421448e-05, -1.3762153685092926e-05, -1.3165175914764404e-05, -1.2568198144435883e-05, -1.197122037410736e-05, -1.1374242603778839e-05, -1.0777264833450317e-05, -1.0180287063121796e-05, -9.583309292793274e-06, -8.986331522464752e-06, -8.38935375213623e-06, -7.792375981807709e-06, -7.195398211479187e-06, -6.598420441150665e-06, -6.0014426708221436e-06, -5.404464900493622e-06, -4.8074871301651e-06, -4.210509359836578e-06, -3.6135315895080566e-06, -3.016553819179535e-06, -2.419576048851013e-06, -1.8225982785224915e-06, -1.2256205081939697e-06, -6.28642737865448e-07, -3.166496753692627e-08, 5.653128027915955e-07, 1.1622905731201172e-06, 1.759268343448639e-06, 2.3562461137771606e-06, 2.9532238841056824e-06, 3.550201654434204e-06, 4.147179424762726e-06, 4.7441571950912476e-06, 5.341134965419769e-06, 5.938112735748291e-06, 6.535090506076813e-06, 7.1320682764053345e-06, 7.729046046733856e-06, 8.326023817062378e-06, 8.9230015873909e-06, 9.519979357719421e-06, 1.0116957128047943e-05, 1.0713934898376465e-05, 1.1310912668704987e-05, 1.1907890439033508e-05, 1.250486820936203e-05, 1.3101845979690552e-05, 1.3698823750019073e-05, 1.4295801520347595e-05, 1.4892779290676117e-05, 1.548975706100464e-05, 1.608673483133316e-05, 1.6683712601661682e-05, 1.7280690371990204e-05, 1.7877668142318726e-05, 1.8474645912647247e-05, 1.907162368297577e-05, 1.966860145330429e-05, 2.0265579223632812e-05]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 3.0, 3.0, 5.0, 8.0, 13.0, 14.0, 21.0, 24.0, 
27.0, 33.0, 51.0, 51.0, 63.0, 65.0, 81.0, 73.0, 72.0, 74.0, 71.0, 44.0, 40.0, 42.0, 31.0, 18.0, 23.0, 15.0, 14.0, 8.0, 8.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2695789337158203e-05, -1.2276694178581238e-05, -1.1857599020004272e-05, -1.1438503861427307e-05, -1.1019408702850342e-05, -1.0600313544273376e-05, -1.0181218385696411e-05, -9.762123227119446e-06, -9.34302806854248e-06, -8.923932909965515e-06, -8.50483775138855e-06, -8.085742592811584e-06, -7.666647434234619e-06, -7.247552275657654e-06, -6.8284571170806885e-06, -6.409361958503723e-06, -5.990266799926758e-06, -5.5711716413497925e-06, -5.152076482772827e-06, -4.732981324195862e-06, -4.3138861656188965e-06, -3.894791007041931e-06, -3.475695848464966e-06, -3.0566006898880005e-06, -2.637505531311035e-06, -2.21841037273407e-06, -1.7993152141571045e-06, -1.3802200555801392e-06, -9.611248970031738e-07, -5.420297384262085e-07, -1.2293457984924316e-07, 2.9616057872772217e-07, 7.152557373046875e-07, 1.1343508958816528e-06, 1.5534460544586182e-06, 1.9725412130355835e-06, 2.391636371612549e-06, 2.810731530189514e-06, 3.2298266887664795e-06, 3.648921847343445e-06, 4.06801700592041e-06, 4.4871121644973755e-06, 4.906207323074341e-06, 5.325302481651306e-06, 5.7443976402282715e-06, 6.163492798805237e-06, 6.582587957382202e-06, 7.0016831159591675e-06, 7.420778274536133e-06, 7.839873433113098e-06, 8.258968591690063e-06, 8.678063750267029e-06, 9.097158908843994e-06, 9.51625406742096e-06, 9.935349225997925e-06, 1.035444438457489e-05, 1.0773539543151855e-05, 1.119263470172882e-05, 1.1611729860305786e-05, 1.2030825018882751e-05, 1.2449920177459717e-05, 1.2869015336036682e-05, 1.3288110494613647e-05, 1.3707205653190613e-05, 1.4126300811767578e-05]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 4.0, 2.0, 7.0, 5.0, 12.0, 9.0, 21.0, 25.0, 23.0, 37.0, 46.0, 78.0, 99.0, 198.0, 227.0, 496.0, 904.0, 2049.0, 5610.0, 21116.0, 912906.0, 90330.0, 8866.0, 2756.0, 1171.0, 555.0, 353.0, 229.0, 125.0, 78.0, 42.0, 44.0, 41.0, 23.0, 18.0, 11.0, 10.0, 8.0, 7.0, 6.0, 6.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.1324882507324219e-05, -1.097843050956726e-05, -1.0631978511810303e-05, -1.0285526514053345e-05, -9.939074516296387e-06, -9.592622518539429e-06, -9.24617052078247e-06, -8.899718523025513e-06, -8.553266525268555e-06, -8.206814527511597e-06, -7.860362529754639e-06, -7.513910531997681e-06, -7.167458534240723e-06, -6.821006536483765e-06, -6.474554538726807e-06, -6.128102540969849e-06, -5.781650543212891e-06, -5.435198545455933e-06, -5.088746547698975e-06, -4.742294549942017e-06, -4.395842552185059e-06, -4.049390554428101e-06, -3.7029385566711426e-06, -3.3564865589141846e-06, -3.0100345611572266e-06, -2.6635825634002686e-06, -2.3171305656433105e-06, -1.9706785678863525e-06, -1.6242265701293945e-06, -1.2777745723724365e-06, -9.313225746154785e-07, -5.848705768585205e-07, -2.384185791015625e-07, 1.0803341865539551e-07, 4.544854164123535e-07, 8.009374141693115e-07, 1.1473894119262695e-06, 1.4938414096832275e-06, 1.8402934074401855e-06, 2.1867454051971436e-06, 2.5331974029541016e-06, 2.8796494007110596e-06, 3.2261013984680176e-06, 3.5725533962249756e-06, 3.919005393981934e-06, 4.265457391738892e-06, 4.61190938949585e-06, 4.958361387252808e-06, 5.304813385009766e-06, 5.651265382766724e-06, 5.997717380523682e-06, 6.34416937828064e-06, 6.690621376037598e-06, 
7.037073373794556e-06, 7.383525371551514e-06, 7.729977369308472e-06, 8.07642936706543e-06, 8.422881364822388e-06, 8.769333362579346e-06, 9.115785360336304e-06, 9.462237358093262e-06, 9.80868935585022e-06, 1.0155141353607178e-05, 1.0501593351364136e-05, 1.0848045349121094e-05]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.bias": {"_type": "histogram", "values": [12.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 161.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 673.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 163.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 14.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1920928955078125e-07, -1.1455267667770386e-07, -1.0989606380462646e-07, -1.0523945093154907e-07, -1.0058283805847168e-07, -9.592622518539429e-08, -9.12696123123169e-08, -8.66129994392395e-08, -8.195638656616211e-08, -7.729977369308472e-08, -7.264316082000732e-08, -6.798654794692993e-08, -6.332993507385254e-08, -5.8673322200775146e-08, -5.4016709327697754e-08, -4.936009645462036e-08, -4.470348358154297e-08, -4.0046870708465576e-08, -3.5390257835388184e-08, -3.073364496231079e-08, -2.60770320892334e-08, -2.1420419216156006e-08, -1.6763806343078613e-08, -1.210719347000122e-08, -7.450580596923828e-09, -2.7939677238464355e-09, 1.862645149230957e-09, 6.51925802230835e-09, 1.1175870895385742e-08, 1.5832483768463135e-08, 2.0489096641540527e-08, 2.514570951461792e-08, 2.9802322387695312e-08, 3.4458935260772705e-08, 3.91155481338501e-08, 4.377216100692749e-08, 4.842877388000488e-08, 5.3085386753082275e-08, 5.774199962615967e-08, 6.239861249923706e-08, 6.705522537231445e-08, 7.171183824539185e-08, 7.636845111846924e-08, 8.102506399154663e-08, 8.568167686462402e-08, 9.033828973770142e-08, 9.499490261077881e-08, 9.96515154838562e-08, 1.043081283569336e-07, 1.0896474123001099e-07, 1.1362135410308838e-07, 1.1827796697616577e-07, 1.2293457984924316e-07, 1.2759119272232056e-07, 1.3224780559539795e-07, 1.3690441846847534e-07, 1.4156103134155273e-07, 1.4621764421463013e-07, 1.5087425708770752e-07, 1.555308699607849e-07, 1.601874828338623e-07, 1.648440957069397e-07, 1.695007085800171e-07, 1.7415732145309448e-07, 1.7881393432617188e-07]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 7.0, 2.0, 10.0, 7.0, 22.0, 16.0, 30.0, 49.0, 38.0, 125.0, 77.0, 223.0, 146.0, 471.0, 706.0, 517.0, 1513.0, 1097.0, 3790.0, 3057.0, 10332.0, 9758.0, 52682.0, 766164.0, 112465.0, 53286.0, 9729.0, 10581.0, 2914.0, 3702.0, 1980.0, 681.0, 893.0, 303.0, 438.0, 141.0, 204.0, 94.0, 125.0, 69.0, 26.0, 32.0, 10.0, 24.0, 9.0, 6.0, 6.0, 2.0, 6.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-3.159046173095703e-06, -3.0659139156341553e-06, -2.9727816581726074e-06, -2.8796494007110596e-06, -2.7865171432495117e-06, -2.693384885787964e-06, -2.600252628326416e-06, -2.507120370864868e-06, -2.4139881134033203e-06, -2.3208558559417725e-06, -2.2277235984802246e-06, -2.1345913410186768e-06, -2.041459083557129e-06, -1.948326826095581e-06, -1.8551945686340332e-06, -1.7620623111724854e-06, -1.6689300537109375e-06, -1.5757977962493896e-06, -1.4826655387878418e-06, -1.389533281326294e-06, -1.296401023864746e-06, -1.2032687664031982e-06, -1.1101365089416504e-06, -1.0170042514801025e-06, -9.238719940185547e-07, -8.307397365570068e-07, -7.37607479095459e-07, -6.444752216339111e-07, -5.513429641723633e-07, -4.5821070671081543e-07, -3.650784492492676e-07, 
-2.7194619178771973e-07, -1.7881393432617188e-07, -8.568167686462402e-08, 7.450580596923828e-09, 1.0058283805847168e-07, 1.9371509552001953e-07, 2.868473529815674e-07, 3.7997961044311523e-07, 4.731118679046631e-07, 5.662441253662109e-07, 6.593763828277588e-07, 7.525086402893066e-07, 8.456408977508545e-07, 9.387731552124023e-07, 1.0319054126739502e-06, 1.125037670135498e-06, 1.218169927597046e-06, 1.3113021850585938e-06, 1.4044344425201416e-06, 1.4975666999816895e-06, 1.5906989574432373e-06, 1.6838312149047852e-06, 1.776963472366333e-06, 1.8700957298278809e-06, 1.9632279872894287e-06, 2.0563602447509766e-06, 2.1494925022125244e-06, 2.2426247596740723e-06, 2.33575701713562e-06, 2.428889274597168e-06, 2.522021532058716e-06, 2.6151537895202637e-06, 2.7082860469818115e-06, 2.8014183044433594e-06]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 0.0, 0.0, 1.0, 1.0, 3.0, 9.0, 4.0, 6.0, 8.0, 10.0, 10.0, 10.0, 16.0, 26.0, 57.0, 189.0, 313.0, 113.0, 68.0, 30.0, 29.0, 22.0, 12.0, 15.0, 7.0, 7.0, 4.0, 5.0, 7.0, 6.0, 4.0, 4.0, 4.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.1457672119140625e-06, -2.08243727684021e-06, -2.0191073417663574e-06, -1.955777406692505e-06, -1.8924474716186523e-06, -1.8291175365447998e-06, -1.7657876014709473e-06, -1.7024576663970947e-06, -1.6391277313232422e-06, -1.5757977962493896e-06, -1.5124678611755371e-06, -1.4491379261016846e-06, -1.385807991027832e-06, -1.3224780559539795e-06, -1.259148120880127e-06, -1.1958181858062744e-06, -1.1324882507324219e-06, -1.0691583156585693e-06, -1.0058283805847168e-06, -9.424984455108643e-07, -8.791685104370117e-07, -8.158385753631592e-07, -7.525086402893066e-07, -6.891787052154541e-07, -6.258487701416016e-07, -5.62518835067749e-07, -4.991888999938965e-07, -4.3585896492004395e-07, -3.725290298461914e-07, -3.0919909477233887e-07, -2.4586915969848633e-07, -1.825392246246338e-07, -1.1920928955078125e-07, -5.587935447692871e-08, 7.450580596923828e-09, 7.078051567077637e-08, 1.341104507446289e-07, 1.9744038581848145e-07, 2.60770320892334e-07, 3.241002559661865e-07, 3.8743019104003906e-07, 4.507601261138916e-07, 5.140900611877441e-07, 5.774199962615967e-07, 6.407499313354492e-07, 7.040798664093018e-07, 7.674098014831543e-07, 8.307397365570068e-07, 8.940696716308594e-07, 9.57399606704712e-07, 1.0207295417785645e-06, 1.084059476852417e-06, 1.1473894119262695e-06, 1.210719347000122e-06, 1.2740492820739746e-06, 1.3373792171478271e-06, 1.4007091522216797e-06, 1.4640390872955322e-06, 1.5273690223693848e-06, 1.5906989574432373e-06, 1.6540288925170898e-06, 1.7173588275909424e-06, 1.780688762664795e-06, 1.8440186977386475e-06, 1.9073486328125e-06]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 2.0, 4.0, 1.0, 4.0, 5.0, 8.0, 5.0, 15.0, 8.0, 13.0, 19.0, 30.0, 35.0, 41.0, 54.0, 91.0, 124.0, 129.0, 120.0, 76.0, 62.0, 29.0, 30.0, 23.0, 10.0, 16.0, 11.0, 9.0, 5.0, 5.0, 6.0, 3.0, 6.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.121003828709945e-05, -3.0322647944558412e-05, -2.9435257602017373e-05, -2.8547867259476334e-05, -2.7660476916935295e-05, -2.6773086574394256e-05, -2.5885696231853217e-05, -2.4998304070322774e-05, -2.4110913727781735e-05, -2.3223523385240696e-05, -2.2336133042699657e-05, 
-2.144874270015862e-05, -2.056135235761758e-05, -1.9673960196087137e-05, -1.8786569853546098e-05, -1.789917951100506e-05, -1.701178916846402e-05, -1.612439882592298e-05, -1.5237008483381942e-05, -1.4349618140840903e-05, -1.3462226888805162e-05, -1.2574836546264123e-05, -1.1687446203723084e-05, -1.0800054951687343e-05, -9.912666428135708e-06, -9.025276085594669e-06, -8.13788574305363e-06, -7.25049494576524e-06, -6.36310414847685e-06, -5.475713805935811e-06, -4.588323463394772e-06, -3.7009326661063824e-06, -2.8135418688179925e-06, -1.926151298903278e-06, -1.0387608426754014e-06, -1.5137038644752465e-07, 7.360201834671898e-07, 1.6234107533819042e-06, 2.510801095922943e-06, 3.398191893211333e-06, 4.285582235752372e-06, 5.172972578293411e-06, 6.060363375581801e-06, 6.94775371812284e-06, 7.835144060663879e-06, 8.722534403204918e-06, 9.609924745745957e-06, 1.0497315997781698e-05, 1.1384706340322737e-05, 1.2272096682863776e-05, 1.3159487025404815e-05, 1.4046878277440555e-05, 1.4934268619981594e-05, 1.5821658962522633e-05, 1.6709049305063672e-05, 1.759643964760471e-05, 1.848382999014575e-05, 1.937122033268679e-05, 2.025861067522783e-05, 2.1146001017768867e-05, 2.2033391360309906e-05, 2.292078352184035e-05, 2.3808173864381388e-05, 2.4695564206922427e-05, 2.5582954549463466e-05]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 4.0, 1.0, 6.0, 5.0, 11.0, 15.0, 13.0, 19.0, 16.0, 27.0, 26.0, 39.0, 30.0, 38.0, 37.0, 40.0, 41.0, 49.0, 40.0, 51.0, 62.0, 53.0, 45.0, 48.0, 38.0, 35.0, 35.0, 28.0, 26.0, 25.0, 24.0, 15.0, 15.0, 18.0, 8.0, 7.0, 7.0, 5.0, 2.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-1.9295825040899217e-05, -1.8758068108581938e-05, -1.8220312995254062e-05, -1.7682556062936783e-05, -1.7144799130619504e-05, -1.6607042198302224e-05, -1.606928708497435e-05, -1.553153015265707e-05, -1.4993774129834492e-05, -1.4456018107011914e-05, -1.3918261174694635e-05, -1.3380505151872057e-05, -1.284274912904948e-05, -1.23049921967322e-05, -1.1767236173909623e-05, -1.1229480151087046e-05, -1.0691723218769766e-05, -1.0153967195947189e-05, -9.61621026362991e-06, -9.078454240807332e-06, -8.540697308490053e-06, -8.002941285667475e-06, -7.465185262844898e-06, -6.927428785274969e-06, -6.389672307705041e-06, -5.851915830135113e-06, -5.314159352565184e-06, -4.776403329742607e-06, -4.238646852172678e-06, -3.70089037460275e-06, -3.163134124406497e-06, -2.625377874210244e-06, -2.087619577650912e-06, -1.5498632137678214e-06, -1.0121068498847308e-06, -4.743504860016401e-07, 6.340587788145058e-08, 6.01162355451379e-07, 1.138918605647632e-06, 1.6766748558438849e-06, 2.2144313334138133e-06, 2.7521878109837417e-06, 3.2899440611799946e-06, 3.8277003113762476e-06, 4.365456788946176e-06, 4.903213266516104e-06, 5.440969289338682e-06, 5.97872576690861e-06, 6.516482244478539e-06, 7.054238722048467e-06, 7.591995199618395e-06, 8.129751222440973e-06, 8.667508154758252e-06, 9.20526417758083e-06, 9.743020200403407e-06, 1.0280777132720686e-05, 1.0818533155543264e-05, 1.1356289178365842e-05, 1.189404611068312e-05, 1.2431802133505698e-05, 1.2969558156328276e-05, 1.3507315088645555e-05, 1.4045071111468133e-05, 1.458282713429071e-05, 1.512058406660799e-05]}, "gradients/decoder.model.decoder.layers.4.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 5.0, 9.0, 6.0, 4.0, 8.0, 12.0, 14.0, 30.0, 31.0, 62.0, 63.0, 114.0, 122.0, 233.0, 314.0, 527.0, 756.0, 1145.0, 1882.0, 
3053.0, 5141.0, 9099.0, 18250.0, 239531.0, 3862888.0, 24296.0, 10757.0, 5953.0, 3625.0, 2200.0, 1424.0, 892.0, 609.0, 387.0, 275.0, 184.0, 121.0, 103.0, 78.0, 30.0, 16.0, 20.0, 7.0, 5.0, 7.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.4974346160888672e-05, -2.4197623133659363e-05, -2.3420900106430054e-05, -2.2644177079200745e-05, -2.1867454051971436e-05, -2.1090731024742126e-05, -2.0314007997512817e-05, -1.9537284970283508e-05, -1.87605619430542e-05, -1.798383891582489e-05, -1.720711588859558e-05, -1.6430392861366272e-05, -1.5653669834136963e-05, -1.4876946806907654e-05, -1.4100223779678345e-05, -1.3323500752449036e-05, -1.2546777725219727e-05, -1.1770054697990417e-05, -1.0993331670761108e-05, -1.02166086435318e-05, -9.43988561630249e-06, -8.663162589073181e-06, -7.886439561843872e-06, -7.109716534614563e-06, -6.332993507385254e-06, -5.556270480155945e-06, -4.779547452926636e-06, -4.002824425697327e-06, -3.2261013984680176e-06, -2.4493783712387085e-06, -1.6726553440093994e-06, -8.959323167800903e-07, -1.1920928955078125e-07, 6.575137376785278e-07, 1.434236764907837e-06, 2.210959792137146e-06, 2.987682819366455e-06, 3.764405846595764e-06, 4.541128873825073e-06, 5.317851901054382e-06, 6.094574928283691e-06, 6.8712979555130005e-06, 7.64802098274231e-06, 8.424744009971619e-06, 9.201467037200928e-06, 9.978190064430237e-06, 1.0754913091659546e-05, 1.1531636118888855e-05, 1.2308359146118164e-05, 1.3085082173347473e-05, 1.3861805200576782e-05, 1.4638528227806091e-05, 1.54152512550354e-05, 1.619197428226471e-05, 1.696869730949402e-05, 1.7745420336723328e-05, 1.8522143363952637e-05, 1.9298866391181946e-05, 2.0075589418411255e-05, 2.0852312445640564e-05, 2.1629035472869873e-05, 2.2405758500099182e-05, 2.318248152732849e-05, 2.39592045545578e-05, 2.473592758178711e-05]}, "gradients/decoder.model.decoder.layers.4.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 1.0, 7.0, 8.0, 6.0, 11.0, 7.0, 11.0, 26.0, 15.0, 39.0, 23.0, 36.0, 20.0, 19.0, 60.0, 32.0, 56.0, 31.0, 68.0, 36.0, 67.0, 44.0, 32.0, 60.0, 19.0, 62.0, 14.0, 45.0, 18.0, 14.0, 21.0, 11.0, 19.0, 4.0, 25.0, 4.0, 8.0, 2.0, 5.0, 8.0, 2.0, 3.0, 4.0, 4.0, 0.0, 2.0], "bins": [-3.2186508178710938e-06, -3.1329691410064697e-06, -3.0472874641418457e-06, -2.9616057872772217e-06, -2.8759241104125977e-06, -2.7902424335479736e-06, -2.7045607566833496e-06, -2.6188790798187256e-06, -2.5331974029541016e-06, -2.4475157260894775e-06, -2.3618340492248535e-06, -2.2761523723602295e-06, -2.1904706954956055e-06, -2.1047890186309814e-06, -2.0191073417663574e-06, -1.9334256649017334e-06, -1.8477439880371094e-06, -1.7620623111724854e-06, -1.6763806343078613e-06, -1.5906989574432373e-06, -1.5050172805786133e-06, -1.4193356037139893e-06, -1.3336539268493652e-06, -1.2479722499847412e-06, -1.1622905731201172e-06, -1.0766088962554932e-06, -9.909272193908691e-07, -9.052455425262451e-07, -8.195638656616211e-07, -7.338821887969971e-07, -6.48200511932373e-07, -5.62518835067749e-07, -4.76837158203125e-07, -3.91155481338501e-07, -3.0547380447387695e-07, -2.1979212760925293e-07, -1.341104507446289e-07, -4.842877388000488e-08, 3.725290298461914e-08, 1.2293457984924316e-07, 2.086162567138672e-07, 2.942979335784912e-07, 3.7997961044311523e-07, 4.6566128730773926e-07, 5.513429641723633e-07, 6.370246410369873e-07, 7.227063179016113e-07, 8.083879947662354e-07, 8.940696716308594e-07, 9.797513484954834e-07, 1.0654330253601074e-06, 1.1511147022247314e-06, 1.2367963790893555e-06, 
1.3224780559539795e-06, 1.4081597328186035e-06, 1.4938414096832275e-06, 1.5795230865478516e-06, 1.6652047634124756e-06, 1.7508864402770996e-06, 1.8365681171417236e-06, 1.9222497940063477e-06, 2.0079314708709717e-06, 2.0936131477355957e-06, 2.1792948246002197e-06, 2.2649765014648438e-06]}, "gradients/decoder.model.decoder.layers.4.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 5.0, 2.0, 7.0, 9.0, 17.0, 26.0, 37.0, 25.0, 34.0, 47.0, 102.0, 370.0, 4699.0, 4135845.0, 51098.0, 1444.0, 200.0, 81.0, 51.0, 50.0, 32.0, 27.0, 36.0, 14.0, 10.0, 5.0, 6.0, 4.0, 2.0, 1.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.8683414459228516e-05, -3.6728568375110626e-05, -3.477372229099274e-05, -3.281887620687485e-05, -3.086403012275696e-05, -2.890918403863907e-05, -2.695433795452118e-05, -2.499949187040329e-05, -2.30446457862854e-05, -2.108979970216751e-05, -1.913495361804962e-05, -1.7180107533931732e-05, -1.5225261449813843e-05, -1.3270415365695953e-05, -1.1315569281578064e-05, -9.360723197460175e-06, -7.405877113342285e-06, -5.451031029224396e-06, -3.4961849451065063e-06, -1.541338860988617e-06, 4.1350722312927246e-07, 2.368353307247162e-06, 4.323199391365051e-06, 6.278045475482941e-06, 8.23289155960083e-06, 1.018773764371872e-05, 1.2142583727836609e-05, 1.4097429811954498e-05, 1.6052275896072388e-05, 1.8007121980190277e-05, 1.9961968064308167e-05, 2.1916814148426056e-05, 2.3871660232543945e-05, 2.5826506316661835e-05, 2.7781352400779724e-05, 2.9736198484897614e-05, 3.16910445690155e-05, 3.364589065313339e-05, 3.560073673725128e-05, 3.755558282136917e-05, 3.951042890548706e-05, 4.146527498960495e-05, 4.342012107372284e-05, 4.537496715784073e-05, 4.732981324195862e-05, 4.928465932607651e-05, 5.12395054101944e-05, 5.3194351494312286e-05, 5.5149197578430176e-05, 5.7104043662548065e-05, 5.9058889746665955e-05, 6.1013735830783844e-05, 6.296858191490173e-05, 6.492342799901962e-05, 6.687827408313751e-05, 6.88331201672554e-05, 7.078796625137329e-05, 7.274281233549118e-05, 7.469765841960907e-05, 7.665250450372696e-05, 7.860735058784485e-05, 8.056219667196274e-05, 8.251704275608063e-05, 8.447188884019852e-05, 8.64267349243164e-05]}, "gradients/decoder.model.decoder.layers.4.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 8.0, 25.0, 53.0, 1326.0, 2523.0, 81.0, 29.0, 14.0, 7.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.181529998779297e-05, -2.1387822926044464e-05, -2.096034586429596e-05, -2.0532868802547455e-05, -2.010539174079895e-05, -1.9677914679050446e-05, -1.925043761730194e-05, -1.8822960555553436e-05, -1.839548349380493e-05, -1.7968006432056427e-05, -1.7540529370307922e-05, -1.7113052308559418e-05, -1.6685575246810913e-05, -1.625809818506241e-05, -1.5830621123313904e-05, -1.54031440615654e-05, -1.4975666999816895e-05, -1.454818993806839e-05, -1.4120712876319885e-05, -1.369323581457138e-05, -1.3265758752822876e-05, -1.2838281691074371e-05, -1.2410804629325867e-05, -1.1983327567577362e-05, -1.1555850505828857e-05, -1.1128373444080353e-05, -1.0700896382331848e-05, -1.0273419320583344e-05, -9.845942258834839e-06, -9.418465197086334e-06, -8.99098813533783e-06, -8.563511073589325e-06, -8.13603401184082e-06, -7.708556950092316e-06, 
-7.281079888343811e-06, -6.853602826595306e-06, -6.426125764846802e-06, -5.998648703098297e-06, -5.5711716413497925e-06, -5.143694579601288e-06, -4.716217517852783e-06, -4.2887404561042786e-06, -3.861263394355774e-06, -3.4337863326072693e-06, -3.0063092708587646e-06, -2.57883220911026e-06, -2.1513551473617554e-06, -1.7238780856132507e-06, -1.296401023864746e-06, -8.689239621162415e-07, -4.414469003677368e-07, -1.3969838619232178e-08, 4.1350722312927246e-07, 8.409842848777771e-07, 1.2684613466262817e-06, 1.6959384083747864e-06, 2.123415470123291e-06, 2.5508925318717957e-06, 2.9783695936203003e-06, 3.405846655368805e-06, 3.8333237171173096e-06, 4.260800778865814e-06, 4.688277840614319e-06, 5.1157549023628235e-06, 5.543231964111328e-06]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 5.0, 3.0, 0.0, 6.0, 3.0, 7.0, 15.0, 11.0, 23.0, 22.0, 32.0, 35.0, 71.0, 95.0, 145.0, 148.0, 92.0, 70.0, 40.0, 42.0, 30.0, 23.0, 14.0, 11.0, 12.0, 16.0, 10.0, 3.0, 4.0, 5.0, 2.0, 4.0, 2.0, 2.0, 0.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.285401352419285e-06, -6.994559953454882e-06, -6.703718554490479e-06, -6.4128771555260755e-06, -6.1220353018143214e-06, -5.831194357597269e-06, -5.540352503885515e-06, -5.249511104921112e-06, -4.958669705956709e-06, -4.6678283069923054e-06, -4.376986908027902e-06, -4.086145509063499e-06, -3.7953038827254204e-06, -3.504462483761017e-06, -3.2136208574229386e-06, -2.9227794584585354e-06, -2.631938059494132e-06, -2.341096660529729e-06, -2.0502552615653258e-06, -1.7594136352272471e-06, -1.468572236262844e-06, -1.1777308372984407e-06, -8.868893246471998e-07, -5.960478119959589e-07, -3.052064130315557e-07, -1.4364957223733654e-08, 2.764764985840884e-07, 5.673179543919105e-07, 8.581594101997325e-07, 1.1490008091641357e-06, 1.4398423218153766e-06, 1.7306838344666176e-06, 2.02152477868367e-06, 2.312366177648073e-06, 2.6032075766124763e-06, 2.894049202950555e-06, 3.184890601914958e-06, 3.4757320008793613e-06, 3.76657362721744e-06, 4.057415026181843e-06, 4.348256425146246e-06, 4.6390978241106495e-06, 4.929939223075053e-06, 5.220780622039456e-06, 5.51162247575121e-06, 5.802463419968262e-06, 6.093305273680016e-06, 6.38414667264442e-06, 6.674988071608823e-06, 6.965829470573226e-06, 7.256670869537629e-06, 7.547512268502032e-06, 7.838353667466436e-06, 8.12919552117819e-06, 8.420036465395242e-06, 8.710878319106996e-06, 9.00172017281875e-06, 9.292562026530504e-06, 9.583402970747557e-06, 9.87424482445931e-06, 1.0165085768676363e-05, 1.0455927622388117e-05, 1.074676856660517e-05, 1.1037610420316923e-05, 1.1328451364533976e-05]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 5.0, 13.0, 16.0, 17.0, 23.0, 23.0, 27.0, 30.0, 40.0, 36.0, 45.0, 52.0, 50.0, 51.0, 67.0, 52.0, 49.0, 54.0, 57.0, 56.0, 52.0, 47.0, 22.0, 20.0, 22.0, 14.0, 20.0, 12.0, 11.0, 9.0, 4.0, 4.0, 4.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.57949340165942e-06, -6.387001121765934e-06, -6.194508841872448e-06, -6.002016561978962e-06, -5.8095242820854764e-06, -5.617032002191991e-06, -5.424539722298505e-06, -5.232047442405019e-06, -5.039555162511533e-06, -4.847062882618047e-06, -4.6545706027245615e-06, -4.462078322831076e-06, -4.26958604293759e-06, -4.077093763044104e-06, 
-3.884601483150618e-06, -3.6921092032571323e-06, -3.4996169233636465e-06, -3.3071246434701607e-06, -3.114632363576675e-06, -2.922140083683189e-06, -2.729647803789703e-06, -2.5371555238962173e-06, -2.3446632440027315e-06, -2.1521709641092457e-06, -1.95967868421576e-06, -1.767186404322274e-06, -1.5746941244287882e-06, -1.3822018445353024e-06, -1.1897095646418165e-06, -9.972172847483307e-07, -8.047250048548449e-07, -6.122327249613591e-07, -4.197404450678732e-07, -2.272481651743874e-07, -3.4755885280901566e-08, 1.5773639461258426e-07, 3.502286745060701e-07, 5.427209543995559e-07, 7.352132342930418e-07, 9.277055141865276e-07, 1.1201977940800134e-06, 1.3126900739734992e-06, 1.505182353866985e-06, 1.697674633760471e-06, 1.8901669136539567e-06, 2.0826591935474426e-06, 2.2751514734409284e-06, 2.4676437533344142e-06, 2.6601360332279e-06, 2.852628313121386e-06, 3.0451205930148717e-06, 3.2376128729083575e-06, 3.4301051528018434e-06, 3.622597432695329e-06, 3.815089712588815e-06, 4.007581992482301e-06, 4.200074272375787e-06, 4.3925665522692725e-06, 4.585058832162758e-06, 4.777551112056244e-06, 4.97004339194973e-06, 5.162535671843216e-06, 5.355027951736702e-06, 5.5475202316301875e-06, 5.740012511523673e-06]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 12.0, 0.0, 14.0, 30.0, 0.0, 38.0, 99.0, 0.0, 176.0, 331.0, 0.0, 772.0, 2923.0, 0.0, 35509.0, 968713.0, 35543.0, 0.0, 2926.0, 805.0, 0.0, 303.0, 165.0, 0.0, 83.0, 46.0, 0.0, 32.0, 17.0, 0.0, 13.0, 8.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.3709068298339844e-06, -1.3299286365509033e-06, -1.2889504432678223e-06, -1.2479722499847412e-06, -1.2069940567016602e-06, -1.166015863418579e-06, -1.125037670135498e-06, -1.084059476852417e-06, -1.043081283569336e-06, -1.0021030902862549e-06, -9.611248970031738e-07, -9.201467037200928e-07, -8.791685104370117e-07, -8.381903171539307e-07, -7.972121238708496e-07, -7.562339305877686e-07, -7.152557373046875e-07, -6.742775440216064e-07, -6.332993507385254e-07, -5.923211574554443e-07, -5.513429641723633e-07, -5.103647708892822e-07, -4.6938657760620117e-07, -4.284083843231201e-07, -3.8743019104003906e-07, -3.46451997756958e-07, -3.0547380447387695e-07, -2.644956111907959e-07, -2.2351741790771484e-07, -1.825392246246338e-07, -1.4156103134155273e-07, -1.0058283805847168e-07, -5.960464477539063e-08, -1.862645149230957e-08, 2.2351741790771484e-08, 6.332993507385254e-08, 1.043081283569336e-07, 1.4528632164001465e-07, 1.862645149230957e-07, 2.2724270820617676e-07, 2.682209014892578e-07, 3.0919909477233887e-07, 3.501772880554199e-07, 3.91155481338501e-07, 4.3213367462158203e-07, 4.731118679046631e-07, 5.140900611877441e-07, 5.550682544708252e-07, 5.960464477539062e-07, 6.370246410369873e-07, 6.780028343200684e-07, 7.189810276031494e-07, 7.599592208862305e-07, 8.009374141693115e-07, 8.419156074523926e-07, 8.828938007354736e-07, 9.238719940185547e-07, 9.648501873016357e-07, 1.0058283805847168e-06, 1.0468065738677979e-06, 1.087784767150879e-06, 1.12876296043396e-06, 1.169741153717041e-06, 1.210719347000122e-06, 1.2516975402832031e-06]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 4.0, 4.0, 7.0, 4.0, 21.0, 31.0, 21.0, 35.0, 41.0, 37.0, 34.0, 68.0, 75.0, 74.0, 
61.0, 77.0, 68.0, 43.0, 73.0, 51.0, 44.0, 18.0, 29.0, 25.0, 16.0, 12.0, 11.0, 7.0, 3.0, 6.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.4373016357421875e-06, -6.275251507759094e-06, -6.113201379776001e-06, -5.951151251792908e-06, -5.7891011238098145e-06, -5.627050995826721e-06, -5.465000867843628e-06, -5.302950739860535e-06, -5.140900611877441e-06, -4.978850483894348e-06, -4.816800355911255e-06, -4.654750227928162e-06, -4.492700099945068e-06, -4.330649971961975e-06, -4.168599843978882e-06, -4.0065497159957886e-06, -3.844499588012695e-06, -3.682449460029602e-06, -3.520399332046509e-06, -3.3583492040634155e-06, -3.1962990760803223e-06, -3.034248948097229e-06, -2.8721988201141357e-06, -2.7101486921310425e-06, -2.5480985641479492e-06, -2.386048436164856e-06, -2.2239983081817627e-06, -2.0619481801986694e-06, -1.8998980522155762e-06, -1.737847924232483e-06, -1.5757977962493896e-06, -1.4137476682662964e-06, -1.2516975402832031e-06, -1.0896474123001099e-06, -9.275972843170166e-07, -7.655471563339233e-07, -6.034970283508301e-07, -4.414469003677368e-07, -2.7939677238464355e-07, -1.1734664440155029e-07, 4.470348358154297e-08, 2.0675361156463623e-07, 3.688037395477295e-07, 5.308538675308228e-07, 6.92903995513916e-07, 8.549541234970093e-07, 1.0170042514801025e-06, 1.1790543794631958e-06, 1.341104507446289e-06, 1.5031546354293823e-06, 1.6652047634124756e-06, 1.8272548913955688e-06, 1.989305019378662e-06, 2.1513551473617554e-06, 2.3134052753448486e-06, 2.475455403327942e-06, 2.637505531311035e-06, 2.7995556592941284e-06, 2.9616057872772217e-06, 3.123655915260315e-06, 3.285706043243408e-06, 3.4477561712265015e-06, 3.6098062992095947e-06, 3.771856427192688e-06, 3.933906555175781e-06]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 31.0, 0.0, 0.0, 0.0, 206.0, 0.0, 0.0, 0.0, 0.0, 1434.0, 0.0, 0.0, 0.0, 1045429.0, 0.0, 0.0, 0.0, 1261.0, 0.0, 0.0, 0.0, 167.0, 0.0, 0.0, 0.0, 0.0, 29.0, 0.0, 0.0, 0.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.980232238769531e-07, -2.8405338525772095e-07, -2.7008354663848877e-07, -2.561137080192566e-07, -2.421438694000244e-07, -2.2817403078079224e-07, -2.1420419216156006e-07, -2.0023435354232788e-07, -1.862645149230957e-07, -1.7229467630386353e-07, -1.5832483768463135e-07, -1.4435499906539917e-07, -1.30385160446167e-07, -1.1641532182693481e-07, -1.0244548320770264e-07, -8.847564458847046e-08, -7.450580596923828e-08, -6.05359673500061e-08, -4.6566128730773926e-08, -3.259629011154175e-08, -1.862645149230957e-08, -4.6566128730773926e-09, 9.313225746154785e-09, 2.3283064365386963e-08, 3.725290298461914e-08, 5.122274160385132e-08, 6.51925802230835e-08, 7.916241884231567e-08, 9.313225746154785e-08, 1.0710209608078003e-07, 1.210719347000122e-07, 1.3504177331924438e-07, 1.4901161193847656e-07, 1.6298145055770874e-07, 1.7695128917694092e-07, 1.909211277961731e-07, 2.0489096641540527e-07, 2.1886080503463745e-07, 2.3283064365386963e-07, 2.468004822731018e-07, 2.60770320892334e-07, 2.7474015951156616e-07, 2.8870999813079834e-07, 3.026798367500305e-07, 3.166496753692627e-07, 3.3061951398849487e-07, 3.4458935260772705e-07, 3.5855919122695923e-07, 3.725290298461914e-07, 3.864988684654236e-07, 4.0046870708465576e-07, 4.1443854570388794e-07, 4.284083843231201e-07, 4.423782229423523e-07, 4.5634806156158447e-07, 4.7031790018081665e-07, 4.842877388000488e-07, 4.98257577419281e-07, 
5.122274160385132e-07, 5.261972546577454e-07, 5.401670932769775e-07, 5.541369318962097e-07, 5.681067705154419e-07, 5.820766091346741e-07, 5.960464477539062e-07]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 6.0, 2.0, 2.0, 7.0, 10.0, 20.0, 21.0, 40.0, 37.0, 39.0, 51.0, 67.0, 55.0, 91.0, 75.0, 81.0, 58.0, 82.0, 46.0, 53.0, 39.0, 32.0, 22.0, 27.0, 17.0, 12.0, 5.0, 5.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.52346420288086e-06, -8.122064173221588e-06, -7.720664143562317e-06, -7.319264113903046e-06, -6.917864084243774e-06, -6.516464054584503e-06, -6.115064024925232e-06, -5.713663995265961e-06, -5.3122639656066895e-06, -4.910863935947418e-06, -4.509463906288147e-06, -4.108063876628876e-06, -3.7066638469696045e-06, -3.3052638173103333e-06, -2.903863787651062e-06, -2.5024637579917908e-06, -2.1010637283325195e-06, -1.6996636986732483e-06, -1.298263669013977e-06, -8.968636393547058e-07, -4.954636096954346e-07, -9.406358003616333e-08, 3.073364496231079e-07, 7.087364792823792e-07, 1.1101365089416504e-06, 1.5115365386009216e-06, 1.912936568260193e-06, 2.314336597919464e-06, 2.7157366275787354e-06, 3.1171366572380066e-06, 3.518536686897278e-06, 3.919936716556549e-06, 4.32133674621582e-06, 4.7227367758750916e-06, 5.124136805534363e-06, 5.525536835193634e-06, 5.926936864852905e-06, 6.3283368945121765e-06, 6.729736924171448e-06, 7.131136953830719e-06, 7.53253698348999e-06, 7.933937013149261e-06, 8.335337042808533e-06, 8.736737072467804e-06, 9.138137102127075e-06, 9.539537131786346e-06, 9.940937161445618e-06, 1.0342337191104889e-05, 1.074373722076416e-05, 1.1145137250423431e-05, 1.1546537280082703e-05, 1.1947937309741974e-05, 1.2349337339401245e-05, 1.2750737369060516e-05, 1.3152137398719788e-05, 1.3553537428379059e-05, 1.395493745803833e-05, 1.4356337487697601e-05, 1.4757737517356873e-05, 1.5159137547016144e-05, 1.5560537576675415e-05, 1.5961937606334686e-05, 1.6363337635993958e-05, 1.676473766565323e-05, 1.71661376953125e-05]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [16.0, 0.0, 0.0, 0.0, 0.0, 14.0, 0.0, 0.0, 0.0, 0.0, 12.0, 0.0, 0.0, 0.0, 0.0, 0.0, 26.0, 0.0, 0.0, 0.0, 0.0, 45.0, 0.0, 0.0, 0.0, 0.0, 789.0, 0.0, 0.0, 0.0, 0.0, 0.0, 54.0, 0.0, 0.0, 0.0, 0.0, 23.0, 0.0, 0.0, 0.0, 0.0, 19.0, 0.0, 0.0, 0.0, 0.0, 0.0, 11.0, 0.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.980232238769531e-07, -2.868473529815674e-07, -2.7567148208618164e-07, -2.644956111907959e-07, -2.5331974029541016e-07, -2.421438694000244e-07, -2.3096799850463867e-07, 
-2.1979212760925293e-07, -2.086162567138672e-07, -1.9744038581848145e-07, -1.862645149230957e-07, -1.7508864402770996e-07, -1.6391277313232422e-07, -1.5273690223693848e-07, -1.4156103134155273e-07, -1.30385160446167e-07, -1.1920928955078125e-07, -1.0803341865539551e-07, -9.685754776000977e-08, -8.568167686462402e-08, -7.450580596923828e-08, -6.332993507385254e-08, -5.21540641784668e-08, -4.0978193283081055e-08, -2.9802322387695312e-08, -1.862645149230957e-08, -7.450580596923828e-09, 3.725290298461914e-09, 1.4901161193847656e-08, 2.60770320892334e-08, 3.725290298461914e-08, 4.842877388000488e-08, 5.960464477539063e-08, 7.078051567077637e-08, 8.195638656616211e-08, 9.313225746154785e-08, 1.043081283569336e-07, 1.1548399925231934e-07, 1.2665987014770508e-07, 1.3783574104309082e-07, 1.4901161193847656e-07, 1.601874828338623e-07, 1.7136335372924805e-07, 1.825392246246338e-07, 1.9371509552001953e-07, 2.0489096641540527e-07, 2.1606683731079102e-07, 2.2724270820617676e-07, 2.384185791015625e-07, 2.4959444999694824e-07, 2.60770320892334e-07, 2.7194619178771973e-07, 2.8312206268310547e-07, 2.942979335784912e-07, 3.0547380447387695e-07, 3.166496753692627e-07, 3.2782554626464844e-07, 3.390014171600342e-07, 3.501772880554199e-07, 3.6135315895080566e-07, 3.725290298461914e-07, 3.8370490074157715e-07, 3.948807716369629e-07, 4.0605664253234863e-07, 4.172325134277344e-07]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 6.0, 8.0, 4.0, 11.0, 13.0, 12.0, 22.0, 32.0, 37.0, 57.0, 77.0, 132.0, 193.0, 124.0, 80.0, 58.0, 31.0, 31.0, 21.0, 12.0, 10.0, 5.0, 4.0, 9.0, 4.0, 4.0, 0.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-6.27329472990823e-06, -6.074144039303064e-06, -5.874993348697899e-06, -5.6758426580927335e-06, -5.476692422234919e-06, -5.277541276882403e-06, -5.078391041024588e-06, 
-4.879240350419423e-06, -4.680089659814257e-06, -4.480938969209092e-06, -4.281788278603926e-06, -4.082637587998761e-06, -3.8834868973935954e-06, -3.6843364341621054e-06, -3.4851859709306154e-06, -3.28603528032545e-06, -3.0868845897202846e-06, -2.887733899115119e-06, -2.6885832085099537e-06, -2.4894327452784637e-06, -2.2902820546732983e-06, -2.091131364068133e-06, -1.8919807871498051e-06, -1.6928302102314774e-06, -1.493679519626312e-06, -1.2945288290211465e-06, -1.0953782521028188e-06, -8.962276183410722e-07, -6.970769845793257e-07, -4.979262939741602e-07, -2.987757170558325e-07, -9.962514013750479e-08, 9.952555046766065e-08, 2.9867618422940723e-07, 4.978268179911538e-07, 6.969774517529004e-07, 8.96128085514647e-07, 1.0952787761198124e-06, 1.2944293530381401e-06, 1.4935799299564678e-06, 1.6927306205616333e-06, 1.8918813111667987e-06, 2.091032001771964e-06, 2.290182465003454e-06, 2.4893331556086196e-06, 2.688483846213785e-06, 2.887634309445275e-06, 3.0867850000504404e-06, 3.285935690655606e-06, 3.4850863812607713e-06, 3.6842370718659367e-06, 3.883387762471102e-06, 4.082537998328917e-06, 4.281689143681433e-06, 4.480839379539248e-06, 4.679990070144413e-06, 4.8791407607495785e-06, 5.078291451354744e-06, 5.277442141959909e-06, 5.476592832565075e-06, 5.67574352317024e-06, 5.874893759028055e-06, 6.07404444963322e-06, 6.273195140238386e-06, 6.472345830843551e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 1.0, 5.0, 11.0, 14.0, 18.0, 32.0, 22.0, 27.0, 43.0, 50.0, 61.0, 63.0, 59.0, 82.0, 71.0, 61.0, 54.0, 80.0, 52.0, 52.0, 38.0, 22.0, 25.0, 16.0, 24.0, 9.0, 6.0, 3.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.62584500585217e-06, -5.475055331771728e-06, -5.324265202943934e-06, -5.1734755288634915e-06, -5.022685854783049e-06, -4.871895725955255e-06, -4.721106051874813e-06, -4.57031637779437e-06, -4.4195262489665765e-06, -4.268736574886134e-06, -4.11794644605834e-06, -3.967156771977898e-06, -3.816367097897455e-06, -3.6655769690696616e-06, -3.514787294989219e-06, -3.363997393535101e-06, -3.2132077194546582e-06, -3.06241781800054e-06, -2.9116281439200975e-06, -2.7608382424659794e-06, -2.6100483410118613e-06, -2.4592586669314187e-06, -2.3084687654773006e-06, -2.1576788640231825e-06, -2.00688918994274e-06, -1.8560994021754595e-06, -1.7053095007213415e-06, -1.554519712954061e-06, -1.403729811499943e-06, -1.2529400237326627e-06, -1.1021502359653823e-06, -9.513603345112642e-07, -8.005704330571461e-07, -6.497805884464469e-07, -4.989907438357477e-07, -3.4820095606846735e-07, -1.9741111145776813e-07, -4.662126684706891e-08, 1.0416852092021145e-07, 2.5495842237432953e-07, 4.057482101416099e-07, 5.565380547523091e-07, 7.073278993630083e-07, 8.581176871302887e-07, 1.008907474897569e-06, 1.1596973763516871e-06, 1.3104871641189675e-06, 1.4612770655730856e-06, 1.612066853340366e-06, 1.7628566411076463e-06, 1.9136464288749266e-06, 2.0644363303290447e-06, 2.215226231783163e-06, 2.3660159058636054e-06, 2.5168058073177235e-06, 2.6675957087718416e-06, 2.8183853828522842e-06, 2.9691752843064023e-06, 3.119964958386845e-06, 3.270754859840963e-06, 3.421544761295081e-06, 3.572334662749199e-06, 3.723124336829642e-06, 3.873914465657435e-06, 4.024704139737878e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 6.0, 5.0, 4.0, 
2.0, 7.0, 6.0, 12.0, 11.0, 11.0, 26.0, 38.0, 81.0, 90.0, 112.0, 310.0, 347.0, 835.0, 939.0, 1458.0, 3903.0, 4791.0, 14727.0, 22006.0, 47314.0, 280905.0, 450991.0, 150826.0, 31682.0, 20539.0, 6349.0, 3813.0, 2990.0, 1176.0, 977.0, 393.0, 256.0, 265.0, 107.0, 88.0, 43.0, 32.0, 27.0, 17.0, 9.0, 10.0, 8.0, 8.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0], "bins": [-4.76837158203125e-06, -4.624947905540466e-06, -4.481524229049683e-06, -4.338100552558899e-06, -4.194676876068115e-06, -4.0512531995773315e-06, -3.907829523086548e-06, -3.764405846595764e-06, -3.6209821701049805e-06, -3.4775584936141968e-06, -3.334134817123413e-06, -3.1907111406326294e-06, -3.0472874641418457e-06, -2.903863787651062e-06, -2.7604401111602783e-06, -2.6170164346694946e-06, -2.473592758178711e-06, -2.3301690816879272e-06, -2.1867454051971436e-06, -2.04332172870636e-06, -1.8998980522155762e-06, -1.7564743757247925e-06, -1.6130506992340088e-06, -1.469627022743225e-06, -1.3262033462524414e-06, -1.1827796697616577e-06, -1.039355993270874e-06, -8.959323167800903e-07, -7.525086402893066e-07, -6.09084963798523e-07, -4.6566128730773926e-07, -3.2223761081695557e-07, -1.7881393432617188e-07, -3.5390257835388184e-08, 1.0803341865539551e-07, 2.514570951461792e-07, 3.948807716369629e-07, 5.383044481277466e-07, 6.817281246185303e-07, 8.25151801109314e-07, 9.685754776000977e-07, 1.1119991540908813e-06, 1.255422830581665e-06, 1.3988465070724487e-06, 1.5422701835632324e-06, 1.6856938600540161e-06, 1.8291175365447998e-06, 1.9725412130355835e-06, 2.115964889526367e-06, 2.259388566017151e-06, 2.4028122425079346e-06, 2.5462359189987183e-06, 2.689659595489502e-06, 2.8330832719802856e-06, 2.9765069484710693e-06, 3.119930624961853e-06, 3.2633543014526367e-06, 3.4067779779434204e-06, 3.550201654434204e-06, 3.693625330924988e-06, 3.8370490074157715e-06, 3.980472683906555e-06, 4.123896360397339e-06, 4.2673200368881226e-06, 4.410743713378906e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 3.0, 14.0, 0.0, 18.0, 28.0, 0.0, 53.0, 0.0, 46.0, 73.0, 0.0, 101.0, 120.0, 0.0, 112.0, 0.0, 105.0, 111.0, 0.0, 81.0, 48.0, 0.0, 35.0, 0.0, 23.0, 17.0, 0.0, 12.0, 4.0, 0.0, 6.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1324882507324219e-06, -1.0952353477478027e-06, -1.0579824447631836e-06, -1.0207295417785645e-06, -9.834766387939453e-07, -9.462237358093262e-07, -9.08970832824707e-07, -8.717179298400879e-07, -8.344650268554688e-07, -7.972121238708496e-07, -7.599592208862305e-07, -7.227063179016113e-07, -6.854534149169922e-07, -6.48200511932373e-07, -6.109476089477539e-07, -5.736947059631348e-07, -5.364418029785156e-07, -4.991888999938965e-07, -4.6193599700927734e-07, -4.246830940246582e-07, -3.8743019104003906e-07, -3.501772880554199e-07, -3.129243850708008e-07, -2.7567148208618164e-07, -2.384185791015625e-07, -2.0116567611694336e-07, -1.6391277313232422e-07, -1.2665987014770508e-07, -8.940696716308594e-08, -5.21540641784668e-08, -1.4901161193847656e-08, 2.2351741790771484e-08, 5.960464477539063e-08, 9.685754776000977e-08, 1.341104507446289e-07, 1.7136335372924805e-07, 2.086162567138672e-07, 2.4586915969848633e-07, 2.8312206268310547e-07, 3.203749656677246e-07, 3.5762786865234375e-07, 3.948807716369629e-07, 4.3213367462158203e-07, 4.6938657760620117e-07, 5.066394805908203e-07, 5.438923835754395e-07, 5.811452865600586e-07, 6.183981895446777e-07, 6.556510925292969e-07, 
6.92903995513916e-07, 7.301568984985352e-07, 7.674098014831543e-07, 8.046627044677734e-07, 8.419156074523926e-07, 8.791685104370117e-07, 9.164214134216309e-07, 9.5367431640625e-07, 9.909272193908691e-07, 1.0281801223754883e-06, 1.0654330253601074e-06, 1.1026859283447266e-06, 1.1399388313293457e-06, 1.1771917343139648e-06, 1.214444637298584e-06, 1.2516975402832031e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 6.0, 5.0, 12.0, 17.0, 29.0, 37.0, 45.0, 107.0, 158.0, 241.0, 658.0, 1876.0, 13526.0, 65913.0, 622265.0, 297224.0, 35845.0, 8316.0, 1248.0, 438.0, 202.0, 143.0, 112.0, 53.0, 31.0, 21.0, 17.0, 10.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.033348083496094e-06, -6.842426955699921e-06, -6.6515058279037476e-06, -6.4605847001075745e-06, -6.269663572311401e-06, -6.078742444515228e-06, -5.887821316719055e-06, -5.696900188922882e-06, -5.505979061126709e-06, -5.315057933330536e-06, -5.124136805534363e-06, -4.93321567773819e-06, -4.742294549942017e-06, -4.5513734221458435e-06, -4.36045229434967e-06, -4.169531166553497e-06, -3.978610038757324e-06, -3.787688910961151e-06, -3.596767783164978e-06, -3.405846655368805e-06, -3.214925527572632e-06, -3.0240043997764587e-06, -2.8330832719802856e-06, -2.6421621441841125e-06, -2.4512410163879395e-06, -2.2603198885917664e-06, -2.0693987607955933e-06, -1.8784776329994202e-06, -1.687556505203247e-06, -1.496635377407074e-06, -1.3057142496109009e-06, -1.1147931218147278e-06, -9.238719940185547e-07, -7.329508662223816e-07, -5.420297384262085e-07, -3.511086106300354e-07, -1.601874828338623e-07, 3.073364496231079e-08, 2.2165477275848389e-07, 4.12575900554657e-07, 6.034970283508301e-07, 7.944181561470032e-07, 9.853392839431763e-07, 1.1762604117393494e-06, 1.3671815395355225e-06, 1.5581026673316956e-06, 1.7490237951278687e-06, 1.9399449229240417e-06, 2.130866050720215e-06, 2.321787178516388e-06, 2.512708306312561e-06, 2.703629434108734e-06, 2.8945505619049072e-06, 3.0854716897010803e-06, 3.2763928174972534e-06, 3.4673139452934265e-06, 3.6582350730895996e-06, 3.849156200885773e-06, 4.040077328681946e-06, 4.230998456478119e-06, 4.421919584274292e-06, 4.612840712070465e-06, 4.803761839866638e-06, 4.994682967662811e-06, 5.185604095458984e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 3.0, 4.0, 6.0, 24.0, 13.0, 34.0, 18.0, 70.0, 39.0, 112.0, 84.0, 131.0, 67.0, 128.0, 52.0, 53.0, 75.0, 19.0, 39.0, 10.0, 11.0, 9.0, 8.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4437904357910156e-06, -2.3562461137771606e-06, -2.2687017917633057e-06, -2.1811574697494507e-06, -2.0936131477355957e-06, -2.0060688257217407e-06, -1.9185245037078857e-06, -1.8309801816940308e-06, -1.7434358596801758e-06, -1.6558915376663208e-06, -1.5683472156524658e-06, -1.4808028936386108e-06, -1.3932585716247559e-06, -1.3057142496109009e-06, -1.218169927597046e-06, -1.130625605583191e-06, -1.043081283569336e-06, -9.55536961555481e-07, -8.67992639541626e-07, -7.80448317527771e-07, -6.92903995513916e-07, -6.05359673500061e-07, -5.178153514862061e-07, -4.302710294723511e-07, -3.427267074584961e-07, -2.551823854446411e-07, -1.6763806343078613e-07, -8.009374141693115e-08, 
7.450580596923828e-09, 9.499490261077881e-08, 1.825392246246338e-07, 2.7008354663848877e-07, 3.5762786865234375e-07, 4.4517219066619873e-07, 5.327165126800537e-07, 6.202608346939087e-07, 7.078051567077637e-07, 7.953494787216187e-07, 8.828938007354736e-07, 9.704381227493286e-07, 1.0579824447631836e-06, 1.1455267667770386e-06, 1.2330710887908936e-06, 1.3206154108047485e-06, 1.4081597328186035e-06, 1.4957040548324585e-06, 1.5832483768463135e-06, 1.6707926988601685e-06, 1.7583370208740234e-06, 1.8458813428878784e-06, 1.9334256649017334e-06, 2.0209699869155884e-06, 2.1085143089294434e-06, 2.1960586309432983e-06, 2.2836029529571533e-06, 2.3711472749710083e-06, 2.4586915969848633e-06, 2.5462359189987183e-06, 2.6337802410125732e-06, 2.7213245630264282e-06, 2.808868885040283e-06, 2.896413207054138e-06, 2.983957529067993e-06, 3.071501851081848e-06, 3.159046173095703e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 0.0, 4.0, 8.0, 11.0, 11.0, 0.0, 14.0, 46.0, 72.0, 0.0, 131.0, 268.0, 633.0, 1793.0, 0.0, 6280.0, 40311.0, 949858.0, 0.0, 39968.0, 6163.0, 1741.0, 630.0, 0.0, 290.0, 148.0, 75.0, 47.0, 0.0, 20.0, 12.0, 8.0, 0.0, 11.0, 6.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.430511474609375e-06, -1.383945345878601e-06, -1.3373792171478271e-06, -1.2908130884170532e-06, -1.2442469596862793e-06, -1.1976808309555054e-06, -1.1511147022247314e-06, -1.1045485734939575e-06, -1.0579824447631836e-06, -1.0114163160324097e-06, -9.648501873016357e-07, -9.182840585708618e-07, -8.717179298400879e-07, -8.25151801109314e-07, -7.7858567237854e-07, -7.320195436477661e-07, -6.854534149169922e-07, -6.388872861862183e-07, -5.923211574554443e-07, -5.457550287246704e-07, -4.991888999938965e-07, -4.5262277126312256e-07, -4.0605664253234863e-07, -3.594905138015747e-07, -3.129243850708008e-07, -2.6635825634002686e-07, -2.1979212760925293e-07, -1.73225998878479e-07, -1.2665987014770508e-07, -8.009374141693115e-08, -3.3527612686157227e-08, 1.30385160446167e-08, 5.960464477539063e-08, 1.0617077350616455e-07, 1.5273690223693848e-07, 1.993030309677124e-07, 2.4586915969848633e-07, 2.9243528842926025e-07, 3.390014171600342e-07, 3.855675458908081e-07, 4.3213367462158203e-07, 4.78699803352356e-07, 5.252659320831299e-07, 5.718320608139038e-07, 6.183981895446777e-07, 6.649643182754517e-07, 7.115304470062256e-07, 7.580965757369995e-07, 8.046627044677734e-07, 8.512288331985474e-07, 8.977949619293213e-07, 9.443610906600952e-07, 9.909272193908691e-07, 1.037493348121643e-06, 1.084059476852417e-06, 1.130625605583191e-06, 1.1771917343139648e-06, 1.2237578630447388e-06, 1.2703239917755127e-06, 1.3168901205062866e-06, 1.3634562492370605e-06, 1.4100223779678345e-06, 1.4565885066986084e-06, 1.5031546354293823e-06, 1.5497207641601562e-06]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.bias": {"_type": "histogram", "values": [6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1010.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, 
-4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 5.0, 0.0, 10.0, 0.0, 0.0, 32.0, 0.0, 66.0, 0.0, 0.0, 290.0, 0.0, 0.0, 1949.0, 0.0, 24580.0, 0.0, 0.0, 994343.0, 0.0, 24923.0, 0.0, 0.0, 1893.0, 0.0, 331.0, 0.0, 0.0, 82.0, 0.0, 40.0, 0.0, 0.0, 9.0, 0.0, 0.0, 3.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-6.556510925292969e-07, -6.323680281639099e-07, -6.09084963798523e-07, -5.85801899433136e-07, -5.62518835067749e-07, -5.392357707023621e-07, -5.159527063369751e-07, -4.926696419715881e-07, -4.6938657760620117e-07, -4.461035132408142e-07, -4.2282044887542725e-07, -3.995373845100403e-07, -3.762543201446533e-07, -3.5297125577926636e-07, -3.296881914138794e-07, -3.0640512704849243e-07, -2.8312206268310547e-07, -2.598389983177185e-07, -2.3655593395233154e-07, -2.1327286958694458e-07, -1.8998980522155762e-07, -1.6670674085617065e-07, -1.434236764907837e-07, -1.2014061212539673e-07, -9.685754776000977e-08, -7.35744833946228e-08, -5.029141902923584e-08, -2.7008354663848877e-08, -3.725290298461914e-09, 1.955777406692505e-08, 4.284083843231201e-08, 6.612390279769897e-08, 8.940696716308594e-08, 1.126900315284729e-07, 1.3597309589385986e-07, 1.5925616025924683e-07, 1.825392246246338e-07, 2.0582228899002075e-07, 2.2910535335540771e-07, 2.523884177207947e-07, 2.7567148208618164e-07, 2.989545464515686e-07, 3.2223761081695557e-07, 3.4552067518234253e-07, 3.688037395477295e-07, 3.9208680391311646e-07, 4.153698682785034e-07, 4.386529326438904e-07, 4.6193599700927734e-07, 4.852190613746643e-07, 5.085021257400513e-07, 5.317851901054382e-07, 5.550682544708252e-07, 5.783513188362122e-07, 6.016343832015991e-07, 6.249174475669861e-07, 6.48200511932373e-07, 6.7148357629776e-07, 6.94766640663147e-07, 7.180497050285339e-07, 7.413327693939209e-07, 7.646158337593079e-07, 7.878988981246948e-07, 8.111819624900818e-07, 8.344650268554688e-07]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 22.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
101.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 792.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 78.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 17.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.980232238769531e-07, -2.896413207054138e-07, -2.812594175338745e-07, -2.728775143623352e-07, -2.644956111907959e-07, -2.561137080192566e-07, -2.477318048477173e-07, -2.39349901676178e-07, -2.3096799850463867e-07, -2.2258609533309937e-07, -2.1420419216156006e-07, -2.0582228899002075e-07, -1.9744038581848145e-07, -1.8905848264694214e-07, -1.8067657947540283e-07, -1.7229467630386353e-07, -1.6391277313232422e-07, -1.555308699607849e-07, -1.471489667892456e-07, -1.387670636177063e-07, -1.30385160446167e-07, -1.2200325727462769e-07, -1.1362135410308838e-07, -1.0523945093154907e-07, -9.685754776000977e-08, -8.847564458847046e-08, -8.009374141693115e-08, -7.171183824539185e-08, -6.332993507385254e-08, -5.494803190231323e-08, -4.6566128730773926e-08, -3.818422555923462e-08, -2.9802322387695312e-08, -2.1420419216156006e-08, -1.30385160446167e-08, -4.6566128730773926e-09, 3.725290298461914e-09, 1.210719347000122e-08, 2.0489096641540527e-08, 2.8870999813079834e-08, 3.725290298461914e-08, 4.563480615615845e-08, 5.4016709327697754e-08, 6.239861249923706e-08, 7.078051567077637e-08, 7.916241884231567e-08, 8.754432201385498e-08, 9.592622518539429e-08, 1.043081283569336e-07, 1.126900315284729e-07, 1.210719347000122e-07, 1.2945383787155151e-07, 1.3783574104309082e-07, 1.4621764421463013e-07, 1.5459954738616943e-07, 1.6298145055770874e-07, 1.7136335372924805e-07, 1.7974525690078735e-07, 1.8812716007232666e-07, 1.9650906324386597e-07, 2.0489096641540527e-07, 2.1327286958694458e-07, 2.2165477275848389e-07, 2.300366759300232e-07, 2.384185791015625e-07]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 1.0, 5.0, 6.0, 7.0, 8.0, 13.0, 11.0, 27.0, 42.0, 49.0, 63.0, 90.0, 135.0, 143.0, 101.0, 88.0, 41.0, 45.0, 30.0, 28.0, 11.0, 15.0, 11.0, 8.0, 4.0, 5.0, 5.0, 3.0, 2.0, 1.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.883167210005922e-06, -3.7362997318268754e-06, -3.589432481021504e-06, -3.442565002842457e-06, -3.2956977520370856e-06, -3.1488302738580387e-06, -3.001962795678992e-06, -2.855095317499945e-06, -2.7082280666945735e-06, -2.5613605885155266e-06, -2.414493337710155e-06, -2.2676258595311083e-06, -2.1207583813520614e-06, -1.97389113054669e-06, -1.827023652367643e-06, -1.680156287875434e-06, -1.5332889233832248e-06, -1.3864215588910156e-06, -1.2395541943988064e-06, -1.0926867162197595e-06, -9.458193517275504e-07, -7.989519872353412e-07, -6.520845658997132e-07, -5.052171445640852e-07, -3.58349780071876e-07, -2.1148238715795742e-07, -6.461499424403883e-08, 8.225239866987977e-08, 2.2911979158379836e-07, 3.759871560760075e-07, 5.228545774116355e-07, 6.697219987472636e-07, 8.165889084921218e-07, 9.63456272984331e-07, 1.1103236374765402e-06, 1.257191115655587e-06, 1.4040584801477962e-06, 1.5509258446400054e-06, 1.6977933228190523e-06, 1.8446606873112614e-06, 1.9915280518034706e-06, 2.1383955299825175e-06, 2.285262780787889e-06, 2.432130258966936e-06, 2.5789977371459827e-06, 2.725864987951354e-06, 2.872732466130401e-06, 3.0195997169357724e-06, 3.1664671951148193e-06, 3.3133346732938662e-06, 3.4602019240992377e-06, 3.6070694022782845e-06, 3.753936653083656e-06, 3.900804131262703e-06, 4.04767160944175e-06, 4.194539087620797e-06, 
4.3414065657998435e-06, 4.48827404397889e-06, 4.635141522157937e-06, 4.782008545589633e-06, 4.92887602376868e-06, 5.075743501947727e-06, 5.222610980126774e-06, 5.369478458305821e-06, 5.516345481737517e-06]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 6.0, 5.0, 3.0, 12.0, 2.0, 6.0, 14.0, 14.0, 21.0, 20.0, 16.0, 24.0, 28.0, 35.0, 26.0, 37.0, 39.0, 33.0, 51.0, 47.0, 52.0, 43.0, 38.0, 44.0, 51.0, 44.0, 41.0, 22.0, 37.0, 31.0, 36.0, 27.0, 18.0, 18.0, 15.0, 5.0, 6.0, 9.0, 10.0, 7.0, 1.0, 2.0, 4.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5280094178015133e-06, -2.4428120468655834e-06, -2.357614448555978e-06, -2.272417077620048e-06, -2.1872194793104427e-06, -2.1020221083745128e-06, -2.0168245100649074e-06, -1.9316271391289774e-06, -1.8464296545062098e-06, -1.7612321698834421e-06, -1.6760346852606745e-06, -1.5908372006379068e-06, -1.5056398297019769e-06, -1.4204422313923715e-06, -1.3352448604564415e-06, -1.2500473758336739e-06, -1.1648498912109062e-06, -1.0796524065881385e-06, -9.944549219653709e-07, -9.092574941860221e-07, -8.240600095632544e-07, -7.388625249404868e-07, -6.53665097161138e-07, -5.684676125383703e-07, -4.832701279156026e-07, -3.98072643292835e-07, -3.1287518709177675e-07, -2.2767773089071852e-07, -1.4248024626795086e-07, -5.72827616451832e-08, 2.79146661341656e-08, 1.1311215075693326e-07, 1.9830963537970092e-07, 2.835071200024686e-07, 3.687045762035268e-07, 4.5390203240458504e-07, 5.390995170273527e-07, 6.242970016501204e-07, 7.094944294294692e-07, 7.946919140522368e-07, 8.798893986750045e-07, 9.650868832977721e-07, 1.0502843679205398e-06, 1.1354818525433075e-06, 1.2206792234792374e-06, 1.3058768217888428e-06, 1.3910741927247727e-06, 1.4762716773475404e-06, 1.561469161970308e-06, 1.6466666465930757e-06, 1.7318641312158434e-06, 1.8170615021517733e-06, 1.9022591004613787e-06, 1.9874564713973086e-06, 2.0726538423332386e-06, 2.157851440642844e-06, 2.2430490389524493e-06, 2.3282464098883793e-06, 2.4134440081979847e-06, 2.4986413791339146e-06, 2.58383897744352e-06, 2.66903634837945e-06, 2.75423371931538e-06, 2.8394313176249852e-06, 2.924628688560915e-06]}, "gradients/decoder.model.decoder.layers.3.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 7.0, 12.0, 13.0, 12.0, 13.0, 36.0, 57.0, 59.0, 145.0, 159.0, 244.0, 318.0, 528.0, 818.0, 1868.0, 2513.0, 4576.0, 12878.0, 3744766.0, 411869.0, 5802.0, 2956.0, 1713.0, 1037.0, 671.0, 506.0, 226.0, 147.0, 94.0, 62.0, 68.0, 51.0, 18.0, 16.0, 8.0, 5.0, 10.0, 5.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.377696990966797e-06, -6.188638508319855e-06, -5.999580025672913e-06, -5.8105215430259705e-06, -5.621463060379028e-06, -5.432404577732086e-06, -5.243346095085144e-06, -5.054287612438202e-06, -4.86522912979126e-06, -4.676170647144318e-06, -4.4871121644973755e-06, -4.298053681850433e-06, -4.108995199203491e-06, -3.919936716556549e-06, -3.730878233909607e-06, -3.541819751262665e-06, -3.3527612686157227e-06, -3.1637027859687805e-06, -2.9746443033218384e-06, -2.7855858206748962e-06, -2.596527338027954e-06, -2.407468855381012e-06, -2.21841037273407e-06, -2.0293518900871277e-06, -1.8402934074401855e-06, -1.6512349247932434e-06, -1.4621764421463013e-06, -1.2731179594993591e-06, -1.084059476852417e-06, -8.950009942054749e-07, -7.059425115585327e-07, -5.168840289115906e-07, -3.2782554626464844e-07, -1.387670636177063e-07, 
5.029141902923584e-08, 2.39349901676178e-07, 4.284083843231201e-07, 6.174668669700623e-07, 8.065253496170044e-07, 9.955838322639465e-07, 1.1846423149108887e-06, 1.3737007975578308e-06, 1.562759280204773e-06, 1.751817762851715e-06, 1.9408762454986572e-06, 2.1299347281455994e-06, 2.3189932107925415e-06, 2.5080516934394836e-06, 2.6971101760864258e-06, 2.886168658733368e-06, 3.07522714138031e-06, 3.264285624027252e-06, 3.4533441066741943e-06, 3.6424025893211365e-06, 3.831461071968079e-06, 4.020519554615021e-06, 4.209578037261963e-06, 4.398636519908905e-06, 4.587695002555847e-06, 4.776753485202789e-06, 4.9658119678497314e-06, 5.154870450496674e-06, 5.343928933143616e-06, 5.532987415790558e-06, 5.7220458984375e-06]}, "gradients/decoder.model.decoder.layers.3.fc2.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 5.0, 0.0, 4.0, 0.0, 8.0, 0.0, 9.0, 0.0, 17.0, 19.0, 0.0, 36.0, 0.0, 38.0, 0.0, 47.0, 56.0, 0.0, 64.0, 0.0, 74.0, 0.0, 84.0, 0.0, 89.0, 97.0, 0.0, 62.0, 0.0, 65.0, 0.0, 65.0, 50.0, 0.0, 30.0, 0.0, 35.0, 0.0, 17.0, 0.0, 13.0, 5.0, 0.0, 9.0, 0.0, 4.0, 0.0, 8.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-1.0728836059570312e-06, -1.039355993270874e-06, -1.0058283805847168e-06, -9.723007678985596e-07, -9.387731552124023e-07, -9.052455425262451e-07, -8.717179298400879e-07, -8.381903171539307e-07, -8.046627044677734e-07, -7.711350917816162e-07, -7.37607479095459e-07, -7.040798664093018e-07, -6.705522537231445e-07, -6.370246410369873e-07, -6.034970283508301e-07, -5.699694156646729e-07, -5.364418029785156e-07, -5.029141902923584e-07, -4.6938657760620117e-07, -4.3585896492004395e-07, -4.023313522338867e-07, -3.688037395477295e-07, -3.3527612686157227e-07, -3.0174851417541504e-07, -2.682209014892578e-07, -2.3469328880310059e-07, -2.0116567611694336e-07, -1.6763806343078613e-07, -1.341104507446289e-07, -1.0058283805847168e-07, -6.705522537231445e-08, -3.3527612686157227e-08, 0.0, 3.3527612686157227e-08, 6.705522537231445e-08, 1.0058283805847168e-07, 1.341104507446289e-07, 1.6763806343078613e-07, 2.0116567611694336e-07, 2.3469328880310059e-07, 2.682209014892578e-07, 3.0174851417541504e-07, 3.3527612686157227e-07, 3.688037395477295e-07, 4.023313522338867e-07, 4.3585896492004395e-07, 4.6938657760620117e-07, 5.029141902923584e-07, 5.364418029785156e-07, 5.699694156646729e-07, 6.034970283508301e-07, 6.370246410369873e-07, 6.705522537231445e-07, 7.040798664093018e-07, 7.37607479095459e-07, 7.711350917816162e-07, 8.046627044677734e-07, 8.381903171539307e-07, 8.717179298400879e-07, 9.052455425262451e-07, 9.387731552124023e-07, 9.723007678985596e-07, 1.0058283805847168e-06, 1.039355993270874e-06, 1.0728836059570312e-06]}, "gradients/decoder.model.decoder.layers.3.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 4.0, 4.0, 7.0, 22.0, 54.0, 216.0, 1181.0, 41966.0, 4148338.0, 1941.0, 371.0, 105.0, 38.0, 16.0, 9.0, 8.0, 1.0, 0.0, 4.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5331974029541016e-05, -2.470053732395172e-05, -2.4069100618362427e-05, -2.3437663912773132e-05, -2.2806227207183838e-05, -2.2174790501594543e-05, -2.154335379600525e-05, -2.0911917090415955e-05, -2.028048038482666e-05, -1.9649043679237366e-05, -1.901760697364807e-05, -1.8386170268058777e-05, -1.7754733562469482e-05, -1.7123296856880188e-05, -1.6491860151290894e-05, -1.58604234457016e-05, -1.5228986740112305e-05, 
-1.459755003452301e-05, -1.3966113328933716e-05, -1.3334676623344421e-05, -1.2703239917755127e-05, -1.2071803212165833e-05, -1.1440366506576538e-05, -1.0808929800987244e-05, -1.017749309539795e-05, -9.546056389808655e-06, -8.91461968421936e-06, -8.283182978630066e-06, -7.651746273040771e-06, -7.020309567451477e-06, -6.388872861862183e-06, -5.757436156272888e-06, -5.125999450683594e-06, -4.494562745094299e-06, -3.863126039505005e-06, -3.2316893339157104e-06, -2.600252628326416e-06, -1.9688159227371216e-06, -1.3373792171478271e-06, -7.059425115585327e-07, -7.450580596923828e-08, 5.569308996200562e-07, 1.1883676052093506e-06, 1.819804310798645e-06, 2.4512410163879395e-06, 3.082677721977234e-06, 3.7141144275665283e-06, 4.345551133155823e-06, 4.976987838745117e-06, 5.608424544334412e-06, 6.239861249923706e-06, 6.8712979555130005e-06, 7.502734661102295e-06, 8.13417136669159e-06, 8.765608072280884e-06, 9.397044777870178e-06, 1.0028481483459473e-05, 1.0659918189048767e-05, 1.1291354894638062e-05, 1.1922791600227356e-05, 1.255422830581665e-05, 1.3185665011405945e-05, 1.381710171699524e-05, 1.4448538422584534e-05, 1.5079975128173828e-05]}, "gradients/decoder.model.decoder.layers.3.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 4.0, 5.0, 3.0, 4.0, 24.0, 59.0, 301.0, 3254.0, 304.0, 52.0, 25.0, 29.0, 4.0, 6.0, 2.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6226043701171875e-06, -2.5546178221702576e-06, -2.4866312742233276e-06, -2.4186447262763977e-06, -2.3506581783294678e-06, -2.282671630382538e-06, -2.214685082435608e-06, -2.146698534488678e-06, -2.078711986541748e-06, -2.010725438594818e-06, -1.942738890647888e-06, -1.8747523427009583e-06, -1.8067657947540283e-06, -1.7387792468070984e-06, -1.6707926988601685e-06, -1.6028061509132385e-06, -1.5348196029663086e-06, -1.4668330550193787e-06, -1.3988465070724487e-06, -1.3308599591255188e-06, -1.2628734111785889e-06, -1.194886863231659e-06, -1.126900315284729e-06, -1.058913767337799e-06, -9.909272193908691e-07, -9.229406714439392e-07, -8.549541234970093e-07, -7.869675755500793e-07, -7.189810276031494e-07, -6.509944796562195e-07, -5.830079317092896e-07, -5.150213837623596e-07, -4.470348358154297e-07, -3.7904828786849976e-07, -3.110617399215698e-07, -2.430751919746399e-07, -1.7508864402770996e-07, -1.0710209608078003e-07, -3.91155481338501e-08, 2.8870999813079834e-08, 9.685754776000977e-08, 1.648440957069397e-07, 2.3283064365386963e-07, 3.0081719160079956e-07, 3.688037395477295e-07, 4.367902874946594e-07, 5.047768354415894e-07, 5.727633833885193e-07, 6.407499313354492e-07, 7.087364792823792e-07, 7.767230272293091e-07, 8.44709575176239e-07, 9.126961231231689e-07, 9.806826710700989e-07, 1.0486692190170288e-06, 1.1166557669639587e-06, 1.1846423149108887e-06, 1.2526288628578186e-06, 1.3206154108047485e-06, 1.3886019587516785e-06, 1.4565885066986084e-06, 1.5245750546455383e-06, 1.5925616025924683e-06, 1.6605481505393982e-06, 1.7285346984863281e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 1.0, 1.0, 3.0, 6.0, 7.0, 10.0, 14.0, 19.0, 19.0, 41.0, 49.0, 82.0, 135.0, 176.0, 145.0, 98.0, 57.0, 42.0, 26.0, 22.0, 13.0, 12.0, 10.0, 5.0, 8.0, 2.0, 1.0, 2.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.492502062523272e-06, -2.3950819922902156e-06, -2.2976619220571592e-06, -2.2002416244504275e-06, -2.102821554217371e-06, -2.005401483984315e-06, -1.9079814137512585e-06, -1.8105612298313645e-06, -1.7131410459114704e-06, -1.6157209756784141e-06, -1.51830079175852e-06, -1.4208807215254637e-06, -1.3234605376055697e-06, -1.2260404673725134e-06, -1.128620397139457e-06, -1.031200213219563e-06, -9.337801429865067e-07, -8.363600159100315e-07, -7.389398888335563e-07, -6.415198186005e-07, -5.44099634680606e-07, -4.4667956444754964e-07, -3.4925943737107445e-07, -2.5183931029459927e-07, -1.544191832181241e-07, -5.6999063247076265e-08, 4.042105672397156e-08, 1.3784116958959203e-07, 2.352612966660672e-07, 3.3268139532083296e-07, 4.3010152239730814e-07, 5.275216494737833e-07, 6.249417765502585e-07, 7.223619036267337e-07, 8.197820307032089e-07, 9.172021009362652e-07, 1.0146222848561592e-06, 1.1120423550892156e-06, 1.2094624253222719e-06, 1.306882609242166e-06, 1.40430279316206e-06, 1.5017228633951163e-06, 1.5991430473150103e-06, 1.6965631175480667e-06, 1.7939833014679607e-06, 1.891403371701017e-06, 1.9888234419340733e-06, 2.0862435121671297e-06, 2.183663582400186e-06, 2.2810836526332423e-06, 2.3785037228662986e-06, 2.4759240204730304e-06, 2.5733440907060867e-06, 2.670764160939143e-06, 2.7681842311721994e-06, 2.865604528778931e-06, 2.9630245990119874e-06, 3.0604446692450438e-06, 3.1578647394781e-06, 3.255285037084832e-06, 3.352705107317888e-06, 3.4501251775509445e-06, 3.547545247784001e-06, 3.6449655453907326e-06, 3.742385615623789e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 3.0, 9.0, 9.0, 7.0, 8.0, 17.0, 16.0, 19.0, 23.0, 32.0, 29.0, 46.0, 48.0, 48.0, 55.0, 49.0, 51.0, 33.0, 62.0, 62.0, 58.0, 38.0, 43.0, 37.0, 37.0, 28.0, 36.0, 20.0, 16.0, 18.0, 8.0, 12.0, 6.0, 5.0, 8.0, 4.0, 2.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4537446304530022e-06, -1.403905798724736e-06, -1.3540668533096323e-06, -1.3042279078945285e-06, -1.2543890761662624e-06, -1.2045502444379963e-06, -1.1547112990228925e-06, -1.1048723536077887e-06, -1.0550335218795226e-06, -1.0051946901512565e-06, -9.553557447361527e-07, -9.055168561644678e-07, -8.556779675927828e-07, -8.058390790210979e-07, -7.560001904494129e-07, -7.06161301877728e-07, -6.56322413306043e-07, -6.064835247343581e-07, -5.566446361626731e-07, -5.068057475909882e-07, -4.5696685901930323e-07, -4.071279704476183e-07, -3.5728908187593333e-07, -3.074501933042484e-07, -2.5761130473256344e-07, -2.077724161608785e-07, -1.5793352758919355e-07, -1.080946390175086e-07, -5.825575044582365e-08, -8.416861874138704e-09, 4.1422026697546244e-08, 9.126091526923119e-08, 1.4109980384091614e-07, 1.9093869241260109e-07, 2.4077758098428603e-07, 2.90616469555971e-07, 3.4045535812765593e-07, 3.902942466993409e-07, 4.401331352710258e-07, 4.899720238427108e-07, 5.398109124143957e-07, 5.896498009860807e-07, 6.394886895577656e-07, 6.893275781294506e-07, 7.391664667011355e-07, 7.890053552728205e-07, 8.388442438445054e-07, 8.886831324161903e-07, 9.385220209878753e-07, 9.883608527161414e-07, 1.0381997981312452e-06, 1.088038743546349e-06, 1.137877575274615e-06, 1.1877164070028812e-06, 1.237555352417985e-06, 1.2873942978330888e-06, 1.3372331295613549e-06, 1.387071961289621e-06, 1.4369109067047248e-06, 1.4867498521198286e-06, 1.5365886838480947e-06, 1.5864275155763607e-06, 
1.6362664609914646e-06, 1.6861054064065684e-06, 1.7359442381348344e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 154.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4485.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1039271.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4496.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 141.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 15.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.980232238769531e-07, -2.896413207054138e-07, -2.812594175338745e-07, -2.728775143623352e-07, -2.644956111907959e-07, -2.561137080192566e-07, -2.477318048477173e-07, -2.39349901676178e-07, -2.3096799850463867e-07, -2.2258609533309937e-07, -2.1420419216156006e-07, -2.0582228899002075e-07, -1.9744038581848145e-07, -1.8905848264694214e-07, -1.8067657947540283e-07, -1.7229467630386353e-07, -1.6391277313232422e-07, -1.555308699607849e-07, -1.471489667892456e-07, -1.387670636177063e-07, -1.30385160446167e-07, -1.2200325727462769e-07, -1.1362135410308838e-07, -1.0523945093154907e-07, -9.685754776000977e-08, -8.847564458847046e-08, -8.009374141693115e-08, -7.171183824539185e-08, -6.332993507385254e-08, -5.494803190231323e-08, -4.6566128730773926e-08, -3.818422555923462e-08, -2.9802322387695312e-08, -2.1420419216156006e-08, -1.30385160446167e-08, -4.6566128730773926e-09, 3.725290298461914e-09, 1.210719347000122e-08, 2.0489096641540527e-08, 2.8870999813079834e-08, 3.725290298461914e-08, 4.563480615615845e-08, 5.4016709327697754e-08, 6.239861249923706e-08, 7.078051567077637e-08, 7.916241884231567e-08, 8.754432201385498e-08, 9.592622518539429e-08, 1.043081283569336e-07, 1.126900315284729e-07, 1.210719347000122e-07, 1.2945383787155151e-07, 1.3783574104309082e-07, 1.4621764421463013e-07, 1.5459954738616943e-07, 1.6298145055770874e-07, 1.7136335372924805e-07, 1.7974525690078735e-07, 1.8812716007232666e-07, 1.9650906324386597e-07, 2.0489096641540527e-07, 2.1327286958694458e-07, 2.2165477275848389e-07, 2.300366759300232e-07, 2.384185791015625e-07]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 1.0, 0.0, 10.0, 0.0, 14.0, 23.0, 0.0, 28.0, 44.0, 0.0, 52.0, 0.0, 84.0, 84.0, 0.0, 112.0, 116.0, 0.0, 101.0, 0.0, 90.0, 79.0, 0.0, 59.0, 32.0, 0.0, 25.0, 0.0, 24.0, 9.0, 0.0, 7.0, 10.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.3709068298339844e-06, -1.3336539268493652e-06, -1.296401023864746e-06, -1.259148120880127e-06, -1.2218952178955078e-06, -1.1846423149108887e-06, -1.1473894119262695e-06, -1.1101365089416504e-06, -1.0728836059570312e-06, -1.0356307029724121e-06, -9.98377799987793e-07, -9.611248970031738e-07, -9.238719940185547e-07, -8.866190910339355e-07, -8.493661880493164e-07, -8.121132850646973e-07, -7.748603820800781e-07, -7.37607479095459e-07, -7.003545761108398e-07, -6.631016731262207e-07, -6.258487701416016e-07, -5.885958671569824e-07, -5.513429641723633e-07, -5.140900611877441e-07, -4.76837158203125e-07, -4.3958425521850586e-07, -4.023313522338867e-07, -3.650784492492676e-07, -3.2782554626464844e-07, -2.905726432800293e-07, -2.5331974029541016e-07, -2.1606683731079102e-07, -1.7881393432617188e-07, -1.4156103134155273e-07, -1.043081283569336e-07, -6.705522537231445e-08, -2.9802322387695312e-08, 7.450580596923828e-09, 4.470348358154297e-08, 8.195638656616211e-08, 1.1920928955078125e-07, 
1.564621925354004e-07, 1.9371509552001953e-07, 2.3096799850463867e-07, 2.682209014892578e-07, 3.0547380447387695e-07, 3.427267074584961e-07, 3.7997961044311523e-07, 4.172325134277344e-07, 4.544854164123535e-07, 4.917383193969727e-07, 5.289912223815918e-07, 5.662441253662109e-07, 6.034970283508301e-07, 6.407499313354492e-07, 6.780028343200684e-07, 7.152557373046875e-07, 7.525086402893066e-07, 7.897615432739258e-07, 8.270144462585449e-07, 8.642673492431641e-07, 9.015202522277832e-07, 9.387731552124023e-07, 9.760260581970215e-07, 1.0132789611816406e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [113.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048374.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 89.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, -4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 4.0, 6.0, 7.0, 1.0, 1.0, 5.0, 5.0, 9.0, 14.0, 21.0, 15.0, 13.0, 19.0, 17.0, 36.0, 50.0, 36.0, 46.0, 31.0, 13.0, 18.0, 33.0, 50.0, 136.0, 47.0, 16.0, 19.0, 17.0, 29.0, 37.0, 40.0, 53.0, 34.0, 8.0, 8.0, 7.0, 20.0, 23.0, 19.0, 10.0, 10.0, 5.0, 4.0, 6.0, 1.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.0256265997886658e-06, -1.9650906324386597e-06, -1.9045546650886536e-06, -1.8440186977386475e-06, -1.7834827303886414e-06, -1.7229467630386353e-06, -1.6624107956886292e-06, -1.601874828338623e-06, -1.541338860988617e-06, -1.4808028936386108e-06, -1.4202669262886047e-06, -1.3597309589385986e-06, -1.2991949915885925e-06, -1.2386590242385864e-06, -1.1781230568885803e-06, -1.1175870895385742e-06, -1.0570511221885681e-06, -9.96515154838562e-07, -9.359791874885559e-07, -8.754432201385498e-07, -8.149072527885437e-07, 
-7.543712854385376e-07, -6.938353180885315e-07, -6.332993507385254e-07, -5.727633833885193e-07, -5.122274160385132e-07, -4.516914486885071e-07, -3.91155481338501e-07, -3.3061951398849487e-07, -2.7008354663848877e-07, -2.0954757928848267e-07, -1.4901161193847656e-07, -8.847564458847046e-08, -2.7939677238464355e-08, 3.259629011154175e-08, 9.313225746154785e-08, 1.5366822481155396e-07, 2.1420419216156006e-07, 2.7474015951156616e-07, 3.3527612686157227e-07, 3.9581209421157837e-07, 4.5634806156158447e-07, 5.168840289115906e-07, 5.774199962615967e-07, 6.379559636116028e-07, 6.984919309616089e-07, 7.59027898311615e-07, 8.195638656616211e-07, 8.800998330116272e-07, 9.406358003616333e-07, 1.0011717677116394e-06, 1.0617077350616455e-06, 1.1222437024116516e-06, 1.1827796697616577e-06, 1.2433156371116638e-06, 1.30385160446167e-06, 1.364387571811676e-06, 1.4249235391616821e-06, 1.4854595065116882e-06, 1.5459954738616943e-06, 1.6065314412117004e-06, 1.6670674085617065e-06, 1.7276033759117126e-06, 1.7881393432617188e-06]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1001.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, -4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 
4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 6.0, 5.0, 14.0, 8.0, 17.0, 32.0, 36.0, 46.0, 75.0, 133.0, 162.0, 177.0, 107.0, 51.0, 44.0, 23.0, 19.0, 13.0, 6.0, 9.0, 7.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.3345915022000554e-06, -1.276193984267593e-06, -1.2177964663351304e-06, -1.1593988347158302e-06, -1.1010013167833677e-06, -1.0426037988509052e-06, -9.842062809184426e-07, -9.258087629859801e-07, -8.674111882100988e-07, -8.090136702776363e-07, -7.506160955017549e-07, -6.922185775692924e-07, -6.338210596368299e-07, -5.754234848609485e-07, -5.17025966928486e-07, -4.586284205743141e-07, -4.0023087422014214e-07, -3.418333278659702e-07, -2.8343578151179827e-07, -2.2503826357933576e-07, -1.6664071722516383e-07, -1.0824317087099189e-07, -4.9845652938529383e-08, 8.551893415642553e-09, 6.694943976981449e-08, 1.2534698612398643e-07, 1.8374451826730365e-07, 2.4214205041062087e-07, 3.005395967647928e-07, 3.5893714311896474e-07, 4.1733466105142725e-07, 4.757322074055992e-07, 5.341296400729334e-07, 5.925271580053959e-07, 6.509247327812773e-07, 7.093222507137398e-07, 7.677198254896211e-07, 8.261173434220836e-07, 8.845148613545462e-07, 9.429123792870087e-07, 1.0013100109063089e-06, 1.0597075288387714e-06, 1.118105046771234e-06, 1.1765025647036964e-06, 1.2349001963229966e-06, 1.2932977142554591e-06, 1.3516952321879216e-06, 1.4100927501203842e-06, 1.4684902680528467e-06, 1.5268877859853092e-06, 1.5852853039177717e-06, 1.643682935537072e-06, 1.7020804534695344e-06, 1.760477971401997e-06, 1.8188754893344594e-06, 1.877273007266922e-06, 
1.9356705251993844e-06, 1.994068043131847e-06, 2.0524655610643094e-06, 2.110863078996772e-06, 2.1692605969292345e-06, 2.227658114861697e-06, 2.286055860167835e-06, 2.3444533781002974e-06, 2.40285089603276e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 5.0, 1.0, 10.0, 8.0, 11.0, 12.0, 15.0, 15.0, 22.0, 32.0, 39.0, 48.0, 44.0, 63.0, 55.0, 57.0, 49.0, 54.0, 69.0, 57.0, 55.0, 47.0, 35.0, 29.0, 43.0, 32.0, 17.0, 20.0, 14.0, 10.0, 14.0, 5.0, 5.0, 4.0, 7.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0938127843473922e-06, -1.0600149380479706e-06, -1.0262170917485491e-06, -9.924192454491276e-07, -9.586213991497061e-07, -9.248235528502846e-07, -8.910257065508631e-07, -8.572278602514416e-07, -8.234300139520201e-07, -7.896321676525986e-07, -7.558343213531771e-07, -7.220364750537556e-07, -6.882386287543341e-07, -6.544407824549126e-07, -6.20642936155491e-07, -5.868450898560695e-07, -5.53047243556648e-07, -5.192493972572265e-07, -4.85451550957805e-07, -4.516537046583835e-07, -4.17855858358962e-07, -3.840580120595405e-07, -3.50260165760119e-07, -3.1646231946069747e-07, -2.8266447316127596e-07, -2.4886662686185446e-07, -2.1506878056243295e-07, -1.8127093426301144e-07, -1.4747308796358993e-07, -1.1367524166416842e-07, -7.987739536474692e-08, -4.607954906532541e-08, -1.2281702765903901e-08, 2.1516143533517607e-08, 5.5313989832939114e-08, 8.911183613236062e-08, 1.2290968243178213e-07, 1.5670752873120364e-07, 1.9050537503062515e-07, 2.2430322133004665e-07, 2.5810106762946816e-07, 2.9189891392888967e-07, 3.256967602283112e-07, 3.594946065277327e-07, 3.932924528271542e-07, 4.270902991265757e-07, 4.608881454259972e-07, 4.946859917254187e-07, 5.284838380248402e-07, 5.622816843242617e-07, 5.960795306236832e-07, 6.298773769231047e-07, 6.636752232225263e-07, 6.974730695219478e-07, 7.312709158213693e-07, 7.650687621207908e-07, 7.988666084202123e-07, 8.326644547196338e-07, 8.664623010190553e-07, 9.002601473184768e-07, 9.340579936178983e-07, 9.678558399173198e-07, 1.0016536862167413e-06, 1.0354515325161628e-06, 1.0692493788155844e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 2.0, 9.0, 8.0, 9.0, 13.0, 28.0, 39.0, 33.0, 175.0, 155.0, 273.0, 466.0, 886.0, 1811.0, 4181.0, 44686.0, 141247.0, 660655.0, 141064.0, 33485.0, 11063.0, 4274.0, 2787.0, 460.0, 274.0, 159.0, 106.0, 62.0, 48.0, 59.0, 14.0, 9.0, 9.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.018176019191742e-06, -1.950189471244812e-06, -1.882202923297882e-06, -1.8142163753509521e-06, -1.7462298274040222e-06, -1.6782432794570923e-06, -1.6102567315101624e-06, -1.5422701835632324e-06, -1.4742836356163025e-06, -1.4062970876693726e-06, -1.3383105397224426e-06, -1.2703239917755127e-06, -1.2023374438285828e-06, -1.1343508958816528e-06, -1.066364347934723e-06, -9.98377799987793e-07, -9.30391252040863e-07, -8.624047040939331e-07, -7.944181561470032e-07, -7.264316082000732e-07, -6.584450602531433e-07, -5.904585123062134e-07, -5.224719643592834e-07, -4.544854164123535e-07, -3.864988684654236e-07, -3.1851232051849365e-07, -2.505257725715637e-07, -1.825392246246338e-07, -1.1455267667770386e-07, -4.6566128730773926e-08, 2.1420419216156006e-08, 8.940696716308594e-08, 1.5739351511001587e-07, 
2.253800630569458e-07, 2.9336661100387573e-07, 3.6135315895080566e-07, 4.293397068977356e-07, 4.973262548446655e-07, 5.653128027915955e-07, 6.332993507385254e-07, 7.012858986854553e-07, 7.692724466323853e-07, 8.372589945793152e-07, 9.052455425262451e-07, 9.73232090473175e-07, 1.041218638420105e-06, 1.109205186367035e-06, 1.1771917343139648e-06, 1.2451782822608948e-06, 1.3131648302078247e-06, 1.3811513781547546e-06, 1.4491379261016846e-06, 1.5171244740486145e-06, 1.5851110219955444e-06, 1.6530975699424744e-06, 1.7210841178894043e-06, 1.7890706658363342e-06, 1.8570572137832642e-06, 1.925043761730194e-06, 1.993030309677124e-06, 2.061016857624054e-06, 2.129003405570984e-06, 2.196989953517914e-06, 2.2649765014648438e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 6.0, 5.0, 7.0, 8.0, 12.0, 17.0, 16.0, 30.0, 33.0, 31.0, 46.0, 54.0, 60.0, 64.0, 54.0, 65.0, 59.0, 54.0, 51.0, 55.0, 61.0, 49.0, 28.0, 38.0, 17.0, 19.0, 13.0, 19.0, 13.0, 3.0, 6.0, 5.0, 3.0, 6.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-6.258487701416016e-06, -6.075017154216766e-06, -5.891546607017517e-06, -5.708076059818268e-06, -5.5246055126190186e-06, -5.341134965419769e-06, -5.15766441822052e-06, -4.974193871021271e-06, -4.7907233238220215e-06, -4.607252776622772e-06, -4.423782229423523e-06, -4.240311682224274e-06, -4.056841135025024e-06, -3.873370587825775e-06, -3.689900040626526e-06, -3.5064294934272766e-06, -3.3229589462280273e-06, -3.139488399028778e-06, -2.956017851829529e-06, -2.7725473046302795e-06, -2.5890767574310303e-06, -2.405606210231781e-06, -2.2221356630325317e-06, -2.0386651158332825e-06, -1.8551945686340332e-06, -1.671724021434784e-06, -1.4882534742355347e-06, -1.3047829270362854e-06, -1.1213123798370361e-06, -9.378418326377869e-07, -7.543712854385376e-07, -5.709007382392883e-07, -3.8743019104003906e-07, -2.039596438407898e-07, -2.0489096641540527e-08, 1.6298145055770874e-07, 3.46451997756958e-07, 5.299225449562073e-07, 7.133930921554565e-07, 8.968636393547058e-07, 1.080334186553955e-06, 1.2638047337532043e-06, 1.4472752809524536e-06, 1.6307458281517029e-06, 1.8142163753509521e-06, 1.9976869225502014e-06, 2.1811574697494507e-06, 2.3646280169487e-06, 2.5480985641479492e-06, 2.7315691113471985e-06, 2.9150396585464478e-06, 3.098510205745697e-06, 3.2819807529449463e-06, 3.4654513001441956e-06, 3.648921847343445e-06, 3.832392394542694e-06, 4.015862941741943e-06, 4.199333488941193e-06, 4.382804036140442e-06, 4.566274583339691e-06, 4.7497451305389404e-06, 4.93321567773819e-06, 5.116686224937439e-06, 5.300156772136688e-06, 5.4836273193359375e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 1.0, 4.0, 2.0, 2.0, 11.0, 6.0, 11.0, 11.0, 11.0, 17.0, 23.0, 17.0, 37.0, 32.0, 41.0, 47.0, 80.0, 159.0, 971.0, 965097.0, 81014.0, 510.0, 100.0, 69.0, 56.0, 49.0, 29.0, 30.0, 25.0, 18.0, 20.0, 13.0, 13.0, 10.0, 7.0, 7.0, 7.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.1086463928222656e-05, -1.0738149285316467e-05, -1.0389834642410278e-05, -1.004151999950409e-05, -9.6932053565979e-06, -9.344890713691711e-06, -8.996576070785522e-06, -8.648261427879333e-06, -8.299946784973145e-06, -7.951632142066956e-06, -7.603317499160767e-06, -7.255002856254578e-06, -6.906688213348389e-06, -6.5583735704422e-06, 
-6.210058927536011e-06, -5.861744284629822e-06, -5.513429641723633e-06, -5.165114998817444e-06, -4.816800355911255e-06, -4.468485713005066e-06, -4.120171070098877e-06, -3.771856427192688e-06, -3.423541784286499e-06, -3.07522714138031e-06, -2.726912498474121e-06, -2.378597855567932e-06, -2.030283212661743e-06, -1.6819685697555542e-06, -1.3336539268493652e-06, -9.853392839431763e-07, -6.370246410369873e-07, -2.8870999813079834e-07, 5.960464477539063e-08, 4.079192876815796e-07, 7.562339305877686e-07, 1.1045485734939575e-06, 1.4528632164001465e-06, 1.8011778593063354e-06, 2.1494925022125244e-06, 2.4978071451187134e-06, 2.8461217880249023e-06, 3.1944364309310913e-06, 3.5427510738372803e-06, 3.891065716743469e-06, 4.239380359649658e-06, 4.587695002555847e-06, 4.936009645462036e-06, 5.284324288368225e-06, 5.632638931274414e-06, 5.980953574180603e-06, 6.329268217086792e-06, 6.677582859992981e-06, 7.02589750289917e-06, 7.374212145805359e-06, 7.722526788711548e-06, 8.070841431617737e-06, 8.419156074523926e-06, 8.767470717430115e-06, 9.115785360336304e-06, 9.464100003242493e-06, 9.812414646148682e-06, 1.016072928905487e-05, 1.050904393196106e-05, 1.0857358574867249e-05, 1.1205673217773438e-05]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 1.0, 3.0, 2.0, 1.0, 6.0, 6.0, 13.0, 9.0, 8.0, 14.0, 17.0, 22.0, 26.0, 31.0, 35.0, 39.0, 49.0, 54.0, 41.0, 74.0, 48.0, 49.0, 60.0, 48.0, 63.0, 45.0, 45.0, 38.0, 27.0, 21.0, 24.0, 15.0, 20.0, 11.0, 16.0, 4.0, 7.0, 8.0, 5.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.450580596923828e-06, -7.209368050098419e-06, -6.96815550327301e-06, -6.726942956447601e-06, -6.485730409622192e-06, -6.2445178627967834e-06, -6.0033053159713745e-06, -5.7620927691459656e-06, -5.520880222320557e-06, -5.279667675495148e-06, -5.038455128669739e-06, -4.79724258184433e-06, -4.556030035018921e-06, -4.314817488193512e-06, -4.073604941368103e-06, -3.832392394542694e-06, -3.591179847717285e-06, -3.3499673008918762e-06, -3.1087547540664673e-06, -2.8675422072410583e-06, -2.6263296604156494e-06, -2.3851171135902405e-06, -2.1439045667648315e-06, -1.9026920199394226e-06, -1.6614794731140137e-06, -1.4202669262886047e-06, -1.1790543794631958e-06, -9.378418326377869e-07, -6.966292858123779e-07, -4.55416738986969e-07, -2.1420419216156006e-07, 2.7008354663848877e-08, 2.682209014892578e-07, 5.094334483146667e-07, 7.506459951400757e-07, 9.918585419654846e-07, 1.2330710887908936e-06, 1.4742836356163025e-06, 1.7154961824417114e-06, 1.9567087292671204e-06, 2.1979212760925293e-06, 2.4391338229179382e-06, 2.680346369743347e-06, 2.921558916568756e-06, 3.162771463394165e-06, 3.403984010219574e-06, 3.645196557044983e-06, 3.886409103870392e-06, 4.127621650695801e-06, 4.36883419752121e-06, 4.610046744346619e-06, 4.851259291172028e-06, 5.0924718379974365e-06, 5.3336843848228455e-06, 5.574896931648254e-06, 5.816109478473663e-06, 6.057322025299072e-06, 6.298534572124481e-06, 6.53974711894989e-06, 6.780959665775299e-06, 7.022172212600708e-06, 7.263384759426117e-06, 7.504597306251526e-06, 7.745809853076935e-06, 7.987022399902344e-06]}, "gradients/decoder.model.decoder.layers.3.self_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.self_attn.k_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1019.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.026277439261321e-06, -6.980095349717885e-06, -4.933912805427099e-06, -2.887730261136312e-06, -8.415481715928763e-07, 1.2046339179505594e-06, 3.250816916988697e-06, 5.296999006532133e-06, 7.343181096075568e-06, 9.389363185619004e-06, 1.1435546184657142e-05, 1.3481728274200577e-05, 1.5527910363744013e-05, 1.757409336278215e-05, 1.9620274542830884e-05, 2.1666457541869022e-05, 2.371264054090716e-05, 
2.5758823539945297e-05, 2.780500471999403e-05, 2.985118771903217e-05, 3.18973688990809e-05, 3.3943550079129636e-05, 3.598973489715718e-05, 3.803591607720591e-05, 4.0082097257254645e-05, 4.212827843730338e-05, 4.417446325533092e-05, 4.6220644435379654e-05, 4.826682561542839e-05, 5.031300679547712e-05, 5.235919161350466e-05, 5.44053727935534e-05, 5.6451550335623324e-05, 5.849773151567206e-05, 6.05439163336996e-05, 6.259010115172714e-05, 6.463628233177587e-05, 6.668246351182461e-05, 6.872864469187334e-05, 7.077482587192208e-05, 7.282100705197081e-05, 7.486718823201954e-05, 7.691336941206828e-05, 7.895955059211701e-05, 8.100573904812336e-05, 8.30519202281721e-05, 8.509810140822083e-05, 8.714428258826956e-05, 8.91904637683183e-05, 9.123664494836703e-05, 9.328282612841576e-05, 9.532901458442211e-05, 9.737519576447085e-05, 9.942137694451958e-05, 0.00010146755812456831, 0.00010351373930461705, 0.0001055599277606234, 0.00010760610894067213, 0.00010965229012072086, 0.00011169847857672721, 0.00011374465975677595, 0.00011579084093682468, 0.00011783702211687341, 0.00011988320329692215, 0.00012192938447697088]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 5.0, 4.0, 5.0, 5.0, 9.0, 8.0, 7.0, 13.0, 16.0, 17.0, 19.0, 26.0, 18.0, 31.0, 24.0, 37.0, 42.0, 38.0, 42.0, 47.0, 37.0, 40.0, 38.0, 52.0, 45.0, 43.0, 29.0, 31.0, 32.0, 31.0, 24.0, 30.0, 26.0, 34.0, 11.0, 14.0, 13.0, 16.0, 6.0, 11.0, 5.0, 7.0, 8.0, 3.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.1211740456928965e-05, -1.0827145160874352e-05, -1.044255077431444e-05, -1.0057955478259828e-05, -9.673360182205215e-06, -9.288765795645304e-06, -8.904170499590691e-06, -8.519575203536078e-06, -8.134980816976167e-06, -7.750385520921554e-06, -7.365791134361643e-06, -6.9811958383070305e-06, -6.5966009969997685e-06, -6.212006155692507e-06, -5.827410859637894e-06, -5.442816018330632e-06, -5.05822117702337e-06, -4.673626335716108e-06, -4.289031494408846e-06, -3.904436198354233e-06, -3.519841357046971e-06, -3.135246515739709e-06, -2.7506514470587717e-06, -2.3660563783778343e-06, -1.9814615370705724e-06, -1.5968665820764727e-06, -1.212271627082373e-06, -8.276766720882733e-07, -4.4308171709417365e-07, -5.8486875786911696e-08, 3.261081928940257e-07, 7.107032615749631e-07, 1.0952971933875233e-06, 1.479892148381623e-06, 1.8644871033757227e-06, 2.24908217205666e-06, 2.633677013363922e-06, 3.018271854671184e-06, 3.4028669233521214e-06, 3.7874619920330588e-06, 4.172056833340321e-06, 4.556651674647583e-06, 4.941246515954845e-06, 5.3258418120094575e-06, 5.7104366533167195e-06, 6.095031494623981e-06, 6.479626790678594e-06, 6.864221631985856e-06, 7.248816473293118e-06, 7.633411769347731e-06, 8.018006155907642e-06, 8.402601451962255e-06, 8.787195838522166e-06, 9.171791134576779e-06, 9.556386430631392e-06, 9.940980817191303e-06, 1.0325576113245916e-05, 1.0710171409300528e-05, 1.109476579586044e-05, 1.1479361091915052e-05, 1.1863956387969665e-05, 1.2248550774529576e-05, 1.2633146070584189e-05, 1.3017741366638802e-05, 1.3402335753198713e-05]}, "gradients/decoder.model.decoder.layers.2.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 17.0, 0.0, 0.0, 17.0, 820.0, 4193343.0, 30.0, 20.0, 0.0, 20.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0], 
"bins": [-8.165836334228516e-06, -7.919035851955414e-06, -7.672235369682312e-06, -7.42543488740921e-06, -7.178634405136108e-06, -6.931833922863007e-06, -6.685033440589905e-06, -6.438232958316803e-06, -6.191432476043701e-06, -5.944631993770599e-06, -5.6978315114974976e-06, -5.451031029224396e-06, -5.204230546951294e-06, -4.957430064678192e-06, -4.71062958240509e-06, -4.4638291001319885e-06, -4.217028617858887e-06, -3.970228135585785e-06, -3.723427653312683e-06, -3.4766271710395813e-06, -3.2298266887664795e-06, -2.9830262064933777e-06, -2.736225724220276e-06, -2.489425241947174e-06, -2.2426247596740723e-06, -1.9958242774009705e-06, -1.7490237951278687e-06, -1.5022233128547668e-06, -1.255422830581665e-06, -1.0086223483085632e-06, -7.618218660354614e-07, -5.150213837623596e-07, -2.682209014892578e-07, -2.1420419216156006e-08, 2.253800630569458e-07, 4.721805453300476e-07, 7.189810276031494e-07, 9.657815098762512e-07, 1.212581992149353e-06, 1.4593824744224548e-06, 1.7061829566955566e-06, 1.9529834389686584e-06, 2.1997839212417603e-06, 2.446584403514862e-06, 2.693384885787964e-06, 2.9401853680610657e-06, 3.1869858503341675e-06, 3.4337863326072693e-06, 3.680586814880371e-06, 3.927387297153473e-06, 4.174187779426575e-06, 4.4209882616996765e-06, 4.667788743972778e-06, 4.91458922624588e-06, 5.161389708518982e-06, 5.408190190792084e-06, 5.6549906730651855e-06, 5.901791155338287e-06, 6.148591637611389e-06, 6.395392119884491e-06, 6.642192602157593e-06, 6.888993084430695e-06, 7.135793566703796e-06, 7.382594048976898e-06, 7.62939453125e-06]}, "gradients/decoder.model.decoder.layers.2.fc2.bias": {"_type": "histogram", "values": [21.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 978.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 24.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.960464477539063e-08, -5.681067705154419e-08, -5.4016709327697754e-08, -5.122274160385132e-08, -4.842877388000488e-08, -4.563480615615845e-08, -4.284083843231201e-08, -4.0046870708465576e-08, -3.725290298461914e-08, -3.4458935260772705e-08, -3.166496753692627e-08, -2.8870999813079834e-08, -2.60770320892334e-08, -2.3283064365386963e-08, -2.0489096641540527e-08, -1.7695128917694092e-08, -1.4901161193847656e-08, -1.210719347000122e-08, -9.313225746154785e-09, -6.51925802230835e-09, -3.725290298461914e-09, -9.313225746154785e-10, 1.862645149230957e-09, 4.6566128730773926e-09, 7.450580596923828e-09, 1.0244548320770264e-08, 1.30385160446167e-08, 1.5832483768463135e-08, 1.862645149230957e-08, 2.1420419216156006e-08, 2.421438694000244e-08, 2.7008354663848877e-08, 2.9802322387695312e-08, 3.259629011154175e-08, 3.5390257835388184e-08, 3.818422555923462e-08, 4.0978193283081055e-08, 4.377216100692749e-08, 4.6566128730773926e-08, 4.936009645462036e-08, 5.21540641784668e-08, 5.494803190231323e-08, 5.774199962615967e-08, 6.05359673500061e-08, 6.332993507385254e-08, 6.612390279769897e-08, 6.891787052154541e-08, 7.171183824539185e-08, 7.450580596923828e-08, 7.729977369308472e-08, 8.009374141693115e-08, 8.288770914077759e-08, 8.568167686462402e-08, 8.847564458847046e-08, 9.12696123123169e-08, 9.406358003616333e-08, 9.685754776000977e-08, 9.96515154838562e-08, 1.0244548320770264e-07, 1.0523945093154907e-07, 1.0803341865539551e-07, 1.1082738637924194e-07, 1.1362135410308838e-07, 1.1641532182693481e-07, 1.1920928955078125e-07]}, 
"gradients/decoder.model.decoder.layers.2.fc1.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 168.0, 4193892.0, 232.0, 8.0], "bins": [-2.9802322387695312e-06, -2.9318034648895264e-06, -2.8833746910095215e-06, -2.8349459171295166e-06, -2.7865171432495117e-06, -2.738088369369507e-06, -2.689659595489502e-06, -2.641230821609497e-06, -2.592802047729492e-06, -2.5443732738494873e-06, -2.4959444999694824e-06, -2.4475157260894775e-06, -2.3990869522094727e-06, -2.3506581783294678e-06, -2.302229404449463e-06, -2.253800630569458e-06, -2.205371856689453e-06, -2.1569430828094482e-06, -2.1085143089294434e-06, -2.0600855350494385e-06, -2.0116567611694336e-06, -1.9632279872894287e-06, -1.914799213409424e-06, -1.866370439529419e-06, -1.817941665649414e-06, -1.7695128917694092e-06, -1.7210841178894043e-06, -1.6726553440093994e-06, -1.6242265701293945e-06, -1.5757977962493896e-06, -1.5273690223693848e-06, -1.4789402484893799e-06, -1.430511474609375e-06, -1.3820827007293701e-06, -1.3336539268493652e-06, -1.2852251529693604e-06, -1.2367963790893555e-06, -1.1883676052093506e-06, -1.1399388313293457e-06, -1.0915100574493408e-06, -1.043081283569336e-06, -9.94652509689331e-07, -9.462237358093262e-07, -8.977949619293213e-07, -8.493661880493164e-07, -8.009374141693115e-07, -7.525086402893066e-07, -7.040798664093018e-07, -6.556510925292969e-07, -6.07222318649292e-07, -5.587935447692871e-07, -5.103647708892822e-07, -4.6193599700927734e-07, -4.1350722312927246e-07, -3.650784492492676e-07, -3.166496753692627e-07, -2.682209014892578e-07, -2.1979212760925293e-07, -1.7136335372924805e-07, -1.2293457984924316e-07, -7.450580596923828e-08, -2.60770320892334e-08, 2.2351741790771484e-08, 7.078051567077637e-08, 1.1920928955078125e-07]}, "gradients/decoder.model.decoder.layers.2.fc1.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4094.0], "bins": [-5.960464477539063e-08, -5.8673322200775146e-08, -5.774199962615967e-08, -5.681067705154419e-08, -5.587935447692871e-08, -5.494803190231323e-08, -5.4016709327697754e-08, -5.3085386753082275e-08, -5.21540641784668e-08, -5.122274160385132e-08, -5.029141902923584e-08, -4.936009645462036e-08, -4.842877388000488e-08, -4.7497451305389404e-08, -4.6566128730773926e-08, -4.563480615615845e-08, -4.470348358154297e-08, -4.377216100692749e-08, -4.284083843231201e-08, -4.190951585769653e-08, -4.0978193283081055e-08, -4.0046870708465576e-08, -3.91155481338501e-08, -3.818422555923462e-08, -3.725290298461914e-08, -3.632158041000366e-08, -3.5390257835388184e-08, -3.4458935260772705e-08, -3.3527612686157227e-08, -3.259629011154175e-08, -3.166496753692627e-08, -3.073364496231079e-08, -2.9802322387695312e-08, -2.8870999813079834e-08, -2.7939677238464355e-08, -2.7008354663848877e-08, -2.60770320892334e-08, -2.514570951461792e-08, -2.421438694000244e-08, -2.3283064365386963e-08, -2.2351741790771484e-08, -2.1420419216156006e-08, -2.0489096641540527e-08, -1.955777406692505e-08, -1.862645149230957e-08, 
-1.7695128917694092e-08, -1.6763806343078613e-08, -1.5832483768463135e-08, -1.4901161193847656e-08, -1.3969838619232178e-08, -1.30385160446167e-08, -1.210719347000122e-08, -1.1175870895385742e-08, -1.0244548320770264e-08, -9.313225746154785e-09, -8.381903171539307e-09, -7.450580596923828e-09, -6.51925802230835e-09, -5.587935447692871e-09, -4.6566128730773926e-09, -3.725290298461914e-09, -2.7939677238464355e-09, -1.862645149230957e-09, -9.313225746154785e-10, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 3.0, 2.0, 3.0, 7.0, 7.0, 10.0, 11.0, 11.0, 11.0, 23.0, 24.0, 26.0, 42.0, 55.0, 70.0, 79.0, 100.0, 101.0, 83.0, 63.0, 70.0, 33.0, 32.0, 31.0, 25.0, 15.0, 13.0, 5.0, 3.0, 9.0, 5.0, 5.0, 5.0, 3.0, 2.0, 0.0, 5.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.751079232955817e-08, -9.425651370520427e-08, -9.100223508085037e-08, -8.774795645649647e-08, -8.449367783214257e-08, -8.123940631321602e-08, -7.798512058343476e-08, -7.473084906450822e-08, -7.147657044015432e-08, -6.822229181580042e-08, -6.496801319144652e-08, -6.171373456709262e-08, -5.845945949545239e-08, -5.520518087109849e-08, -5.195090224674459e-08, -4.869662717510437e-08, -4.544234499803679e-08, -4.2188066373682886e-08, -3.8933787749328985e-08, -3.567951267768876e-08, -3.242523405333486e-08, -2.917095542898096e-08, -2.591667680462706e-08, -2.2662399956629997e-08, -1.9408121332276096e-08, -1.6153842707922195e-08, -1.2899565859925133e-08, -9.645287235571232e-09, -6.39100949939575e-09, -3.1367317632202685e-09, 1.1754686113363277e-10, 3.3718237091306946e-09, 6.626102333484596e-09, 9.880380069660077e-09, 1.3134657805835559e-08, 1.638893643018946e-08, 1.9643213278186522e-08, 2.2897491902540423e-08, 2.6151770526894325e-08, 2.9406047374891386e-08, 3.266032422288845e-08, 3.591460284724235e-08, 3.916888147159625e-08, 4.242316009595015e-08, 4.5677435167590374e-08, 4.8931713791944276e-08, 5.218599241629818e-08, 5.54402674879384e-08, 5.869454966500598e-08, 6.19488247366462e-08, 6.52031033610001e-08, 6.8457381985354e-08, 7.17116606097079e-08, 7.496593923406181e-08, 7.822021785841571e-08, 8.147449648276961e-08, 8.472877510712351e-08, 8.798305373147741e-08, 9.123733235583131e-08, 9.449161098018521e-08, 9.774588960453912e-08, 1.0100016822889302e-07, 1.0425443974781956e-07, 1.0750871837217346e-07, 1.1076299699652736e-07]}, "gradients/decoder.model.decoder.layers.2.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 4.0, 6.0, 2.0, 9.0, 14.0, 15.0, 15.0, 17.0, 20.0, 19.0, 41.0, 29.0, 30.0, 50.0, 40.0, 45.0, 47.0, 66.0, 55.0, 50.0, 51.0, 60.0, 48.0, 36.0, 40.0, 38.0, 30.0, 23.0, 18.0, 24.0, 15.0, 15.0, 15.0, 4.0, 10.0, 3.0, 4.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0537404904198411e-07, -1.0171120834456815e-07, -9.804836764715219e-08, -9.438552694973623e-08, -9.072267914689292e-08, -8.705984555490431e-08, -8.3396997752061e-08, -7.973415705464504e-08, -7.607131635722908e-08, -7.240847565981312e-08, -6.874563496239716e-08, -6.50827942649812e-08, -6.141995356756524e-08, -5.7757109317435606e-08, -5.409426506730597e-08, -5.043142436989001e-08, -4.676858367247405e-08, -4.310574297505809e-08, -3.9442902277642133e-08, -3.5780058027512496e-08, -3.211721733009654e-08, -2.8454376632680578e-08, -2.479153415890778e-08, -2.112869168513498e-08, -1.7465850987719023e-08, 
-1.3803009402124644e-08, -1.0140167816530266e-08, -6.477326230935887e-09, -2.8144846453415084e-09, 8.483560520744504e-10, 4.511198525847249e-09, 8.174040999620047e-09, 1.1836888802463363e-08, 1.5499729499879322e-08, 1.916257197365212e-08, 2.282541444742492e-08, 2.6488255144840878e-08, 3.0151095842256836e-08, 3.3813940092386474e-08, 3.747678078980243e-08, 4.113962148721839e-08, 4.480246218463435e-08, 4.846530288205031e-08, 5.212814713217995e-08, 5.5790987829595906e-08, 5.9453828527011865e-08, 6.31166727771415e-08, 6.677951347455746e-08, 7.044235417197342e-08, 7.410519486938938e-08, 7.776803556680534e-08, 8.14308762642213e-08, 8.509371696163726e-08, 8.875656476448057e-08, 9.241940546189653e-08, 9.608224615931249e-08, 9.974508685672845e-08, 1.0340792755414441e-07, 1.0707076825156037e-07, 1.1073360894897633e-07, 1.1439645675181964e-07, 1.1805929034380824e-07, 1.2172213814665156e-07, 1.2538498594949488e-07, 1.2904781954148348e-07]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1015.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, -4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 
5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 6.0, 5.0, 11.0, 29.0, 39.0, 100.0, 216.0, 313.0, 157.0, 75.0, 25.0, 22.0, 8.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.907269150564389e-08, -7.13993841827687e-08, -6.372607685989351e-08, -5.605276953701832e-08, -4.837946221414313e-08, -4.070615489126794e-08, -3.3032847568392754e-08, -2.5359540245517564e-08, -1.7686232922642375e-08, -1.0012925599767186e-08, -2.339618276891997e-09, 5.333689045983192e-09, 1.3006996368858381e-08, 2.068030369173357e-08, 2.835361101460876e-08, 3.602691833748395e-08, 4.370022566035914e-08, 5.137353298323433e-08, 5.9046840306109516e-08, 6.67201476289847e-08, 7.43934549518599e-08, 8.206676227473508e-08, 8.974006959761027e-08, 9.741337692048546e-08, 1.0508668424336065e-07, 1.1275999156623584e-07, 1.2043329888911103e-07, 1.2810660621198622e-07, 1.357799135348614e-07, 1.434532208577366e-07, 1.5112652818061179e-07, 1.5879983550348697e-07, 1.664731712480716e-07, 1.7414647857094678e-07, 1.8181978589382197e-07, 1.8949309321669716e-07, 1.9716640053957235e-07, 2.0483970786244754e-07, 2.1251301518532273e-07, 2.2018632250819792e-07, 2.278596298310731e-07, 2.355329371539483e-07, 2.432062444768235e-07, 2.508795660105534e-07, 2.5855285912257386e-07, 2.6622615223459434e-07, 2.7389947376832424e-07, 2.8157279530205415e-07, 2.892460884140746e-07, 2.969193815260951e-07, 3.04592703059825e-07, 3.122660245935549e-07, 3.199393177055754e-07, 3.2761261081759585e-07, 3.3528593235132576e-07, 3.4295925388505566e-07, 3.5063254699707613e-07, 3.583058401090966e-07, 3.659791616428265e-07, 3.736524831765564e-07, 3.813257762885769e-07, 3.8899906940059736e-07, 3.9667239093432727e-07, 4.0434571246805717e-07, 4.1201900558007765e-07]}, "gradients/decoder.model.decoder.layers.2.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 7.0, 5.0, 7.0, 8.0, 16.0, 13.0, 18.0, 14.0, 33.0, 25.0, 34.0, 36.0, 39.0, 44.0, 48.0, 52.0, 65.0, 56.0, 49.0, 55.0, 65.0, 52.0, 37.0, 38.0, 35.0, 28.0, 24.0, 21.0, 23.0, 13.0, 15.0, 12.0, 7.0, 6.0, 5.0, 2.0, 2.0, 
3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.714543753285398e-08, -8.44035454861114e-08, -8.166164633394146e-08, -7.891975428719888e-08, -7.617785513502895e-08, -7.343596308828637e-08, -7.06940710415438e-08, -6.795217188937386e-08, -6.521027273720392e-08, -6.246838069046134e-08, -5.972648153829141e-08, -5.698458949154883e-08, -5.4242690339378896e-08, -5.150079829263632e-08, -4.875890269318006e-08, -4.6017007093723805e-08, -4.327511504698123e-08, -4.053321944752497e-08, -3.7791323848068714e-08, -3.5049431801326136e-08, -3.23075326491562e-08, -2.9565638826056784e-08, -2.6823745002957367e-08, -2.408184940350111e-08, -2.1339953804044853e-08, -1.8598058204588597e-08, -1.585616260513234e-08, -1.3114268782032923e-08, -1.0372373182576666e-08, -7.63047758312041e-09, -4.8885837600209925e-09, -2.146688160564736e-09, 5.952074388915207e-10, 3.3371025942585675e-09, 6.078997749625614e-09, 8.820892460903451e-09, 1.1562788060359708e-08, 1.4304683659815964e-08, 1.704657748291538e-08, 1.9788473082371638e-08, 2.2530368681827895e-08, 2.527226428128415e-08, 2.8014159880740408e-08, 3.0756055480196665e-08, 3.349794752693924e-08, 3.623984667910918e-08, 3.8981738725851756e-08, 4.172363432530801e-08, 4.446552992476427e-08, 4.7207425524220525e-08, 4.994932112367678e-08, 5.269121317041936e-08, 5.5433112322589295e-08, 5.817500436933187e-08, 6.091690352150181e-08, 6.365879556824439e-08, 6.640068761498696e-08, 6.914257966172954e-08, 7.188447881389948e-08, 7.462637086064206e-08, 7.736827001281199e-08, 8.011016205955457e-08, 8.285205410629715e-08, 8.559395325846708e-08, 8.833585241063702e-08]}, "gradients/decoder.model.decoder.layers.2.self_attn.out_proj.weight": {"_type": "histogram", "values": [5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 559.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1047286.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 572.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 80.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.7881393432617188e-07, -1.7229467630386353e-07, -1.6577541828155518e-07, -1.5925616025924683e-07, -1.5273690223693848e-07, -1.4621764421463013e-07, -1.3969838619232178e-07, -1.3317912817001343e-07, -1.2665987014770508e-07, -1.2014061212539673e-07, -1.1362135410308838e-07, -1.0710209608078003e-07, -1.0058283805847168e-07, -9.406358003616333e-08, -8.754432201385498e-08, -8.102506399154663e-08, -7.450580596923828e-08, -6.798654794692993e-08, -6.146728992462158e-08, -5.494803190231323e-08, -4.842877388000488e-08, -4.190951585769653e-08, -3.5390257835388184e-08, -2.8870999813079834e-08, -2.2351741790771484e-08, -1.5832483768463135e-08, -9.313225746154785e-09, -2.7939677238464355e-09, 3.725290298461914e-09, 1.0244548320770264e-08, 1.6763806343078613e-08, 2.3283064365386963e-08, 2.9802322387695312e-08, 3.632158041000366e-08, 4.284083843231201e-08, 4.936009645462036e-08, 5.587935447692871e-08, 6.239861249923706e-08, 6.891787052154541e-08, 7.543712854385376e-08, 8.195638656616211e-08, 8.847564458847046e-08, 9.499490261077881e-08, 1.0151416063308716e-07, 1.0803341865539551e-07, 1.1455267667770386e-07, 1.210719347000122e-07, 1.2759119272232056e-07, 1.341104507446289e-07, 1.4062970876693726e-07, 1.471489667892456e-07, 1.5366822481155396e-07, 1.601874828338623e-07, 1.6670674085617065e-07, 1.73225998878479e-07, 1.7974525690078735e-07, 1.862645149230957e-07, 1.9278377294540405e-07, 1.993030309677124e-07, 2.0582228899002075e-07, 2.123415470123291e-07, 2.1886080503463745e-07, 
2.253800630569458e-07, 2.3189932107925415e-07, 2.384185791015625e-07]}, "gradients/decoder.model.decoder.layers.2.self_attn.out_proj.bias": {"_type": "histogram", "values": [5.0, 0.0, 0.0, 0.0, 0.0, 14.0, 0.0, 0.0, 0.0, 0.0, 59.0, 0.0, 0.0, 0.0, 0.0, 0.0, 121.0, 0.0, 0.0, 0.0, 0.0, 204.0, 0.0, 0.0, 0.0, 0.0, 224.0, 0.0, 0.0, 0.0, 0.0, 0.0, 203.0, 0.0, 0.0, 0.0, 0.0, 103.0, 0.0, 0.0, 0.0, 0.0, 55.0, 0.0, 0.0, 0.0, 0.0, 0.0, 25.0, 0.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.980232238769531e-07, -2.868473529815674e-07, -2.7567148208618164e-07, -2.644956111907959e-07, -2.5331974029541016e-07, -2.421438694000244e-07, -2.3096799850463867e-07, -2.1979212760925293e-07, -2.086162567138672e-07, -1.9744038581848145e-07, -1.862645149230957e-07, -1.7508864402770996e-07, -1.6391277313232422e-07, -1.5273690223693848e-07, -1.4156103134155273e-07, -1.30385160446167e-07, -1.1920928955078125e-07, -1.0803341865539551e-07, -9.685754776000977e-08, -8.568167686462402e-08, -7.450580596923828e-08, -6.332993507385254e-08, -5.21540641784668e-08, -4.0978193283081055e-08, -2.9802322387695312e-08, -1.862645149230957e-08, -7.450580596923828e-09, 3.725290298461914e-09, 1.4901161193847656e-08, 2.60770320892334e-08, 3.725290298461914e-08, 4.842877388000488e-08, 5.960464477539063e-08, 7.078051567077637e-08, 8.195638656616211e-08, 9.313225746154785e-08, 1.043081283569336e-07, 1.1548399925231934e-07, 1.2665987014770508e-07, 1.3783574104309082e-07, 1.4901161193847656e-07, 1.601874828338623e-07, 1.7136335372924805e-07, 1.825392246246338e-07, 1.9371509552001953e-07, 2.0489096641540527e-07, 2.1606683731079102e-07, 2.2724270820617676e-07, 2.384185791015625e-07, 2.4959444999694824e-07, 2.60770320892334e-07, 2.7194619178771973e-07, 2.8312206268310547e-07, 2.942979335784912e-07, 3.0547380447387695e-07, 3.166496753692627e-07, 3.2782554626464844e-07, 3.390014171600342e-07, 3.501772880554199e-07, 3.6135315895080566e-07, 3.725290298461914e-07, 3.8370490074157715e-07, 3.948807716369629e-07, 4.0605664253234863e-07, 4.172325134277344e-07]}, "gradients/decoder.model.decoder.layers.2.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 4.0, 0.0, 0.0, 6.0, 7.0, 0.0, 12.0, 0.0, 21.0, 48.0, 4.0, 2.0, 140.0, 323.0, 789.0, 1045940.0, 1030.0, 37.0, 93.0, 6.0, 5.0, 47.0, 17.0, 0.0, 15.0, 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.012588083744049e-06, -1.9390136003494263e-06, -1.8654391169548035e-06, -1.7918646335601807e-06, -1.7182901501655579e-06, -1.644715666770935e-06, -1.5711411833763123e-06, -1.4975666999816895e-06, -1.4239922165870667e-06, -1.3504177331924438e-06, -1.276843249797821e-06, -1.2032687664031982e-06, -1.1296942830085754e-06, -1.0561197996139526e-06, -9.825453162193298e-07, -9.08970832824707e-07, -8.353963494300842e-07, -7.618218660354614e-07, -6.882473826408386e-07, -6.146728992462158e-07, -5.41098415851593e-07, -4.675239324569702e-07, -3.939494490623474e-07, -3.203749656677246e-07, -2.468004822731018e-07, -1.73225998878479e-07, -9.96515154838562e-08, -2.60770320892334e-08, 4.7497451305389404e-08, 1.210719347000122e-07, 1.94646418094635e-07, 2.682209014892578e-07, 3.417953848838806e-07, 4.153698682785034e-07, 4.889443516731262e-07, 5.62518835067749e-07, 6.360933184623718e-07, 7.096678018569946e-07, 7.832422852516174e-07, 8.568167686462402e-07, 9.30391252040863e-07, 1.0039657354354858e-06, 
1.0775402188301086e-06, 1.1511147022247314e-06, 1.2246891856193542e-06, 1.298263669013977e-06, 1.3718381524085999e-06, 1.4454126358032227e-06, 1.5189871191978455e-06, 1.5925616025924683e-06, 1.666136085987091e-06, 1.7397105693817139e-06, 1.8132850527763367e-06, 1.8868595361709595e-06, 1.9604340195655823e-06, 2.034008502960205e-06, 2.107582986354828e-06, 2.1811574697494507e-06, 2.2547319531440735e-06, 2.3283064365386963e-06, 2.401880919933319e-06, 2.475455403327942e-06, 2.5490298867225647e-06, 2.6226043701171875e-06]}, "gradients/decoder.model.decoder.layers.2.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 4.0, 0.0, 6.0, 7.0, 0.0, 12.0, 0.0, 18.0, 0.0, 46.0, 78.0, 0.0, 181.0, 0.0, 327.0, 0.0, 159.0, 0.0, 80.0, 45.0, 0.0, 17.0, 0.0, 14.0, 0.0, 3.0, 1.0, 0.0, 7.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.5367431640625e-07, -9.201467037200928e-07, -8.866190910339355e-07, -8.530914783477783e-07, -8.195638656616211e-07, -7.860362529754639e-07, -7.525086402893066e-07, -7.189810276031494e-07, -6.854534149169922e-07, -6.51925802230835e-07, -6.183981895446777e-07, -5.848705768585205e-07, -5.513429641723633e-07, -5.178153514862061e-07, -4.842877388000488e-07, -4.507601261138916e-07, -4.172325134277344e-07, -3.8370490074157715e-07, -3.501772880554199e-07, -3.166496753692627e-07, -2.8312206268310547e-07, -2.4959444999694824e-07, -2.1606683731079102e-07, -1.825392246246338e-07, -1.4901161193847656e-07, -1.1548399925231934e-07, -8.195638656616211e-08, -4.842877388000488e-08, -1.4901161193847656e-08, 1.862645149230957e-08, 5.21540641784668e-08, 8.568167686462402e-08, 1.1920928955078125e-07, 1.5273690223693848e-07, 1.862645149230957e-07, 2.1979212760925293e-07, 2.5331974029541016e-07, 2.868473529815674e-07, 3.203749656677246e-07, 3.5390257835388184e-07, 3.8743019104003906e-07, 4.209578037261963e-07, 4.544854164123535e-07, 4.880130290985107e-07, 5.21540641784668e-07, 5.550682544708252e-07, 5.885958671569824e-07, 6.221234798431396e-07, 6.556510925292969e-07, 6.891787052154541e-07, 7.227063179016113e-07, 7.562339305877686e-07, 7.897615432739258e-07, 8.23289155960083e-07, 8.568167686462402e-07, 8.903443813323975e-07, 9.238719940185547e-07, 9.57399606704712e-07, 9.909272193908691e-07, 1.0244548320770264e-06, 1.0579824447631836e-06, 1.0915100574493408e-06, 1.125037670135498e-06, 1.1585652828216553e-06, 1.1920928955078125e-06]}, "gradients/decoder.model.decoder.layers.2.self_attn.k_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.self_attn.k_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.self_attn.q_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.2.self_attn.q_proj.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1024.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 32.0, 748.0, 225.0, 10.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.3185128245349915e-07, -2.831737049291405e-07, -2.344961416156366e-07, -1.8581857830213266e-07, -1.3714100077777402e-07, -8.846342325341539e-08, -3.9785874150766176e-08, 8.891703373592463e-09, 5.75692808979511e-08, 1.0624685131688238e-07, 1.5492442173581367e-07, 2.036019850493176e-07, 2.5227956257367623e-07, 3.0095714009803487e-07, 3.496346892006841e-07, 3.983122667250427e-07, 4.4698984424940136e-07, 4.956673933520506e-07, 5.443449708764092e-07, 5.930225484007678e-07, 6.417001259251265e-07, 6.903777034494851e-07, 7.390552809738438e-07, 7.877328016547835e-07, 8.36410436022561e-07, 8.850880135469197e-07, 9.337655910712783e-07, 9.82443111752218e-07, 1.0311207461199956e-06, 1.0797982668009354e-06, 1.1284757874818752e-06, 1.1771534218496527e-06, 1.2258310562174302e-06, 1.27450857689837e-06, 1.3231862112661474e-06, 1.3718637319470872e-06, 1.4205413663148647e-06, 1.4692188869958045e-06, 1.5178964076767443e-06, 1.5665740420445218e-06, 1.6152516764122993e-06, 1.663929197093239e-06, 1.7126068314610166e-06, 1.7612843521419563e-06, 1.8099619865097338e-06, 1.8586395071906736e-06, 1.9073170278716134e-06, 
1.9559947759262286e-06, 2.0046722966071684e-06, 2.053349817288108e-06, 2.102027337969048e-06, 2.150705086023663e-06, 2.199382606704603e-06, 2.2480601273855427e-06, 2.2967376480664825e-06, 2.3454153961210977e-06, 2.394092689428362e-06, 2.442770210109302e-06, 2.4914477307902416e-06, 2.540125478844857e-06, 2.5888029995257966e-06, 2.6374805202067364e-06, 2.686158040887676e-06, 2.734835561568616e-06, 2.783513309623231e-06]}, "gradients/decoder.model.decoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 4.0, 1.0, 5.0, 3.0, 6.0, 8.0, 8.0, 13.0, 11.0, 14.0, 20.0, 17.0, 33.0, 30.0, 28.0, 25.0, 42.0, 40.0, 41.0, 34.0, 53.0, 37.0, 48.0, 40.0, 52.0, 50.0, 45.0, 45.0, 34.0, 31.0, 29.0, 23.0, 22.0, 12.0, 23.0, 18.0, 11.0, 10.0, 6.0, 12.0, 3.0, 4.0, 4.0, 2.0, 1.0, 6.0, 2.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.002447334030876e-06, -9.689688340586144e-07, -9.35490334086353e-07, -9.020118341140915e-07, -8.6853333414183e-07, -8.350548341695685e-07, -8.015762773538881e-07, -7.680977773816267e-07, -7.346192774093652e-07, -7.011407774371037e-07, -6.676622774648422e-07, -6.341837774925807e-07, -6.007052206769004e-07, -5.672267207046389e-07, -5.337482207323774e-07, -5.002697207601159e-07, -4.667912207878544e-07, -4.333127208155929e-07, -3.998342208433314e-07, -3.663556924493605e-07, -3.32877192477099e-07, -2.9939869250483753e-07, -2.659201641108666e-07, -2.3244166413860512e-07, -1.9896316416634363e-07, -1.6548466419408214e-07, -1.3200615001096594e-07, -9.852764293327709e-08, -6.504913585558825e-08, -3.157063588332676e-08, 1.907878299789445e-09, 3.538639248290565e-08, 6.886500614200486e-08, 1.023435132196937e-07, 1.3582202029738255e-07, 1.6930053448049875e-07, 2.0277903445276024e-07, 2.3625753442502173e-07, 2.6973606281899265e-07, 3.0321456279125414e-07, 3.3669306276351563e-07, 3.701715627357771e-07, 4.036500627080386e-07, 4.371285911020095e-07, 4.70607091074271e-07, 5.040856194682419e-07, 5.375641194405034e-07, 5.710426194127649e-07, 6.045211193850264e-07, 6.379996193572879e-07, 6.714781193295494e-07, 7.049566193018109e-07, 7.384351192740723e-07, 7.719136192463338e-07, 8.053921760620142e-07, 8.388706760342757e-07, 8.723491760065372e-07, 9.058276759787987e-07, 9.393061759510601e-07, 9.727847327667405e-07, 1.006263232739002e-06, 1.0397417327112635e-06, 1.073220232683525e-06, 1.1066987326557864e-06, 1.140177232628048e-06]}, "gradients/decoder.model.decoder.layers.1.fc2.weight": {"_type": "histogram", "values": [13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 98.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4194119.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 44.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0], "bins": [-1.7881393432617188e-07, -1.73225998878479e-07, -1.6763806343078613e-07, -1.6205012798309326e-07, -1.564621925354004e-07, -1.5087425708770752e-07, -1.4528632164001465e-07, -1.3969838619232178e-07, -1.341104507446289e-07, -1.2852251529693604e-07, -1.2293457984924316e-07, -1.1734664440155029e-07, -1.1175870895385742e-07, -1.0617077350616455e-07, -1.0058283805847168e-07, -9.499490261077881e-08, -8.940696716308594e-08, -8.381903171539307e-08, -7.82310962677002e-08, -7.264316082000732e-08, -6.705522537231445e-08, -6.146728992462158e-08, -5.587935447692871e-08, -5.029141902923584e-08, -4.470348358154297e-08, -3.91155481338501e-08, -3.3527612686157227e-08, -2.7939677238464355e-08, 
-2.2351741790771484e-08, -1.6763806343078613e-08, -1.1175870895385742e-08, -5.587935447692871e-09, 0.0, 5.587935447692871e-09, 1.1175870895385742e-08, 1.6763806343078613e-08, 2.2351741790771484e-08, 2.7939677238464355e-08, 3.3527612686157227e-08, 3.91155481338501e-08, 4.470348358154297e-08, 5.029141902923584e-08, 5.587935447692871e-08, 6.146728992462158e-08, 6.705522537231445e-08, 7.264316082000732e-08, 7.82310962677002e-08, 8.381903171539307e-08, 8.940696716308594e-08, 9.499490261077881e-08, 1.0058283805847168e-07, 1.0617077350616455e-07, 1.1175870895385742e-07, 1.1734664440155029e-07, 1.2293457984924316e-07, 1.2852251529693604e-07, 1.341104507446289e-07, 1.3969838619232178e-07, 1.4528632164001465e-07, 1.5087425708770752e-07, 1.564621925354004e-07, 1.6205012798309326e-07, 1.6763806343078613e-07, 1.73225998878479e-07, 1.7881393432617188e-07]}, "gradients/decoder.model.decoder.layers.1.fc2.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1018.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.960464477539063e-08, -5.774199962615967e-08, -5.587935447692871e-08, -5.4016709327697754e-08, -5.21540641784668e-08, -5.029141902923584e-08, -4.842877388000488e-08, -4.6566128730773926e-08, -4.470348358154297e-08, -4.284083843231201e-08, -4.0978193283081055e-08, -3.91155481338501e-08, -3.725290298461914e-08, -3.5390257835388184e-08, -3.3527612686157227e-08, -3.166496753692627e-08, -2.9802322387695312e-08, -2.7939677238464355e-08, -2.60770320892334e-08, -2.421438694000244e-08, -2.2351741790771484e-08, -2.0489096641540527e-08, -1.862645149230957e-08, -1.6763806343078613e-08, -1.4901161193847656e-08, -1.30385160446167e-08, -1.1175870895385742e-08, -9.313225746154785e-09, -7.450580596923828e-09, -5.587935447692871e-09, -3.725290298461914e-09, -1.862645149230957e-09, 0.0, 1.862645149230957e-09, 3.725290298461914e-09, 5.587935447692871e-09, 7.450580596923828e-09, 9.313225746154785e-09, 1.1175870895385742e-08, 1.30385160446167e-08, 1.4901161193847656e-08, 1.6763806343078613e-08, 1.862645149230957e-08, 2.0489096641540527e-08, 2.2351741790771484e-08, 2.421438694000244e-08, 2.60770320892334e-08, 2.7939677238464355e-08, 2.9802322387695312e-08, 3.166496753692627e-08, 3.3527612686157227e-08, 3.5390257835388184e-08, 3.725290298461914e-08, 3.91155481338501e-08, 4.0978193283081055e-08, 4.284083843231201e-08, 4.470348358154297e-08, 4.6566128730773926e-08, 4.842877388000488e-08, 5.029141902923584e-08, 5.21540641784668e-08, 5.4016709327697754e-08, 5.587935447692871e-08, 5.774199962615967e-08, 5.960464477539063e-08]}, "gradients/decoder.model.decoder.layers.1.fc1.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4194304.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, 
"gradients/decoder.model.decoder.layers.1.fc1.bias": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4096.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.1.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 4.0, 6.0, 6.0, 5.0, 11.0, 6.0, 14.0, 38.0, 29.0, 57.0, 76.0, 123.0, 177.0, 135.0, 112.0, 68.0, 45.0, 25.0, 19.0, 8.0, 11.0, 9.0, 5.0, 7.0, 4.0, 0.0, 6.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.7816257620070246e-08, -4.5805471415860666e-08, -4.3794681658937407e-08, -4.1783895454727826e-08, -3.977310569780457e-08, -3.776231949359499e-08, -3.5751533289385407e-08, -3.3740747085175826e-08, -3.172995732825257e-08, -2.9719169347686147e-08, -2.7708381367119728e-08, -2.5697595162910147e-08, -2.3686807182343728e-08, -2.1676019201777308e-08, -1.9665232997567728e-08, -1.7654445017001308e-08, -1.5643657036434888e-08, -1.3632869055868468e-08, -1.1622081963480468e-08, -9.611294871092468e-09, -7.600506890526049e-09, -5.589718909959629e-09, -3.578931817571629e-09, -1.568144725183629e-09, 4.426432553827908e-10, 2.4534307918600007e-09, 4.4642183283372106e-09, 6.4750058648144204e-09, 8.48579340129163e-09, 1.049658138185805e-08, 1.250736847424605e-08, 1.451815556663405e-08, 1.652894354720047e-08, 1.853973152776689e-08, 2.055051950833331e-08, 2.256130571254289e-08, 2.457209369310931e-08, 2.658288167367573e-08, 2.859366787788531e-08, 3.060445408209489e-08, 3.261524383901815e-08, 3.462603004322773e-08, 3.663681980015099e-08, 3.864760600436057e-08, 4.065839220857015e-08, 4.266918196549341e-08, 4.467996816970299e-08, 4.669075792662625e-08, 4.870154413083583e-08, 5.071233033504541e-08, 5.272312009196867e-08, 5.473390629617825e-08, 5.6744696053101507e-08, 5.875548225731109e-08, 6.076626846152067e-08, 6.277705466573025e-08, 6.478784086993983e-08, 6.679862707414941e-08, 6.880941327835899e-08, 7.082020658799593e-08, 7.283099279220551e-08, 7.484177899641509e-08, 7.685256520062467e-08, 7.886335140483425e-08, 8.087414471447119e-08]}, "gradients/decoder.model.decoder.layers.1.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 2.0, 3.0, 1.0, 6.0, 5.0, 3.0, 14.0, 10.0, 15.0, 33.0, 45.0, 57.0, 106.0, 127.0, 118.0, 117.0, 114.0, 61.0, 55.0, 33.0, 24.0, 21.0, 16.0, 9.0, 1.0, 5.0, 4.0, 6.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.523830364812966e-08, -6.304731670070396e-08, -6.085632975327826e-08, -5.866534635856624e-08, -5.647436296385422e-08, -5.428337601642852e-08, -5.209238906900282e-08, -4.990140212157712e-08, -4.77104187268651e-08, -4.55194317794394e-08, -4.332844838472738e-08, -4.113746143730168e-08, -3.894647448987598e-08, -3.675549109516396e-08, 
-3.456450414773826e-08, -3.237352075302624e-08, -3.018253380560054e-08, -2.7991548634531682e-08, -2.5800563463462822e-08, -2.3609576516037123e-08, -2.1418591344968263e-08, -1.9227606173899403e-08, -1.7036619226473704e-08, -1.4845634055404844e-08, -1.2654648884335984e-08, -1.0463663713267124e-08, -8.272677654019844e-09, -6.0816920388617746e-09, -3.890706423703705e-09, -1.6997212526348449e-09, 4.912648066124348e-10, 2.6822508658597144e-09, 4.873228931501217e-09, 7.0642145466592865e-09, 9.255200161817356e-09, 1.1446186221064636e-08, 1.3637171392133496e-08, 1.5828156563202356e-08, 1.8019143510628055e-08, 2.0210128681696915e-08, 2.2401113852765775e-08, 2.4592099023834635e-08, 2.6783084194903495e-08, 2.8974071142329194e-08, 3.1165058089754893e-08, 3.3356041484466914e-08, 3.554702843189261e-08, 3.7738011826604634e-08, 3.992899877403033e-08, 4.211998572145603e-08, 4.431096911616805e-08, 4.650195606359375e-08, 4.869293945830577e-08, 5.088392640573147e-08, 5.307491335315717e-08, 5.526590030058287e-08, 5.745688369529489e-08, 5.964786709000691e-08, 6.183885403743261e-08, 6.402984098485831e-08, 6.622082793228401e-08, 6.841180777428235e-08, 7.060279472170805e-08, 7.279378166913375e-08, 7.498476861655945e-08]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1048576.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "bins": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1023.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [0.0, 9.313225746154785e-10, 1.862645149230957e-09, 2.7939677238464355e-09, 3.725290298461914e-09, 4.6566128730773926e-09, 5.587935447692871e-09, 6.51925802230835e-09, 7.450580596923828e-09, 8.381903171539307e-09, 9.313225746154785e-09, 1.0244548320770264e-08, 1.1175870895385742e-08, 1.210719347000122e-08, 1.30385160446167e-08, 1.3969838619232178e-08, 1.4901161193847656e-08, 1.5832483768463135e-08, 1.6763806343078613e-08, 1.7695128917694092e-08, 1.862645149230957e-08, 1.955777406692505e-08, 2.0489096641540527e-08, 2.1420419216156006e-08, 2.2351741790771484e-08, 2.3283064365386963e-08, 2.421438694000244e-08, 2.514570951461792e-08, 2.60770320892334e-08, 2.7008354663848877e-08, 2.7939677238464355e-08, 2.8870999813079834e-08, 2.9802322387695312e-08, 3.073364496231079e-08, 3.166496753692627e-08, 3.259629011154175e-08, 3.3527612686157227e-08, 3.4458935260772705e-08, 3.5390257835388184e-08, 3.632158041000366e-08, 3.725290298461914e-08, 3.818422555923462e-08, 3.91155481338501e-08, 4.0046870708465576e-08, 4.0978193283081055e-08, 4.190951585769653e-08, 4.284083843231201e-08, 4.377216100692749e-08, 4.470348358154297e-08, 
[W&B gradient-histogram log data omitted: per-parameter "values"/"bins" histograms for decoder layers 1 and 0 (encoder_attn and self_attn q/k/v/out projections and biases, self_attn/encoder_attn layer norms, fc1/fc2, final_layer_norm), decoder.layernorm_embedding, decoder.embed_positions, decoder.embed_tokens, encoder.adapter.layers.2–0 conv weights/biases, and encoder.encoder.layer_norm.]
-0.006018590182065964, -0.005570757202804089, -0.0051229242235422134, -0.004675091244280338, -0.004227258265018463, -0.003779425285756588, -0.003331592306494713, -0.0028837593272328377, -0.0024359263479709625, -0.0019880933687090874, -0.0015402603894472122, -0.001092427410185337, -0.0006445944309234619, -0.00019676145166158676, 0.0002510715276002884, 0.0006989045068621635, 0.0011467374861240387, 0.0015945704653859138, 0.002042403444647789, 0.002490236423909664, 0.0029380694031715393, 0.0033859023824334145, 0.0038337353616952896, 0.004281568340957165, 0.00472940132021904, 0.005177234299480915, 0.00562506727874279, 0.006072900258004665, 0.0065207332372665405, 0.006968566216528416, 0.007416399195790291, 0.007864232175052166, 0.008312065154314041, 0.008759898133575916, 0.009207731112837791, 0.009655564092099667, 0.010103397071361542, 0.010551230050623417, 0.010999063029885292, 0.011446896009147167, 0.011894728988409042, 0.012342561967670918, 0.012790394946932793, 0.013238227926194668, 0.013686060905456543]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 6.0, 5.0, 5.0, 12.0, 28.0, 42.0, 91.0, 206.0, 417.0, 957.0, 2487.0, 7254.0, 25176.0, 166815.0, 3918247.0, 48238.0, 13579.0, 5542.0, 2386.0, 1123.0, 549.0, 328.0, 251.0, 154.0, 103.0, 77.0, 55.0, 46.0, 29.0, 28.0, 15.0, 10.0, 7.0, 8.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0003094673156738281, -0.00029417872428894043, -0.00027889013290405273, -0.00026360154151916504, -0.00024831295013427734, -0.00023302435874938965, -0.00021773576736450195, -0.00020244717597961426, -0.00018715858459472656, -0.00017186999320983887, -0.00015658140182495117, -0.00014129281044006348, -0.00012600421905517578, -0.00011071562767028809, -9.542703628540039e-05, -8.01384449005127e-05, -6.4849853515625e-05, -4.9561262130737305e-05, -3.427267074584961e-05, -1.8984079360961914e-05, -3.6954879760742188e-06, 1.1593103408813477e-05, 2.6881694793701172e-05, 4.217028617858887e-05, 5.745887756347656e-05, 7.274746894836426e-05, 8.803606033325195e-05, 0.00010332465171813965, 0.00011861324310302734, 0.00013390183448791504, 0.00014919042587280273, 0.00016447901725769043, 0.00017976760864257812, 0.00019505620002746582, 0.00021034479141235352, 0.0002256333827972412, 0.0002409219741821289, 0.0002562105655670166, 0.0002714991569519043, 0.000286787748336792, 0.0003020763397216797, 0.0003173649311065674, 0.0003326535224914551, 0.0003479421138763428, 0.00036323070526123047, 0.00037851929664611816, 0.00039380788803100586, 0.00040909647941589355, 0.00042438507080078125, 0.00043967366218566895, 0.00045496225357055664, 0.00047025084495544434, 0.00048553943634033203, 0.0005008280277252197, 0.0005161166191101074, 0.0005314052104949951, 0.0005466938018798828, 0.0005619823932647705, 0.0005772709846496582, 0.0005925595760345459, 0.0006078481674194336, 0.0006231367588043213, 0.000638425350189209, 0.0006537139415740967, 0.0006690025329589844]}, "gradients/encoder.encoder.layers.23.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 3.0, 3.0, 9.0, 8.0, 12.0, 21.0, 22.0, 28.0, 50.0, 83.0, 112.0, 173.0, 151.0, 118.0, 68.0, 43.0, 23.0, 22.0, 9.0, 14.0, 7.0, 9.0, 5.0, 2.0, 8.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6927719116210938e-05, 
-1.6082078218460083e-05, -1.5236437320709229e-05, -1.4390796422958374e-05, -1.354515552520752e-05, -1.2699514627456665e-05, -1.185387372970581e-05, -1.1008232831954956e-05, -1.0162591934204102e-05, -9.316951036453247e-06, -8.471310138702393e-06, -7.625669240951538e-06, -6.780028343200684e-06, -5.934387445449829e-06, -5.088746547698975e-06, -4.24310564994812e-06, -3.3974647521972656e-06, -2.551823854446411e-06, -1.7061829566955566e-06, -8.605420589447021e-07, -1.4901161193847656e-08, 8.307397365570068e-07, 1.6763806343078613e-06, 2.522021532058716e-06, 3.3676624298095703e-06, 4.213303327560425e-06, 5.058944225311279e-06, 5.904585123062134e-06, 6.750226020812988e-06, 7.595866918563843e-06, 8.441507816314697e-06, 9.287148714065552e-06, 1.0132789611816406e-05, 1.097843050956726e-05, 1.1824071407318115e-05, 1.266971230506897e-05, 1.3515353202819824e-05, 1.4360994100570679e-05, 1.5206634998321533e-05, 1.6052275896072388e-05, 1.6897916793823242e-05, 1.7743557691574097e-05, 1.858919858932495e-05, 1.9434839487075806e-05, 2.028048038482666e-05, 2.1126121282577515e-05, 2.197176218032837e-05, 2.2817403078079224e-05, 2.3663043975830078e-05, 2.4508684873580933e-05, 2.5354325771331787e-05, 2.619996666908264e-05, 2.7045607566833496e-05, 2.789124846458435e-05, 2.8736889362335205e-05, 2.958253026008606e-05, 3.0428171157836914e-05, 3.127381205558777e-05, 3.211945295333862e-05, 3.296509385108948e-05, 3.381073474884033e-05, 3.4656375646591187e-05, 3.550201654434204e-05, 3.6347657442092896e-05, 3.719329833984375e-05]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [4.0, 2.0, 1.0, 5.0, 6.0, 4.0, 5.0, 12.0, 15.0, 16.0, 48.0, 96.0, 160.0, 477.0, 1828.0, 10261.0, 102413.0, 3977632.0, 89974.0, 8872.0, 1506.0, 394.0, 136.0, 89.0, 59.0, 33.0, 40.0, 24.0, 28.0, 32.0, 20.0, 15.0, 19.0, 9.0, 10.0, 13.0, 8.0, 8.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011372566223144531, -0.00010721758008003235, -0.00010070949792861938, -9.420141577720642e-05, -8.769333362579346e-05, -8.118525147438049e-05, -7.467716932296753e-05, -6.816908717155457e-05, -6.16610050201416e-05, -5.515292286872864e-05, -4.8644840717315674e-05, -4.213675856590271e-05, -3.5628676414489746e-05, -2.9120594263076782e-05, -2.261251211166382e-05, -1.6104429960250854e-05, -9.59634780883789e-06, -3.0882656574249268e-06, 3.419816493988037e-06, 9.927898645401001e-06, 1.6435980796813965e-05, 2.294406294822693e-05, 2.9452145099639893e-05, 3.5960227251052856e-05, 4.246830940246582e-05, 4.8976391553878784e-05, 5.548447370529175e-05, 6.199255585670471e-05, 6.850063800811768e-05, 7.500872015953064e-05, 8.15168023109436e-05, 8.802488446235657e-05, 9.453296661376953e-05, 0.0001010410487651825, 0.00010754913091659546, 0.00011405721306800842, 0.00012056529521942139, 0.00012707337737083435, 0.00013358145952224731, 0.00014008954167366028, 0.00014659762382507324, 0.0001531057059764862, 0.00015961378812789917, 0.00016612187027931213, 0.0001726299524307251, 0.00017913803458213806, 0.00018564611673355103, 0.000192154198884964, 0.00019866228103637695, 0.00020517036318778992, 0.00021167844533920288, 0.00021818652749061584, 0.0002246946096420288, 0.00023120269179344177, 0.00023771077394485474, 0.0002442188560962677, 0.00025072693824768066, 0.00025723502039909363, 0.0002637431025505066, 0.00027025118470191956, 0.0002767592668533325, 0.0002832673490047455, 0.00028977543115615845, 0.0002962835133075714, 
0.0003027915954589844]}, "gradients/encoder.encoder.layers.23.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 2.0, 6.0, 7.0, 8.0, 6.0, 7.0, 7.0, 12.0, 21.0, 18.0, 19.0, 27.0, 26.0, 29.0, 33.0, 37.0, 34.0, 51.0, 52.0, 66.0, 84.0, 104.0, 301.0, 1128.0, 1313.0, 271.0, 117.0, 83.0, 46.0, 42.0, 33.0, 27.0, 14.0, 11.0, 9.0, 6.0, 5.0, 4.0, 5.0, 2.0, 4.0, 2.0, 1.0], "bins": [-5.7578086853027344e-05, -5.630403757095337e-05, -5.5029988288879395e-05, -5.375593900680542e-05, -5.2481889724731445e-05, -5.120784044265747e-05, -4.9933791160583496e-05, -4.865974187850952e-05, -4.738569259643555e-05, -4.611164331436157e-05, -4.48375940322876e-05, -4.356354475021362e-05, -4.228949546813965e-05, -4.1015446186065674e-05, -3.97413969039917e-05, -3.8467347621917725e-05, -3.719329833984375e-05, -3.5919249057769775e-05, -3.46451997756958e-05, -3.3371150493621826e-05, -3.209710121154785e-05, -3.082305192947388e-05, -2.9549002647399902e-05, -2.8274953365325928e-05, -2.7000904083251953e-05, -2.572685480117798e-05, -2.4452805519104004e-05, -2.317875623703003e-05, -2.1904706954956055e-05, -2.063065767288208e-05, -1.9356608390808105e-05, -1.808255910873413e-05, -1.6808509826660156e-05, -1.553446054458618e-05, -1.4260411262512207e-05, -1.2986361980438232e-05, -1.1712312698364258e-05, -1.0438263416290283e-05, -9.164214134216309e-06, -7.890164852142334e-06, -6.616115570068359e-06, -5.342066287994385e-06, -4.06801700592041e-06, -2.7939677238464355e-06, -1.519918441772461e-06, -2.4586915969848633e-07, 1.0281801223754883e-06, 2.302229404449463e-06, 3.5762786865234375e-06, 4.850327968597412e-06, 6.124377250671387e-06, 7.398426532745361e-06, 8.672475814819336e-06, 9.94652509689331e-06, 1.1220574378967285e-05, 1.249462366104126e-05, 1.3768672943115234e-05, 1.5042722225189209e-05, 1.6316771507263184e-05, 1.7590820789337158e-05, 1.8864870071411133e-05, 2.0138919353485107e-05, 2.1412968635559082e-05, 2.2687017917633057e-05, 2.396106719970703e-05]}, "gradients/encoder.encoder.layers.23.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 23.0, 274.0, 705.0, 17.0, 0.0, 1.0], "bins": [-0.015486485324800014, -0.01522890105843544, -0.014971316792070866, -0.014713732525706291, -0.014456148259341717, -0.014198563992977142, -0.013940979726612568, -0.013683395460247993, -0.013425811193883419, -0.013168226927518845, -0.01291064266115427, -0.012653058394789696, -0.012395474128425121, -0.012137889862060547, -0.011880305595695972, -0.011622721329331398, -0.011365137062966824, -0.01110755279660225, -0.010849968530237675, -0.0105923842638731, -0.010334799997508526, -0.010077215731143951, -0.009819631464779377, -0.009562047198414803, -0.009304462932050228, -0.009046878665685654, -0.00878929439932108, -0.008531710132956505, -0.00827412586659193, -0.008016541600227356, -0.0077589573338627815, -0.007501373067498207, -0.007243788801133633, -0.006986204534769058, -0.006728620268404484, -0.006471036002039909, -0.006213451735675335, -0.0059558674693107605, -0.005698283202946186, -0.005440698936581612, -0.005183114670217037, -0.004925530403852463, -0.004667946137487888, -0.004410361871123314, -0.0041527776047587395, 
-0.003895193338394165, -0.0036376090720295906, -0.003380024805665016, -0.0031224405393004417, -0.0028648562729358673, -0.002607272006571293, -0.0023496877402067184, -0.002092103473842144, -0.0018345192074775696, -0.0015769349411129951, -0.0013193506747484207, -0.0010617665247991681, -0.0008041822584345937, -0.0005465979920700192, -0.0002890137257054448, -3.142945934087038e-05, 0.00022615480702370405, 0.0004837390733882785, 0.0007413233397528529, 0.0009989076061174273]}, "gradients/encoder.encoder.layers.23.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 5.0, 3.0, 7.0, 8.0, 6.0, 18.0, 21.0, 36.0, 42.0, 35.0, 64.0, 49.0, 60.0, 65.0, 58.0, 68.0, 62.0, 75.0, 52.0, 45.0, 63.0, 36.0, 34.0, 32.0, 21.0, 14.0, 13.0, 7.0, 2.0, 4.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0009464621543884277, -0.0009225178509950638, -0.0008985735476016998, -0.0008746292442083359, -0.0008506849408149719, -0.000826740637421608, -0.000802796334028244, -0.0007788520306348801, -0.0007549077272415161, -0.0007309634238481522, -0.0007070191204547882, -0.0006830748170614243, -0.0006591305136680603, -0.0006351862102746964, -0.0006112419068813324, -0.0005872976034879684, -0.0005633533000946045, -0.0005394089967012405, -0.0005154646933078766, -0.0004915203899145126, -0.0004675760865211487, -0.00044363178312778473, -0.0004196874797344208, -0.0003957431763410568, -0.00037179887294769287, -0.0003478545695543289, -0.00032391026616096497, -0.000299965962767601, -0.00027602165937423706, -0.0002520773559808731, -0.00022813305258750916, -0.0002041887491941452, -0.00018024444580078125, -0.0001563001424074173, -0.00013235583901405334, -0.00010841153562068939, -8.446723222732544e-05, -6.052292883396149e-05, -3.6578625440597534e-05, -1.2634322047233582e-05, 1.1309981346130371e-05, 3.5254284739494324e-05, 5.9198588132858276e-05, 8.314289152622223e-05, 0.00010708719491958618, 0.00013103149831295013, 0.0001549758017063141, 0.00017892010509967804, 0.000202864408493042, 0.00022680871188640594, 0.0002507530152797699, 0.00027469731867313385, 0.0002986416220664978, 0.00032258592545986176, 0.0003465302288532257, 0.00037047453224658966, 0.0003944188356399536, 0.00041836313903331757, 0.0004423074424266815, 0.00046625174582004547, 0.0004901960492134094, 0.0005141403526067734, 0.0005380846560001373, 0.0005620289593935013, 0.0005859732627868652]}, "gradients/encoder.encoder.layers.23.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 4.0, 4.0, 4.0, 3.0, 7.0, 10.0, 9.0, 20.0, 31.0, 38.0, 50.0, 91.0, 167.0, 265.0, 558.0, 1299.0, 3784.0, 17115.0, 722227.0, 283502.0, 13604.0, 3283.0, 1249.0, 548.0, 270.0, 149.0, 94.0, 64.0, 28.0, 19.0, 13.0, 12.0, 9.0, 13.0, 5.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0009665489196777344, -0.0009403824806213379, -0.0009142160415649414, -0.0008880496025085449, -0.0008618831634521484, -0.000835716724395752, -0.0008095502853393555, -0.000783383846282959, -0.0007572174072265625, -0.000731050968170166, -0.0007048845291137695, -0.000678718090057373, -0.0006525516510009766, -0.0006263852119445801, -0.0006002187728881836, -0.0005740523338317871, -0.0005478858947753906, -0.0005217194557189941, -0.0004955530166625977, -0.00046938657760620117, -0.0004432201385498047, -0.0004170536994934082, -0.0003908872604370117, -0.00036472082138061523, 
-0.00033855438232421875, -0.00031238794326782227, -0.0002862215042114258, -0.0002600550651550293, -0.0002338886260986328, -0.00020772218704223633, -0.00018155574798583984, -0.00015538930892944336, -0.00012922286987304688, -0.00010305643081665039, -7.68899917602539e-05, -5.072355270385742e-05, -2.4557113647460938e-05, 1.6093254089355469e-06, 2.777576446533203e-05, 5.3942203521728516e-05, 8.0108642578125e-05, 0.00010627508163452148, 0.00013244152069091797, 0.00015860795974731445, 0.00018477439880371094, 0.00021094083786010742, 0.0002371072769165039, 0.0002632737159729004, 0.0002894401550292969, 0.00031560659408569336, 0.00034177303314208984, 0.00036793947219848633, 0.0003941059112548828, 0.0004202723503112793, 0.0004464387893676758, 0.00047260522842407227, 0.0004987716674804688, 0.0005249381065368652, 0.0005511045455932617, 0.0005772709846496582, 0.0006034374237060547, 0.0006296038627624512, 0.0006557703018188477, 0.0006819367408752441, 0.0007081031799316406]}, "gradients/encoder.encoder.layers.23.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 5.0, 12.0, 15.0, 13.0, 23.0, 49.0, 63.0, 100.0, 193.0, 188.0, 134.0, 85.0, 39.0, 28.0, 16.0, 22.0, 8.0, 3.0, 2.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.7418136596679688e-05, -2.641044557094574e-05, -2.5402754545211792e-05, -2.4395063519477844e-05, -2.3387372493743896e-05, -2.237968146800995e-05, -2.1371990442276e-05, -2.0364299416542053e-05, -1.9356608390808105e-05, -1.8348917365074158e-05, -1.734122633934021e-05, -1.6333535313606262e-05, -1.5325844287872314e-05, -1.4318153262138367e-05, -1.3310462236404419e-05, -1.2302771210670471e-05, -1.1295080184936523e-05, -1.0287389159202576e-05, -9.279698133468628e-06, -8.27200710773468e-06, -7.264316082000732e-06, -6.256625056266785e-06, -5.248934030532837e-06, -4.241243004798889e-06, -3.2335519790649414e-06, -2.2258609533309937e-06, -1.218169927597046e-06, -2.1047890186309814e-07, 7.972121238708496e-07, 1.8049031496047974e-06, 2.812594175338745e-06, 3.820285201072693e-06, 4.827976226806641e-06, 5.835667252540588e-06, 6.843358278274536e-06, 7.851049304008484e-06, 8.858740329742432e-06, 9.86643135547638e-06, 1.0874122381210327e-05, 1.1881813406944275e-05, 1.2889504432678223e-05, 1.389719545841217e-05, 1.4904886484146118e-05, 1.5912577509880066e-05, 1.6920268535614014e-05, 1.792795956134796e-05, 1.893565058708191e-05, 1.9943341612815857e-05, 2.0951032638549805e-05, 2.1958723664283752e-05, 2.29664146900177e-05, 2.3974105715751648e-05, 2.4981796741485596e-05, 2.5989487767219543e-05, 2.699717879295349e-05, 2.800486981868744e-05, 2.9012560844421387e-05, 3.0020251870155334e-05, 3.102794289588928e-05, 3.203563392162323e-05, 3.304332494735718e-05, 3.4051015973091125e-05, 3.505870699882507e-05, 3.606639802455902e-05, 3.707408905029297e-05]}, "gradients/encoder.encoder.layers.23.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 5.0, 3.0, 6.0, 9.0, 10.0, 13.0, 21.0, 21.0, 32.0, 45.0, 59.0, 98.0, 128.0, 163.0, 231.0, 337.0, 536.0, 793.0, 1223.0, 2168.0, 3992.0, 7930.0, 16855.0, 41368.0, 126421.0, 568047.0, 183881.0, 52843.0, 20506.0, 9424.0, 4758.0, 2493.0, 1420.0, 850.0, 573.0, 396.0, 262.0, 160.0, 128.0, 97.0, 64.0, 58.0, 39.0, 24.0, 25.0, 17.0, 7.0, 10.0, 7.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0001531839370727539, -0.00014783255755901337, 
-0.00014248117804527283, -0.0001371297985315323, -0.00013177841901779175, -0.0001264270395040512, -0.00012107565999031067, -0.00011572428047657013, -0.00011037290096282959, -0.00010502152144908905, -9.967014193534851e-05, -9.431876242160797e-05, -8.896738290786743e-05, -8.361600339412689e-05, -7.826462388038635e-05, -7.291324436664581e-05, -6.756186485290527e-05, -6.221048533916473e-05, -5.6859105825424194e-05, -5.1507726311683655e-05, -4.6156346797943115e-05, -4.0804967284202576e-05, -3.5453587770462036e-05, -3.0102208256721497e-05, -2.4750828742980957e-05, -1.9399449229240417e-05, -1.4048069715499878e-05, -8.696690201759338e-06, -3.345310688018799e-06, 2.0060688257217407e-06, 7.35744833946228e-06, 1.270882785320282e-05, 1.806020736694336e-05, 2.34115868806839e-05, 2.876296639442444e-05, 3.411434590816498e-05, 3.946572542190552e-05, 4.481710493564606e-05, 5.01684844493866e-05, 5.5519863963127136e-05, 6.0871243476867676e-05, 6.622262299060822e-05, 7.157400250434875e-05, 7.69253820180893e-05, 8.227676153182983e-05, 8.762814104557037e-05, 9.297952055931091e-05, 9.833090007305145e-05, 0.00010368227958679199, 0.00010903365910053253, 0.00011438503861427307, 0.00011973641812801361, 0.00012508779764175415, 0.0001304391771554947, 0.00013579055666923523, 0.00014114193618297577, 0.0001464933156967163, 0.00015184469521045685, 0.0001571960747241974, 0.00016254745423793793, 0.00016789883375167847, 0.000173250213265419, 0.00017860159277915955, 0.00018395297229290009, 0.00018930435180664062]}, "gradients/encoder.encoder.layers.23.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 2.0, 3.0, 5.0, 6.0, 6.0, 10.0, 20.0, 22.0, 31.0, 37.0, 50.0, 41.0, 43.0, 57.0, 82.0, 59.0, 63.0, 58.0, 64.0, 61.0, 80.0, 43.0, 35.0, 37.0, 19.0, 20.0, 16.0, 5.0, 5.0, 3.0, 5.0, 2.0, 5.0, 1.0, 4.0, 1.0, 4.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.608268737792969e-05, -9.25995409488678e-05, -8.911639451980591e-05, -8.563324809074402e-05, -8.215010166168213e-05, -7.866695523262024e-05, -7.518380880355835e-05, -7.170066237449646e-05, -6.821751594543457e-05, -6.473436951637268e-05, -6.125122308731079e-05, -5.77680766582489e-05, -5.428493022918701e-05, -5.080178380012512e-05, -4.731863737106323e-05, -4.383549094200134e-05, -4.035234451293945e-05, -3.6869198083877563e-05, -3.3386051654815674e-05, -2.9902905225753784e-05, -2.6419758796691895e-05, -2.2936612367630005e-05, -1.9453465938568115e-05, -1.5970319509506226e-05, -1.2487173080444336e-05, -9.004026651382446e-06, -5.520880222320557e-06, -2.037733793258667e-06, 1.4454126358032227e-06, 4.928559064865112e-06, 8.411705493927002e-06, 1.1894851922988892e-05, 1.537799835205078e-05, 1.886114478111267e-05, 2.234429121017456e-05, 2.582743763923645e-05, 2.931058406829834e-05, 3.279373049736023e-05, 3.627687692642212e-05, 3.976002335548401e-05, 4.32431697845459e-05, 4.672631621360779e-05, 5.020946264266968e-05, 5.369260907173157e-05, 5.717575550079346e-05, 6.065890192985535e-05, 6.414204835891724e-05, 6.762519478797913e-05, 7.110834121704102e-05, 7.45914876461029e-05, 7.80746340751648e-05, 8.155778050422668e-05, 8.504092693328857e-05, 8.852407336235046e-05, 9.200721979141235e-05, 9.549036622047424e-05, 9.897351264953613e-05, 0.00010245665907859802, 0.00010593980550765991, 0.0001094229519367218, 0.00011290609836578369, 0.00011638924479484558, 0.00011987239122390747, 0.00012335553765296936, 0.00012683868408203125]}, "gradients/encoder.encoder.layers.23.attention.k_proj.weight": 
{"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 4.0, 2.0, 5.0, 5.0, 9.0, 12.0, 13.0, 25.0, 27.0, 39.0, 59.0, 65.0, 86.0, 123.0, 152.0, 219.0, 332.0, 442.0, 678.0, 1119.0, 1635.0, 2380.0, 3751.0, 6438.0, 13520.0, 929123.0, 60147.0, 11869.0, 5807.0, 3348.0, 2248.0, 1540.0, 998.0, 668.0, 452.0, 285.0, 238.0, 161.0, 143.0, 100.0, 96.0, 50.0, 25.0, 32.0, 19.0, 25.0, 19.0, 16.0, 7.0, 4.0, 2.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 1.0], "bins": [-0.000885009765625, -0.0008572936058044434, -0.0008295774459838867, -0.0008018612861633301, -0.0007741451263427734, -0.0007464289665222168, -0.0007187128067016602, -0.0006909966468811035, -0.0006632804870605469, -0.0006355643272399902, -0.0006078481674194336, -0.000580132007598877, -0.0005524158477783203, -0.0005246996879577637, -0.000496983528137207, -0.0004692673683166504, -0.00044155120849609375, -0.0004138350486755371, -0.00038611888885498047, -0.00035840272903442383, -0.0003306865692138672, -0.00030297040939331055, -0.0002752542495727539, -0.00024753808975219727, -0.00021982192993164062, -0.00019210577011108398, -0.00016438961029052734, -0.0001366734504699707, -0.00010895729064941406, -8.124113082885742e-05, -5.352497100830078e-05, -2.580881118774414e-05, 1.9073486328125e-06, 2.962350845336914e-05, 5.733966827392578e-05, 8.505582809448242e-05, 0.00011277198791503906, 0.0001404881477355957, 0.00016820430755615234, 0.00019592046737670898, 0.00022363662719726562, 0.00025135278701782227, 0.0002790689468383789, 0.00030678510665893555, 0.0003345012664794922, 0.00036221742630004883, 0.00038993358612060547, 0.0004176497459411621, 0.00044536590576171875, 0.0004730820655822754, 0.000500798225402832, 0.0005285143852233887, 0.0005562305450439453, 0.000583946704864502, 0.0006116628646850586, 0.0006393790245056152, 0.0006670951843261719, 0.0006948113441467285, 0.0007225275039672852, 0.0007502436637878418, 0.0007779598236083984, 0.0008056759834289551, 0.0008333921432495117, 0.0008611083030700684, 0.000888824462890625]}, "gradients/encoder.encoder.layers.23.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 7.0, 14.0, 22.0, 20.0, 30.0, 38.0, 36.0, 47.0, 53.0, 62.0, 146.0, 61.0, 59.0, 62.0, 55.0, 48.0, 46.0, 30.0, 33.0, 41.0, 13.0, 15.0, 7.0, 10.0, 8.0, 4.0, 3.0, 1.0, 7.0, 4.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-1.9073486328125e-06, -1.841224730014801e-06, -1.775100827217102e-06, -1.708976924419403e-06, -1.642853021621704e-06, -1.5767291188240051e-06, -1.5106052160263062e-06, -1.4444813132286072e-06, -1.3783574104309082e-06, -1.3122335076332092e-06, -1.2461096048355103e-06, -1.1799857020378113e-06, -1.1138617992401123e-06, -1.0477378964424133e-06, -9.816139936447144e-07, -9.154900908470154e-07, -8.493661880493164e-07, -7.832422852516174e-07, -7.171183824539185e-07, -6.509944796562195e-07, -5.848705768585205e-07, -5.187466740608215e-07, -4.5262277126312256e-07, -3.864988684654236e-07, -3.203749656677246e-07, -2.5425106287002563e-07, -1.8812716007232666e-07, -1.2200325727462769e-07, -5.587935447692871e-08, 1.0244548320770264e-08, 7.636845111846924e-08, 1.424923539161682e-07, 2.086162567138672e-07, 2.7474015951156616e-07, 3.4086406230926514e-07, 4.069879651069641e-07, 4.731118679046631e-07, 5.392357707023621e-07, 6.05359673500061e-07, 6.7148357629776e-07, 7.37607479095459e-07, 8.03731381893158e-07, 8.698552846908569e-07, 9.359791874885559e-07, 1.0021030902862549e-06, 1.0682269930839539e-06, 
1.1343508958816528e-06, 1.2004747986793518e-06, 1.2665987014770508e-06, 1.3327226042747498e-06, 1.3988465070724487e-06, 1.4649704098701477e-06, 1.5310943126678467e-06, 1.5972182154655457e-06, 1.6633421182632446e-06, 1.7294660210609436e-06, 1.7955899238586426e-06, 1.8617138266563416e-06, 1.9278377294540405e-06, 1.9939616322517395e-06, 2.0600855350494385e-06, 2.1262094378471375e-06, 2.1923333406448364e-06, 2.2584572434425354e-06, 2.3245811462402344e-06]}, "gradients/encoder.encoder.layers.23.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 2.0, 0.0, 3.0, 8.0, 3.0, 10.0, 7.0, 14.0, 18.0, 28.0, 28.0, 47.0, 49.0, 67.0, 92.0, 117.0, 163.0, 190.0, 313.0, 424.0, 545.0, 767.0, 1096.0, 1672.0, 2632.0, 4317.0, 7678.0, 18061.0, 926520.0, 57301.0, 10841.0, 5543.0, 3329.0, 2006.0, 1339.0, 910.0, 638.0, 484.0, 311.0, 262.0, 196.0, 120.0, 104.0, 79.0, 57.0, 41.0, 29.0, 29.0, 25.0, 22.0, 7.0, 7.0, 5.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.00011283159255981445, -0.00010927673429250717, -0.00010572187602519989, -0.00010216701775789261, -9.861215949058533e-05, -9.505730122327805e-05, -9.150244295597076e-05, -8.794758468866348e-05, -8.43927264213562e-05, -8.083786815404892e-05, -7.728300988674164e-05, -7.372815161943436e-05, -7.017329335212708e-05, -6.66184350848198e-05, -6.306357681751251e-05, -5.950871855020523e-05, -5.595386028289795e-05, -5.239900201559067e-05, -4.8844143748283386e-05, -4.5289285480976105e-05, -4.173442721366882e-05, -3.817956894636154e-05, -3.462471067905426e-05, -3.106985241174698e-05, -2.7514994144439697e-05, -2.3960135877132416e-05, -2.0405277609825134e-05, -1.6850419342517853e-05, -1.3295561075210571e-05, -9.74070280790329e-06, -6.185844540596008e-06, -2.630986273288727e-06, 9.238719940185547e-07, 4.478730261325836e-06, 8.033588528633118e-06, 1.15884467959404e-05, 1.514330506324768e-05, 1.8698163330554962e-05, 2.2253021597862244e-05, 2.5807879865169525e-05, 2.9362738132476807e-05, 3.291759639978409e-05, 3.647245466709137e-05, 4.002731293439865e-05, 4.358217120170593e-05, 4.7137029469013214e-05, 5.0691887736320496e-05, 5.424674600362778e-05, 5.780160427093506e-05, 6.135646253824234e-05, 6.491132080554962e-05, 6.84661790728569e-05, 7.202103734016418e-05, 7.557589560747147e-05, 7.913075387477875e-05, 8.268561214208603e-05, 8.624047040939331e-05, 8.979532867670059e-05, 9.335018694400787e-05, 9.690504521131516e-05, 0.00010045990347862244, 0.00010401476174592972, 0.000107569620013237, 0.00011112447828054428, 0.00011467933654785156]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 6.0, 3.0, 2.0, 4.0, 1.0, 4.0, 7.0, 6.0, 3.0, 3.0, 17.0, 84.0, 620.0, 181.0, 17.0, 7.0, 8.0, 5.0, 4.0, 2.0, 4.0, 1.0, 5.0, 1.0, 1.0, 3.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.499622344970703e-05, -8.213147521018982e-05, -7.926672697067261e-05, -7.64019787311554e-05, -7.353723049163818e-05, -7.067248225212097e-05, -6.780773401260376e-05, -6.494298577308655e-05, -6.207823753356934e-05, -5.9213489294052124e-05, -5.634874105453491e-05, -5.34839928150177e-05, -5.061924457550049e-05, -4.7754496335983276e-05, -4.4889748096466064e-05, -4.202499985694885e-05, -3.916025161743164e-05, -3.629550337791443e-05, -3.343075513839722e-05, -3.0566006898880005e-05, -2.7701258659362793e-05, -2.483651041984558e-05, -2.197176218032837e-05, -1.9107013940811157e-05, 
-1.6242265701293945e-05, -1.3377517461776733e-05, -1.0512769222259521e-05, -7.64802098274231e-06, -4.783272743225098e-06, -1.9185245037078857e-06, 9.462237358093262e-07, 3.810971975326538e-06, 6.67572021484375e-06, 9.540468454360962e-06, 1.2405216693878174e-05, 1.5269964933395386e-05, 1.8134713172912598e-05, 2.099946141242981e-05, 2.386420965194702e-05, 2.6728957891464233e-05, 2.9593706130981445e-05, 3.245845437049866e-05, 3.532320261001587e-05, 3.818795084953308e-05, 4.105269908905029e-05, 4.3917447328567505e-05, 4.678219556808472e-05, 4.964694380760193e-05, 5.251169204711914e-05, 5.537644028663635e-05, 5.8241188526153564e-05, 6.110593676567078e-05, 6.397068500518799e-05, 6.68354332447052e-05, 6.970018148422241e-05, 7.256492972373962e-05, 7.542967796325684e-05, 7.829442620277405e-05, 8.115917444229126e-05, 8.402392268180847e-05, 8.688867092132568e-05, 8.97534191608429e-05, 9.261816740036011e-05, 9.548291563987732e-05, 9.834766387939453e-05]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 6.0, 5.0, 6.0, 9.0, 14.0, 12.0, 21.0, 35.0, 38.0, 59.0, 53.0, 85.0, 118.0, 163.0, 134.0, 81.0, 57.0, 38.0, 22.0, 13.0, 13.0, 8.0, 9.0, 2.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00320967729203403, -0.00308601139113307, -0.00296234549023211, -0.00283867958933115, -0.00271501368843019, -0.00259134778752923, -0.00246768188662827, -0.00234401598572731, -0.00222035008482635, -0.0020966841839253902, -0.0019730182830244303, -0.0018493523821234703, -0.0017256864812225103, -0.0016020205803215504, -0.0014783546794205904, -0.0013546887785196304, -0.0012310228776186705, -0.0011073569767177105, -0.0009836910758167505, -0.0008600251749157906, -0.0007363592740148306, -0.0006126933731138706, -0.0004890274722129107, -0.0003653615713119507, -0.00024169567041099072, -0.00011802976951003075, 5.636131390929222e-06, 0.0001293020322918892, 0.00025296793319284916, 0.00037663383409380913, 0.0005002997349947691, 0.0006239656358957291, 0.0007476317696273327, 0.0008712976705282927, 0.0009949635714292526, 0.0011186294723302126, 0.0012422953732311726, 0.0013659612741321325, 0.0014896271750330925, 0.0016132930759340525, 0.0017369589768350124, 0.0018606248777359724, 0.0019842907786369324, 0.0021079566795378923, 0.0022316225804388523, 0.0023552884813398123, 0.0024789543822407722, 0.002602620283141732, 0.002726286184042692, 0.002849952084943652, 0.002973617985844612, 0.003097283886745572, 0.003220949787646532, 0.003344615688547492, 0.003468281589448452, 0.003591947490349412, 0.003715613391250372, 0.003839279292151332, 0.003962945193052292, 0.004086610861122608, 0.004210276994854212, 0.004333943128585815, 0.004457608796656132, 0.004581274464726448, 0.004704940598458052]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 3.0, 2.0, 6.0, 14.0, 16.0, 13.0, 23.0, 16.0, 22.0, 26.0, 32.0, 28.0, 20.0, 32.0, 31.0, 35.0, 24.0, 36.0, 31.0, 29.0, 26.0, 22.0, 32.0, 30.0, 24.0, 23.0, 32.0, 37.0, 39.0, 30.0, 23.0, 27.0, 25.0, 23.0, 21.0, 29.0, 17.0, 21.0, 22.0, 14.0, 13.0, 11.0, 7.0, 7.0, 6.0, 3.0, 3.0, 4.0, 2.0, 0.0, 1.0], "bins": [-0.0008870959281921387, -0.0008617835119366646, -0.0008364710956811905, -0.0008111586794257164, -0.0007858462631702423, -0.0007605338469147682, -0.0007352214306592941, -0.00070990901440382, 
-0.000684596598148346, -0.0006592841818928719, -0.0006339717656373978, -0.0006086593493819237, -0.0005833469331264496, -0.0005580345168709755, -0.0005327221006155014, -0.0005074096843600273, -0.0004820972681045532, -0.00045678485184907913, -0.00043147243559360504, -0.00040616001933813095, -0.00038084760308265686, -0.00035553518682718277, -0.0003302227705717087, -0.0003049103543162346, -0.0002795979380607605, -0.0002542855218052864, -0.00022897310554981232, -0.00020366068929433823, -0.00017834827303886414, -0.00015303585678339005, -0.00012772344052791595, -0.00010241102427244186, -7.709860801696777e-05, -5.178619176149368e-05, -2.6473775506019592e-05, -1.1613592505455017e-06, 2.415105700492859e-05, 4.946347326040268e-05, 7.477588951587677e-05, 0.00010008830577135086, 0.00012540072202682495, 0.00015071313828229904, 0.00017602555453777313, 0.00020133797079324722, 0.0002266503870487213, 0.0002519628033041954, 0.0002772752195596695, 0.0003025876358151436, 0.0003279000520706177, 0.00035321246832609177, 0.00037852488458156586, 0.00040383730083703995, 0.00042914971709251404, 0.00045446213334798813, 0.0004797745496034622, 0.0005050869658589363, 0.0005303993821144104, 0.0005557117983698845, 0.0005810242146253586, 0.0006063366308808327, 0.0006316490471363068, 0.0006569614633917809, 0.0006822738796472549, 0.000707586295902729, 0.0007328987121582031]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 2.0, 4.0, 3.0, 1.0, 12.0, 20.0, 31.0, 51.0, 65.0, 151.0, 317.0, 669.0, 1973.0, 7490.0, 50625.0, 4024515.0, 92873.0, 10873.0, 2751.0, 952.0, 429.0, 205.0, 116.0, 51.0, 32.0, 20.0, 12.0, 5.0, 16.0, 4.0, 0.0, 5.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000843048095703125, -0.0008135885000228882, -0.0007841289043426514, -0.0007546693086624146, -0.0007252097129821777, -0.0006957501173019409, -0.0006662905216217041, -0.0006368309259414673, -0.0006073713302612305, -0.0005779117345809937, -0.0005484521389007568, -0.00051899254322052, -0.0004895329475402832, -0.0004600733518600464, -0.00043061375617980957, -0.00040115416049957275, -0.00037169456481933594, -0.0003422349691390991, -0.0003127753734588623, -0.0002833157777786255, -0.00025385618209838867, -0.00022439658641815186, -0.00019493699073791504, -0.00016547739505767822, -0.0001360177993774414, -0.00010655820369720459, -7.709860801696777e-05, -4.763901233673096e-05, -1.817941665649414e-05, 1.1280179023742676e-05, 4.073977470397949e-05, 7.019937038421631e-05, 9.965896606445312e-05, 0.00012911856174468994, 0.00015857815742492676, 0.00018803775310516357, 0.0002174973487854004, 0.0002469569444656372, 0.000276416540145874, 0.00030587613582611084, 0.00033533573150634766, 0.00036479532718658447, 0.0003942549228668213, 0.0004237145185470581, 0.0004531741142272949, 0.00048263370990753174, 0.0005120933055877686, 0.0005415529012680054, 0.0005710124969482422, 0.000600472092628479, 0.0006299316883087158, 0.0006593912839889526, 0.0006888508796691895, 0.0007183104753494263, 0.0007477700710296631, 0.0007772296667098999, 0.0008066892623901367, 0.0008361488580703735, 0.0008656084537506104, 0.0008950680494308472, 0.000924527645111084, 0.0009539872407913208, 0.0009834468364715576, 0.0010129064321517944, 0.0010423660278320312]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 
4.0, 4.0, 3.0, 4.0, 11.0, 10.0, 18.0, 24.0, 23.0, 41.0, 49.0, 53.0, 72.0, 89.0, 105.0, 101.0, 99.0, 80.0, 44.0, 44.0, 40.0, 20.0, 11.0, 9.0, 12.0, 5.0, 4.0, 6.0, 3.0, 2.0, 4.0, 2.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0802021026611328e-05, -2.0052306354045868e-05, -1.9302591681480408e-05, -1.8552877008914948e-05, -1.7803162336349487e-05, -1.7053447663784027e-05, -1.6303732991218567e-05, -1.5554018318653107e-05, -1.4804303646087646e-05, -1.4054588973522186e-05, -1.3304874300956726e-05, -1.2555159628391266e-05, -1.1805444955825806e-05, -1.1055730283260345e-05, -1.0306015610694885e-05, -9.556300938129425e-06, -8.806586265563965e-06, -8.056871592998505e-06, -7.3071569204330444e-06, -6.557442247867584e-06, -5.807727575302124e-06, -5.058012902736664e-06, -4.308298230171204e-06, -3.5585835576057434e-06, -2.808868885040283e-06, -2.059154212474823e-06, -1.3094395399093628e-06, -5.597248673439026e-07, 1.8998980522155762e-07, 9.397044777870178e-07, 1.689419150352478e-06, 2.4391338229179382e-06, 3.1888484954833984e-06, 3.938563168048859e-06, 4.688277840614319e-06, 5.437992513179779e-06, 6.187707185745239e-06, 6.9374218583106995e-06, 7.68713653087616e-06, 8.43685120344162e-06, 9.18656587600708e-06, 9.93628054857254e-06, 1.0685995221138e-05, 1.143570989370346e-05, 1.2185424566268921e-05, 1.2935139238834381e-05, 1.3684853911399841e-05, 1.4434568583965302e-05, 1.5184283256530762e-05, 1.5933997929096222e-05, 1.6683712601661682e-05, 1.7433427274227142e-05, 1.8183141946792603e-05, 1.8932856619358063e-05, 1.9682571291923523e-05, 2.0432285964488983e-05, 2.1182000637054443e-05, 2.1931715309619904e-05, 2.2681429982185364e-05, 2.3431144654750824e-05, 2.4180859327316284e-05, 2.4930573999881744e-05, 2.5680288672447205e-05, 2.6430003345012665e-05, 2.7179718017578125e-05]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 6.0, 10.0, 11.0, 24.0, 37.0, 65.0, 110.0, 217.0, 333.0, 698.0, 1486.0, 3151.0, 7008.0, 18579.0, 56545.0, 241514.0, 3548804.0, 227101.0, 56047.0, 19010.0, 7295.0, 3107.0, 1473.0, 746.0, 361.0, 203.0, 95.0, 84.0, 44.0, 44.0, 31.0, 21.0, 9.0, 8.0, 7.0, 3.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00017523765563964844, -0.00016862154006958008, -0.00016200542449951172, -0.00015538930892944336, -0.000148773193359375, -0.00014215707778930664, -0.00013554096221923828, -0.00012892484664916992, -0.00012230873107910156, -0.0001156926155090332, -0.00010907649993896484, -0.00010246038436889648, -9.584426879882812e-05, -8.922815322875977e-05, -8.26120376586914e-05, -7.599592208862305e-05, -6.937980651855469e-05, -6.276369094848633e-05, -5.614757537841797e-05, -4.953145980834961e-05, -4.291534423828125e-05, -3.629922866821289e-05, -2.968311309814453e-05, -2.3066997528076172e-05, -1.6450881958007812e-05, -9.834766387939453e-06, -3.2186508178710938e-06, 3.3974647521972656e-06, 1.0013580322265625e-05, 1.6629695892333984e-05, 2.3245811462402344e-05, 2.9861927032470703e-05, 3.647804260253906e-05, 4.309415817260742e-05, 4.971027374267578e-05, 5.632638931274414e-05, 6.29425048828125e-05, 6.955862045288086e-05, 7.617473602294922e-05, 8.279085159301758e-05, 8.940696716308594e-05, 9.60230827331543e-05, 0.00010263919830322266, 0.00010925531387329102, 0.00011587142944335938, 0.00012248754501342773, 0.0001291036605834961, 0.00013571977615356445, 0.0001423358917236328, 
0.00014895200729370117, 0.00015556812286376953, 0.0001621842384338379, 0.00016880035400390625, 0.0001754164695739746, 0.00018203258514404297, 0.00018864870071411133, 0.0001952648162841797, 0.00020188093185424805, 0.0002084970474243164, 0.00021511316299438477, 0.00022172927856445312, 0.00022834539413452148, 0.00023496150970458984, 0.0002415776252746582, 0.00024819374084472656]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 2.0, 5.0, 9.0, 6.0, 8.0, 11.0, 21.0, 21.0, 23.0, 30.0, 36.0, 50.0, 50.0, 91.0, 163.0, 418.0, 1858.0, 478.0, 153.0, 112.0, 73.0, 73.0, 62.0, 54.0, 48.0, 50.0, 31.0, 24.0, 27.0, 12.0, 12.0, 19.0, 16.0, 11.0, 10.0, 3.0, 4.0, 1.0, 3.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-5.4776668548583984e-05, -5.323905497789383e-05, -5.1701441407203674e-05, -5.016382783651352e-05, -4.8626214265823364e-05, -4.708860069513321e-05, -4.5550987124443054e-05, -4.40133735537529e-05, -4.2475759983062744e-05, -4.093814641237259e-05, -3.9400532841682434e-05, -3.786291927099228e-05, -3.6325305700302124e-05, -3.478769212961197e-05, -3.3250078558921814e-05, -3.171246498823166e-05, -3.0174851417541504e-05, -2.863723784685135e-05, -2.7099624276161194e-05, -2.556201070547104e-05, -2.4024397134780884e-05, -2.248678356409073e-05, -2.0949169993400574e-05, -1.941155642271042e-05, -1.7873942852020264e-05, -1.633632928133011e-05, -1.4798715710639954e-05, -1.3261102139949799e-05, -1.1723488569259644e-05, -1.0185874998569489e-05, -8.648261427879333e-06, -7.1106478571891785e-06, -5.5730342864990234e-06, -4.035420715808868e-06, -2.4978071451187134e-06, -9.601935744285583e-07, 5.774199962615967e-07, 2.1150335669517517e-06, 3.6526471376419067e-06, 5.190260708332062e-06, 6.727874279022217e-06, 8.265487849712372e-06, 9.803101420402527e-06, 1.1340714991092682e-05, 1.2878328561782837e-05, 1.4415942132472992e-05, 1.5953555703163147e-05, 1.7491169273853302e-05, 1.9028782844543457e-05, 2.0566396415233612e-05, 2.2104009985923767e-05, 2.3641623556613922e-05, 2.5179237127304077e-05, 2.6716850697994232e-05, 2.8254464268684387e-05, 2.9792077839374542e-05, 3.13296914100647e-05, 3.286730498075485e-05, 3.440491855144501e-05, 3.594253212213516e-05, 3.748014569282532e-05, 3.901775926351547e-05, 4.055537283420563e-05, 4.209298640489578e-05, 4.363059997558594e-05]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 31.0, 241.0, 577.0, 136.0, 26.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0007223406573757529, -0.0006348453462123871, -0.0005473499768413603, -0.0004598546656779945, -0.0003723593254107982, -0.0002848639851436019, -0.00019736867398023605, -0.0001098733046092093, -2.2377993445843458e-05, 6.511733954539523e-05, 0.0001526126725366339, 0.00024010799825191498, 0.0003276033385191113, 0.0004150986787863076, 0.0005025939899496734, 0.0005900893593207002, 0.000677584670484066, 0.0007650799816474319, 0.0008525753510184586, 0.0009400706621818244, 0.0010275660315528512, 0.001115061342716217, 0.0012025566538795829, 0.0012900519650429487, 0.0013775473926216364, 0.0014650427037850022, 0.001552538014948368, 0.0016400334425270557, 0.0017275287536904216, 
0.0018150240648537874, 0.0019025193760171533, 0.001990014687180519, 0.002077509881928563, 0.002165005309507251, 0.002252500504255295, 0.0023399959318339825, 0.0024274911265820265, 0.002514986554160714, 0.002602481748908758, 0.002689977176487446, 0.0027774726040661335, 0.002864968031644821, 0.002952463226392865, 0.003039958653971553, 0.003127453848719597, 0.0032149492762982845, 0.003302444703876972, 0.003389939898625016, 0.0034774350933730602, 0.003564930520951748, 0.003652425715699792, 0.0037399211432784796, 0.0038274163380265236, 0.003914911765605211, 0.004002407193183899, 0.004089902620762587, 0.004177398048341274, 0.004264893475919962, 0.00435238890349865, 0.00443988386541605, 0.004527379292994738, 0.004614874720573425, 0.004702370148152113, 0.004789865575730801, 0.004877360537648201]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 5.0, 3.0, 7.0, 10.0, 13.0, 21.0, 19.0, 27.0, 35.0, 37.0, 39.0, 56.0, 50.0, 56.0, 65.0, 66.0, 49.0, 70.0, 53.0, 61.0, 41.0, 49.0, 26.0, 42.0, 28.0, 21.0, 19.0, 12.0, 11.0, 10.0, 5.0, 1.0, 0.0, 6.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003432631492614746, -0.0003342423588037491, -0.00032522156834602356, -0.00031620077788829803, -0.0003071799874305725, -0.000298159196972847, -0.00028913840651512146, -0.00028011761605739594, -0.0002710968255996704, -0.0002620760351419449, -0.00025305524468421936, -0.00024403445422649384, -0.0002350136637687683, -0.00022599287331104279, -0.00021697208285331726, -0.00020795129239559174, -0.0001989305019378662, -0.00018990971148014069, -0.00018088892102241516, -0.00017186813056468964, -0.0001628473401069641, -0.00015382654964923859, -0.00014480575919151306, -0.00013578496873378754, -0.000126764178276062, -0.00011774338781833649, -0.00010872259736061096, -9.970180690288544e-05, -9.068101644515991e-05, -8.166022598743439e-05, -7.263943552970886e-05, -6.361864507198334e-05, -5.459785461425781e-05, -4.557706415653229e-05, -3.655627369880676e-05, -2.7535483241081238e-05, -1.8514692783355713e-05, -9.493902325630188e-06, -4.731118679046631e-07, 8.547678589820862e-06, 1.7568469047546387e-05, 2.658925950527191e-05, 3.5610049962997437e-05, 4.463084042072296e-05, 5.3651630878448486e-05, 6.267242133617401e-05, 7.169321179389954e-05, 8.071400225162506e-05, 8.973479270935059e-05, 9.875558316707611e-05, 0.00010777637362480164, 0.00011679716408252716, 0.00012581795454025269, 0.0001348387449979782, 0.00014385953545570374, 0.00015288032591342926, 0.00016190111637115479, 0.0001709219068288803, 0.00017994269728660583, 0.00018896348774433136, 0.00019798427820205688, 0.0002070050686597824, 0.00021602585911750793, 0.00022504664957523346, 0.00023406744003295898]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 9.0, 4.0, 12.0, 16.0, 20.0, 32.0, 31.0, 41.0, 58.0, 79.0, 119.0, 153.0, 254.0, 374.0, 540.0, 821.0, 1418.0, 2395.0, 4421.0, 8543.0, 19402.0, 53513.0, 232684.0, 576313.0, 92024.0, 29254.0, 11946.0, 5983.0, 3182.0, 1858.0, 1099.0, 659.0, 437.0, 261.0, 190.0, 109.0, 94.0, 64.0, 43.0, 34.0, 21.0, 15.0, 13.0, 4.0, 6.0, 5.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0], "bins": [-0.0003333091735839844, -0.00032356753945350647, -0.00031382590532302856, -0.00030408427119255066, -0.00029434263706207275, -0.00028460100293159485, -0.00027485936880111694, -0.00026511773467063904, 
-0.00025537610054016113, -0.00024563446640968323, -0.00023589283227920532, -0.00022615119814872742, -0.0002164095640182495, -0.0002066679298877716, -0.0001969262957572937, -0.0001871846616268158, -0.0001774430274963379, -0.00016770139336585999, -0.00015795975923538208, -0.00014821812510490417, -0.00013847649097442627, -0.00012873485684394836, -0.00011899322271347046, -0.00010925158858299255, -9.950995445251465e-05, -8.976832032203674e-05, -8.002668619155884e-05, -7.028505206108093e-05, -6.054341793060303e-05, -5.080178380012512e-05, -4.106014966964722e-05, -3.131851553916931e-05, -2.1576881408691406e-05, -1.1835247278213501e-05, -2.0936131477355957e-06, 7.64802098274231e-06, 1.7389655113220215e-05, 2.713128924369812e-05, 3.6872923374176025e-05, 4.661455750465393e-05, 5.6356191635131836e-05, 6.609782576560974e-05, 7.583945989608765e-05, 8.558109402656555e-05, 9.532272815704346e-05, 0.00010506436228752136, 0.00011480599641799927, 0.00012454763054847717, 0.00013428926467895508, 0.00014403089880943298, 0.0001537725329399109, 0.0001635141670703888, 0.0001732558012008667, 0.0001829974353313446, 0.0001927390694618225, 0.00020248070359230042, 0.00021222233772277832, 0.00022196397185325623, 0.00023170560598373413, 0.00024144724011421204, 0.00025118887424468994, 0.00026093050837516785, 0.00027067214250564575, 0.00028041377663612366, 0.00029015541076660156]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 3.0, 3.0, 5.0, 4.0, 7.0, 7.0, 15.0, 13.0, 19.0, 16.0, 30.0, 49.0, 63.0, 82.0, 101.0, 124.0, 119.0, 79.0, 71.0, 46.0, 44.0, 32.0, 18.0, 6.0, 8.0, 7.0, 12.0, 2.0, 3.0, 7.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.110004425048828e-05, -2.0255334675312042e-05, -1.9410625100135803e-05, -1.8565915524959564e-05, -1.7721205949783325e-05, -1.6876496374607086e-05, -1.6031786799430847e-05, -1.5187077224254608e-05, -1.4342367649078369e-05, -1.349765807390213e-05, -1.2652948498725891e-05, -1.1808238923549652e-05, -1.0963529348373413e-05, -1.0118819773197174e-05, -9.274110198020935e-06, -8.429400622844696e-06, -7.584691047668457e-06, -6.739981472492218e-06, -5.895271897315979e-06, -5.05056232213974e-06, -4.205852746963501e-06, -3.361143171787262e-06, -2.516433596611023e-06, -1.671724021434784e-06, -8.270144462585449e-07, 1.7695128917694092e-08, 8.624047040939331e-07, 1.7071142792701721e-06, 2.551823854446411e-06, 3.39653342962265e-06, 4.241243004798889e-06, 5.085952579975128e-06, 5.930662155151367e-06, 6.775371730327606e-06, 7.620081305503845e-06, 8.464790880680084e-06, 9.309500455856323e-06, 1.0154210031032562e-05, 1.0998919606208801e-05, 1.184362918138504e-05, 1.268833875656128e-05, 1.3533048331737518e-05, 1.4377757906913757e-05, 1.5222467482089996e-05, 1.6067177057266235e-05, 1.6911886632442474e-05, 1.7756596207618713e-05, 1.8601305782794952e-05, 1.944601535797119e-05, 2.029072493314743e-05, 2.113543450832367e-05, 2.198014408349991e-05, 2.2824853658676147e-05, 2.3669563233852386e-05, 2.4514272809028625e-05, 2.5358982384204865e-05, 2.6203691959381104e-05, 2.7048401534557343e-05, 2.789311110973358e-05, 2.873782068490982e-05, 2.958253026008606e-05, 3.04272398352623e-05, 3.127194941043854e-05, 3.2116658985614777e-05, 3.2961368560791016e-05]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 6.0, 7.0, 7.0, 14.0, 13.0, 14.0, 22.0, 39.0, 49.0, 70.0, 
89.0, 145.0, 175.0, 282.0, 420.0, 713.0, 1086.0, 1630.0, 2694.0, 4797.0, 8442.0, 16371.0, 33145.0, 77160.0, 225168.0, 435373.0, 136229.0, 52078.0, 23909.0, 12008.0, 6448.0, 3688.0, 2281.0, 1344.0, 852.0, 566.0, 363.0, 265.0, 178.0, 121.0, 64.0, 66.0, 53.0, 33.0, 26.0, 18.0, 13.0, 12.0, 8.0, 4.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0001475811004638672, -0.00014286302030086517, -0.00013814494013786316, -0.00013342685997486115, -0.00012870877981185913, -0.00012399069964885712, -0.0001192726194858551, -0.00011455453932285309, -0.00010983645915985107, -0.00010511837899684906, -0.00010040029883384705, -9.568221867084503e-05, -9.096413850784302e-05, -8.6246058344841e-05, -8.152797818183899e-05, -7.680989801883698e-05, -7.209181785583496e-05, -6.737373769283295e-05, -6.265565752983093e-05, -5.793757736682892e-05, -5.3219497203826904e-05, -4.850141704082489e-05, -4.3783336877822876e-05, -3.906525671482086e-05, -3.434717655181885e-05, -2.9629096388816833e-05, -2.491101622581482e-05, -2.0192936062812805e-05, -1.547485589981079e-05, -1.0756775736808777e-05, -6.038695573806763e-06, -1.3206154108047485e-06, 3.3974647521972656e-06, 8.11554491519928e-06, 1.2833625078201294e-05, 1.7551705241203308e-05, 2.2269785404205322e-05, 2.6987865567207336e-05, 3.170594573020935e-05, 3.6424025893211365e-05, 4.114210605621338e-05, 4.586018621921539e-05, 5.057826638221741e-05, 5.529634654521942e-05, 6.0014426708221436e-05, 6.473250687122345e-05, 6.945058703422546e-05, 7.416866719722748e-05, 7.888674736022949e-05, 8.36048275232315e-05, 8.832290768623352e-05, 9.304098784923553e-05, 9.775906801223755e-05, 0.00010247714817523956, 0.00010719522833824158, 0.00011191330850124359, 0.0001166313886642456, 0.00012134946882724762, 0.00012606754899024963, 0.00013078562915325165, 0.00013550370931625366, 0.00014022178947925568, 0.0001449398696422577, 0.0001496579498052597, 0.00015437602996826172]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 3.0, 5.0, 4.0, 0.0, 4.0, 10.0, 12.0, 14.0, 8.0, 13.0, 16.0, 19.0, 27.0, 30.0, 40.0, 32.0, 20.0, 42.0, 37.0, 35.0, 44.0, 28.0, 39.0, 29.0, 32.0, 43.0, 36.0, 33.0, 37.0, 36.0, 22.0, 32.0, 28.0, 18.0, 32.0, 21.0, 14.0, 26.0, 13.0, 10.0, 12.0, 11.0, 5.0, 10.0, 4.0, 5.0, 6.0, 5.0, 0.0, 4.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 2.0], "bins": [-4.5359134674072266e-05, -4.389416426420212e-05, -4.242919385433197e-05, -4.096422344446182e-05, -3.9499253034591675e-05, -3.803428262472153e-05, -3.656931221485138e-05, -3.510434180498123e-05, -3.3639371395111084e-05, -3.2174400985240936e-05, -3.070943057537079e-05, -2.924446016550064e-05, -2.7779489755630493e-05, -2.6314519345760345e-05, -2.4849548935890198e-05, -2.338457852602005e-05, -2.1919608116149902e-05, -2.0454637706279755e-05, -1.8989667296409607e-05, -1.752469688653946e-05, -1.605972647666931e-05, -1.4594756066799164e-05, -1.3129785656929016e-05, -1.1664815247058868e-05, -1.019984483718872e-05, -8.734874427318573e-06, -7.269904017448425e-06, -5.804933607578278e-06, -4.33996319770813e-06, -2.874992787837982e-06, -1.4100223779678345e-06, 5.494803190231323e-08, 1.519918441772461e-06, 2.9848888516426086e-06, 4.449859261512756e-06, 5.914829671382904e-06, 7.379800081253052e-06, 8.8447704911232e-06, 1.0309740900993347e-05, 1.1774711310863495e-05, 1.3239681720733643e-05, 1.470465213060379e-05, 1.6169622540473938e-05, 1.7634592950344086e-05, 1.9099563360214233e-05, 2.056453377008438e-05, 2.202950417995453e-05, 2.3494474589824677e-05, 2.4959444999694824e-05, 
2.6424415409564972e-05, 2.788938581943512e-05, 2.9354356229305267e-05, 3.0819326639175415e-05, 3.228429704904556e-05, 3.374926745891571e-05, 3.521423786878586e-05, 3.6679208278656006e-05, 3.8144178688526154e-05, 3.96091490983963e-05, 4.107411950826645e-05, 4.25390899181366e-05, 4.4004060328006744e-05, 4.546903073787689e-05, 4.693400114774704e-05, 4.839897155761719e-05]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 0.0, 0.0, 5.0, 2.0, 3.0, 0.0, 7.0, 8.0, 14.0, 10.0, 22.0, 32.0, 46.0, 71.0, 118.0, 154.0, 245.0, 338.0, 564.0, 931.0, 1521.0, 2770.0, 5207.0, 11764.0, 34412.0, 343807.0, 584162.0, 37168.0, 12443.0, 5725.0, 2789.0, 1576.0, 990.0, 551.0, 362.0, 238.0, 154.0, 116.0, 82.0, 47.0, 32.0, 24.0, 20.0, 7.0, 10.0, 6.0, 8.0, 2.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005092620849609375, -0.0004923045635223389, -0.00047534704208374023, -0.0004583895206451416, -0.00044143199920654297, -0.00042447447776794434, -0.0004075169563293457, -0.00039055943489074707, -0.00037360191345214844, -0.0003566443920135498, -0.00033968687057495117, -0.00032272934913635254, -0.0003057718276977539, -0.0002888143062591553, -0.00027185678482055664, -0.000254899263381958, -0.00023794174194335938, -0.00022098422050476074, -0.0002040266990661621, -0.00018706917762756348, -0.00017011165618896484, -0.0001531541347503662, -0.00013619661331176758, -0.00011923909187316895, -0.00010228157043457031, -8.532404899597168e-05, -6.836652755737305e-05, -5.1409006118774414e-05, -3.445148468017578e-05, -1.749396324157715e-05, -5.364418029785156e-07, 1.6421079635620117e-05, 3.337860107421875e-05, 5.033612251281738e-05, 6.729364395141602e-05, 8.425116539001465e-05, 0.00010120868682861328, 0.00011816620826721191, 0.00013512372970581055, 0.00015208125114440918, 0.0001690387725830078, 0.00018599629402160645, 0.00020295381546020508, 0.0002199113368988037, 0.00023686885833740234, 0.000253826379776001, 0.0002707839012145996, 0.00028774142265319824, 0.0003046989440917969, 0.0003216564655303955, 0.00033861398696899414, 0.0003555715084075928, 0.0003725290298461914, 0.00038948655128479004, 0.00040644407272338867, 0.0004234015941619873, 0.00044035911560058594, 0.00045731663703918457, 0.0004742741584777832, 0.0004912316799163818, 0.0005081892013549805, 0.0005251467227935791, 0.0005421042442321777, 0.0005590617656707764, 0.000576019287109375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 3.0, 2.0, 2.0, 3.0, 3.0, 2.0, 10.0, 9.0, 6.0, 8.0, 18.0, 12.0, 21.0, 45.0, 21.0, 24.0, 34.0, 45.0, 57.0, 136.0, 94.0, 76.0, 72.0, 43.0, 43.0, 33.0, 52.0, 16.0, 22.0, 14.0, 10.0, 10.0, 21.0, 17.0, 4.0, 7.0, 0.0, 0.0, 6.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.8477439880371094e-06, -1.778826117515564e-06, -1.7099082469940186e-06, -1.6409903764724731e-06, -1.5720725059509277e-06, -1.5031546354293823e-06, -1.434236764907837e-06, -1.3653188943862915e-06, -1.296401023864746e-06, -1.2274831533432007e-06, -1.1585652828216553e-06, -1.0896474123001099e-06, -1.0207295417785645e-06, -9.51811671257019e-07, -8.828938007354736e-07, -8.139759302139282e-07, -7.450580596923828e-07, -6.761401891708374e-07, -6.07222318649292e-07, -5.383044481277466e-07, -4.6938657760620117e-07, -4.0046870708465576e-07, -3.3155083656311035e-07, -2.6263296604156494e-07, -1.9371509552001953e-07, -1.2479722499847412e-07, -5.587935447692871e-08, 
1.30385160446167e-08, 8.195638656616211e-08, 1.5087425708770752e-07, 2.1979212760925293e-07, 2.8870999813079834e-07, 3.5762786865234375e-07, 4.2654573917388916e-07, 4.954636096954346e-07, 5.6438148021698e-07, 6.332993507385254e-07, 7.022172212600708e-07, 7.711350917816162e-07, 8.400529623031616e-07, 9.08970832824707e-07, 9.778887033462524e-07, 1.0468065738677979e-06, 1.1157244443893433e-06, 1.1846423149108887e-06, 1.253560185432434e-06, 1.3224780559539795e-06, 1.391395926475525e-06, 1.4603137969970703e-06, 1.5292316675186157e-06, 1.5981495380401611e-06, 1.6670674085617065e-06, 1.735985279083252e-06, 1.8049031496047974e-06, 1.8738210201263428e-06, 1.942738890647888e-06, 2.0116567611694336e-06, 2.080574631690979e-06, 2.1494925022125244e-06, 2.21841037273407e-06, 2.2873282432556152e-06, 2.3562461137771606e-06, 2.425163984298706e-06, 2.4940818548202515e-06, 2.562999725341797e-06]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 5.0, 3.0, 7.0, 12.0, 12.0, 9.0, 19.0, 36.0, 53.0, 76.0, 102.0, 191.0, 308.0, 499.0, 907.0, 1724.0, 3920.0, 9862.0, 33674.0, 305626.0, 625475.0, 44981.0, 11836.0, 4604.0, 2085.0, 980.0, 584.0, 360.0, 209.0, 122.0, 91.0, 53.0, 40.0, 30.0, 18.0, 15.0, 8.0, 4.0, 5.0, 7.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00011098384857177734, -0.00010703317821025848, -0.00010308250784873962, -9.913183748722076e-05, -9.51811671257019e-05, -9.123049676418304e-05, -8.727982640266418e-05, -8.332915604114532e-05, -7.937848567962646e-05, -7.54278153181076e-05, -7.147714495658875e-05, -6.752647459506989e-05, -6.357580423355103e-05, -5.9625133872032166e-05, -5.5674463510513306e-05, -5.1723793148994446e-05, -4.7773122787475586e-05, -4.3822452425956726e-05, -3.9871782064437866e-05, -3.5921111702919006e-05, -3.1970441341400146e-05, -2.8019770979881287e-05, -2.4069100618362427e-05, -2.0118430256843567e-05, -1.6167759895324707e-05, -1.2217089533805847e-05, -8.266419172286987e-06, -4.3157488107681274e-06, -3.650784492492676e-07, 3.5855919122695923e-06, 7.536262273788452e-06, 1.1486932635307312e-05, 1.5437602996826172e-05, 1.9388273358345032e-05, 2.333894371986389e-05, 2.728961408138275e-05, 3.124028444290161e-05, 3.519095480442047e-05, 3.914162516593933e-05, 4.309229552745819e-05, 4.704296588897705e-05, 5.099363625049591e-05, 5.494430661201477e-05, 5.889497697353363e-05, 6.284564733505249e-05, 6.679631769657135e-05, 7.074698805809021e-05, 7.469765841960907e-05, 7.864832878112793e-05, 8.259899914264679e-05, 8.654966950416565e-05, 9.050033986568451e-05, 9.445101022720337e-05, 9.840168058872223e-05, 0.00010235235095024109, 0.00010630302131175995, 0.00011025369167327881, 0.00011420436203479767, 0.00011815503239631653, 0.0001221057027578354, 0.00012605637311935425, 0.0001300070434808731, 0.00013395771384239197, 0.00013790838420391083, 0.0001418590545654297]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 4.0, 12.0, 12.0, 12.0, 16.0, 21.0, 31.0, 54.0, 80.0, 103.0, 118.0, 111.0, 109.0, 91.0, 61.0, 40.0, 26.0, 22.0, 11.0, 12.0, 10.0, 14.0, 7.0, 6.0, 5.0, 3.0, 1.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.2842159271240234e-05, -3.168173134326935e-05, -3.052130341529846e-05, -2.9360875487327576e-05, -2.820044755935669e-05, -2.7040019631385803e-05, 
-2.5879591703414917e-05, -2.471916377544403e-05, -2.3558735847473145e-05, -2.2398307919502258e-05, -2.1237879991531372e-05, -2.0077452063560486e-05, -1.89170241355896e-05, -1.7756596207618713e-05, -1.6596168279647827e-05, -1.543574035167694e-05, -1.4275312423706055e-05, -1.3114884495735168e-05, -1.1954456567764282e-05, -1.0794028639793396e-05, -9.63360071182251e-06, -8.473172783851624e-06, -7.312744855880737e-06, -6.152316927909851e-06, -4.991888999938965e-06, -3.831461071968079e-06, -2.6710331439971924e-06, -1.5106052160263062e-06, -3.501772880554199e-07, 8.102506399154663e-07, 1.9706785678863525e-06, 3.1311064958572388e-06, 4.291534423828125e-06, 5.451962351799011e-06, 6.6123902797698975e-06, 7.772818207740784e-06, 8.93324613571167e-06, 1.0093674063682556e-05, 1.1254101991653442e-05, 1.2414529919624329e-05, 1.3574957847595215e-05, 1.4735385775566101e-05, 1.5895813703536987e-05, 1.7056241631507874e-05, 1.821666955947876e-05, 1.9377097487449646e-05, 2.0537525415420532e-05, 2.169795334339142e-05, 2.2858381271362305e-05, 2.401880919933319e-05, 2.5179237127304077e-05, 2.6339665055274963e-05, 2.750009298324585e-05, 2.8660520911216736e-05, 2.9820948839187622e-05, 3.098137676715851e-05, 3.2141804695129395e-05, 3.330223262310028e-05, 3.446266055107117e-05, 3.562308847904205e-05, 3.678351640701294e-05, 3.7943944334983826e-05, 3.910437226295471e-05, 4.02648001909256e-05, 4.1425228118896484e-05]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [4.0, 23.0, 799.0, 192.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0010823264019563794, -0.0006898073479533195, -0.0002972882939502597, 9.523076005280018e-05, 0.00048774981405586004, 0.0008802687516435981, 0.0012727879220619798, 0.0016653070924803615, 0.0020578261464834213, 0.0024503450840711594, 0.002842864254489541, 0.0032353834249079227, 0.0036279023624956608, 0.004020421300083399, 0.004412940703332424, 0.004805459640920162, 0.0051979785785079, 0.005590497516095638, 0.005983016453683376, 0.006375535856932402, 0.00676805479452014, 0.007160573732107878, 0.007553093135356903, 0.007945612072944641, 0.00833813101053238, 0.008730649948120117, 0.009123168885707855, 0.009515687823295593, 0.009908206760883331, 0.01030072569847107, 0.010693245567381382, 0.01108576450496912, 0.011478282511234283, 0.011870801448822021, 0.01226332038640976, 0.012655839323997498, 0.013048358261585236, 0.013440877199172974, 0.013833397068083286, 0.014225916005671024, 0.014618434943258762, 0.0150109538808465, 0.015403472818434238, 0.01579599268734455, 0.01618851162493229, 0.016581030562520027, 0.016973549500107765, 0.017366068437695503, 0.01775858737528324, 0.01815110631287098, 0.018543625250458717, 0.018936144188046455, 0.019328663125634193, 0.01972118206322193, 0.02011370100080967, 0.020506221801042557, 0.020898740738630295, 0.021291259676218033, 0.02168377861380577, 0.02207629755139351, 0.022468816488981247, 0.022861335426568985, 0.023253854364156723, 0.02364637330174446, 0.0240388922393322]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 2.0, 11.0, 9.0, 11.0, 22.0, 22.0, 29.0, 32.0, 42.0, 42.0, 43.0, 47.0, 58.0, 47.0, 73.0, 
53.0, 64.0, 49.0, 55.0, 52.0, 48.0, 32.0, 34.0, 34.0, 20.0, 26.0, 13.0, 10.0, 14.0, 5.0, 3.0, 7.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004851222038269043, -0.0004721628502011299, -0.00045920349657535553, -0.00044624414294958115, -0.00043328478932380676, -0.0004203254356980324, -0.000407366082072258, -0.0003944067284464836, -0.00038144737482070923, -0.00036848802119493484, -0.00035552866756916046, -0.0003425693139433861, -0.0003296099603176117, -0.0003166506066918373, -0.00030369125306606293, -0.00029073189944028854, -0.00027777254581451416, -0.0002648131921887398, -0.0002518538385629654, -0.000238894484937191, -0.00022593513131141663, -0.00021297577768564224, -0.00020001642405986786, -0.00018705707043409348, -0.0001740977168083191, -0.0001611383631825447, -0.00014817900955677032, -0.00013521965593099594, -0.00012226030230522156, -0.00010930094867944717, -9.634159505367279e-05, -8.338224142789841e-05, -7.042288780212402e-05, -5.746353417634964e-05, -4.4504180550575256e-05, -3.154482692480087e-05, -1.858547329902649e-05, -5.626119673252106e-06, 7.333233952522278e-06, 2.029258757829666e-05, 3.3251941204071045e-05, 4.621129482984543e-05, 5.917064845561981e-05, 7.21300020813942e-05, 8.508935570716858e-05, 9.804870933294296e-05, 0.00011100806295871735, 0.00012396741658449173, 0.0001369267702102661, 0.0001498861238360405, 0.00016284547746181488, 0.00017580483108758926, 0.00018876418471336365, 0.00020172353833913803, 0.00021468289196491241, 0.0002276422455906868, 0.00024060159921646118, 0.00025356095284223557, 0.00026652030646800995, 0.00027947966009378433, 0.0002924390137195587, 0.0003053983673453331, 0.0003183577209711075, 0.00033131707459688187, 0.00034427642822265625]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [5.0, 0.0, 1.0, 3.0, 5.0, 6.0, 7.0, 10.0, 25.0, 24.0, 57.0, 100.0, 178.0, 348.0, 647.0, 1375.0, 2717.0, 5832.0, 13512.0, 34319.0, 103995.0, 3017993.0, 866726.0, 91157.0, 30474.0, 12300.0, 5605.0, 2755.0, 1488.0, 855.0, 540.0, 356.0, 224.0, 190.0, 135.0, 104.0, 64.0, 49.0, 32.0, 31.0, 14.0, 12.0, 6.0, 10.0, 3.0, 5.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00022327899932861328, -0.0002131108194589615, -0.0002029426395893097, -0.0001927744597196579, -0.0001826062798500061, -0.0001724380999803543, -0.00016226992011070251, -0.00015210174024105072, -0.00014193356037139893, -0.00013176538050174713, -0.00012159720063209534, -0.00011142902076244354, -0.00010126084089279175, -9.109266102313995e-05, -8.092448115348816e-05, -7.075630128383636e-05, -6.058812141418457e-05, -5.0419941544532776e-05, -4.025176167488098e-05, -3.0083581805229187e-05, -1.9915401935577393e-05, -9.747222065925598e-06, 4.209578037261963e-07, 1.058913767337799e-05, 2.0757317543029785e-05, 3.092549741268158e-05, 4.1093677282333374e-05, 5.126185715198517e-05, 6.143003702163696e-05, 7.159821689128876e-05, 8.176639676094055e-05, 9.193457663059235e-05, 0.00010210275650024414, 0.00011227093636989594, 0.00012243911623954773, 0.00013260729610919952, 0.00014277547597885132, 0.0001529436558485031, 0.0001631118357181549, 0.0001732800155878067, 0.0001834481954574585, 0.0001936163753271103, 0.00020378455519676208, 0.00021395273506641388, 0.00022412091493606567, 0.00023428909480571747, 0.00024445727467536926, 0.00025462545454502106, 0.00026479363441467285, 0.00027496181428432465, 0.00028512999415397644, 0.00029529817402362823, 0.00030546635389328003, 0.0003156345337629318, 
0.0003258027136325836, 0.0003359708935022354, 0.0003461390733718872, 0.000356307253241539, 0.0003664754331111908, 0.0003766436129808426, 0.0003868117928504944, 0.0003969799727201462, 0.000407148152589798, 0.00041731633245944977, 0.00042748451232910156]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 10.0, 9.0, 9.0, 15.0, 33.0, 27.0, 35.0, 41.0, 63.0, 81.0, 95.0, 119.0, 87.0, 88.0, 62.0, 49.0, 28.0, 42.0, 22.0, 15.0, 15.0, 6.0, 12.0, 6.0, 6.0, 2.0, 8.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9073486328125e-05, -1.826602965593338e-05, -1.745857298374176e-05, -1.665111631155014e-05, -1.584365963935852e-05, -1.50362029671669e-05, -1.422874629497528e-05, -1.3421289622783661e-05, -1.2613832950592041e-05, -1.1806376278400421e-05, -1.0998919606208801e-05, -1.0191462934017181e-05, -9.384006261825562e-06, -8.576549589633942e-06, -7.769092917442322e-06, -6.961636245250702e-06, -6.154179573059082e-06, -5.346722900867462e-06, -4.539266228675842e-06, -3.7318095564842224e-06, -2.9243528842926025e-06, -2.1168962121009827e-06, -1.3094395399093628e-06, -5.019828677177429e-07, 3.0547380447387695e-07, 1.1129304766654968e-06, 1.9203871488571167e-06, 2.7278438210487366e-06, 3.5353004932403564e-06, 4.342757165431976e-06, 5.150213837623596e-06, 5.957670509815216e-06, 6.765127182006836e-06, 7.572583854198456e-06, 8.380040526390076e-06, 9.187497198581696e-06, 9.994953870773315e-06, 1.0802410542964935e-05, 1.1609867215156555e-05, 1.2417323887348175e-05, 1.3224780559539795e-05, 1.4032237231731415e-05, 1.4839693903923035e-05, 1.5647150576114655e-05, 1.6454607248306274e-05, 1.7262063920497894e-05, 1.8069520592689514e-05, 1.8876977264881134e-05, 1.9684433937072754e-05, 2.0491890609264374e-05, 2.1299347281455994e-05, 2.2106803953647614e-05, 2.2914260625839233e-05, 2.3721717298030853e-05, 2.4529173970222473e-05, 2.5336630642414093e-05, 2.6144087314605713e-05, 2.6951543986797333e-05, 2.7759000658988953e-05, 2.8566457331180573e-05, 2.9373914003372192e-05, 3.0181370675563812e-05, 3.098882734775543e-05, 3.179628401994705e-05, 3.260374069213867e-05]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 3.0, 8.0, 18.0, 29.0, 31.0, 68.0, 81.0, 141.0, 249.0, 419.0, 843.0, 1730.0, 3729.0, 10058.0, 30227.0, 130537.0, 3271233.0, 632392.0, 79220.0, 20525.0, 7063.0, 2860.0, 1272.0, 637.0, 331.0, 196.0, 110.0, 85.0, 44.0, 30.0, 32.0, 24.0, 13.0, 11.0, 11.0, 4.0, 3.0, 6.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00023627281188964844, -0.00022636353969573975, -0.00021645426750183105, -0.00020654499530792236, -0.00019663572311401367, -0.00018672645092010498, -0.0001768171787261963, -0.0001669079065322876, -0.0001569986343383789, -0.00014708936214447021, -0.00013718008995056152, -0.00012727081775665283, -0.00011736154556274414, -0.00010745227336883545, -9.754300117492676e-05, -8.763372898101807e-05, -7.772445678710938e-05, -6.781518459320068e-05, -5.790591239929199e-05, -4.79966402053833e-05, -3.808736801147461e-05, -2.8178095817565918e-05, -1.8268823623657227e-05, -8.359551429748535e-06, 1.5497207641601562e-06, 1.1458992958068848e-05, 2.136826515197754e-05, 3.127753734588623e-05, 4.118680953979492e-05, 5.109608173370361e-05, 6.1005353927612305e-05, 
7.0914626121521e-05, 8.082389831542969e-05, 9.073317050933838e-05, 0.00010064244270324707, 0.00011055171489715576, 0.00012046098709106445, 0.00013037025928497314, 0.00014027953147888184, 0.00015018880367279053, 0.00016009807586669922, 0.0001700073480606079, 0.0001799166202545166, 0.0001898258924484253, 0.00019973516464233398, 0.00020964443683624268, 0.00021955370903015137, 0.00022946298122406006, 0.00023937225341796875, 0.00024928152561187744, 0.00025919079780578613, 0.0002691000699996948, 0.0002790093421936035, 0.0002889186143875122, 0.0002988278865814209, 0.0003087371587753296, 0.0003186464309692383, 0.00032855570316314697, 0.00033846497535705566, 0.00034837424755096436, 0.00035828351974487305, 0.00036819279193878174, 0.00037810206413269043, 0.0003880113363265991, 0.0003979206085205078]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 4.0, 2.0, 2.0, 3.0, 5.0, 9.0, 8.0, 8.0, 11.0, 20.0, 22.0, 22.0, 35.0, 30.0, 42.0, 70.0, 59.0, 90.0, 90.0, 131.0, 196.0, 542.0, 1448.0, 423.0, 174.0, 114.0, 87.0, 71.0, 58.0, 60.0, 49.0, 38.0, 24.0, 31.0, 14.0, 17.0, 15.0, 15.0, 11.0, 5.0, 6.0, 4.0, 7.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0], "bins": [-6.16908073425293e-05, -5.984492599964142e-05, -5.799904465675354e-05, -5.615316331386566e-05, -5.430728197097778e-05, -5.2461400628089905e-05, -5.0615519285202026e-05, -4.876963794231415e-05, -4.692375659942627e-05, -4.507787525653839e-05, -4.323199391365051e-05, -4.1386112570762634e-05, -3.9540231227874756e-05, -3.769434988498688e-05, -3.5848468542099e-05, -3.400258719921112e-05, -3.215670585632324e-05, -3.0310824513435364e-05, -2.8464943170547485e-05, -2.6619061827659607e-05, -2.477318048477173e-05, -2.292729914188385e-05, -2.108141779899597e-05, -1.9235536456108093e-05, -1.7389655113220215e-05, -1.5543773770332336e-05, -1.3697892427444458e-05, -1.185201108455658e-05, -1.0006129741668701e-05, -8.160248398780823e-06, -6.314367055892944e-06, -4.468485713005066e-06, -2.6226043701171875e-06, -7.767230272293091e-07, 1.0691583156585693e-06, 2.9150396585464478e-06, 4.760921001434326e-06, 6.606802344322205e-06, 8.452683687210083e-06, 1.0298565030097961e-05, 1.214444637298584e-05, 1.3990327715873718e-05, 1.5836209058761597e-05, 1.7682090401649475e-05, 1.9527971744537354e-05, 2.1373853087425232e-05, 2.321973443031311e-05, 2.506561577320099e-05, 2.6911497116088867e-05, 2.8757378458976746e-05, 3.0603259801864624e-05, 3.24491411447525e-05, 3.429502248764038e-05, 3.614090383052826e-05, 3.798678517341614e-05, 3.9832666516304016e-05, 4.1678547859191895e-05, 4.352442920207977e-05, 4.537031054496765e-05, 4.721619188785553e-05, 4.906207323074341e-05, 5.090795457363129e-05, 5.2753835916519165e-05, 5.4599717259407043e-05, 5.644559860229492e-05]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 17.0, 30.0, 87.0, 214.0, 342.0, 180.0, 70.0, 39.0, 18.0, 8.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005800141952931881, -0.0005286485538817942, -0.0004772829415742308, -0.0004259173001628369, -0.0003745516878552735, -0.0003231860464438796, -0.0002718204050324857, -0.0002204547927249223, -0.00016908915131352842, -0.00011772352445404977, -6.63578903186135e-05, 
-1.4992256183177233e-05, 3.637337067630142e-05, 8.773899753578007e-05, 0.00013910463894717395, 0.00019047025125473738, 0.00024183589266613126, 0.00029320153407752514, 0.00034456714638508856, 0.00039593278779648244, 0.00044729840010404587, 0.0004986640415154397, 0.0005500296829268336, 0.0006013952661305666, 0.0006527609657496214, 0.0007041266071610153, 0.0007554922485724092, 0.0008068578317761421, 0.000858223473187536, 0.0009095891145989299, 0.0009609547560103238, 0.0010123203974217176, 0.0010636859806254506, 0.0011150515638291836, 0.0011664172634482384, 0.0012177828466519713, 0.0012691485462710261, 0.001320514129474759, 0.001371879829093814, 0.0014232454122975469, 0.0014746109955012798, 0.0015259765787050128, 0.0015773422783240676, 0.0016287078615278006, 0.0016800735611468554, 0.0017314391443505883, 0.0017828047275543213, 0.001834170427173376, 0.0018855361267924309, 0.0019369017099961638, 0.001988267293199897, 0.0020396329928189516, 0.0020909986924380064, 0.0021423641592264175, 0.0021937298588454723, 0.002245095558464527, 0.0022964610252529383, 0.002347826724871993, 0.002399192191660404, 0.002450557891279459, 0.002501923590898514, 0.0025532892905175686, 0.0026046547573059797, 0.0026560204569250345, 0.0027073861565440893]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 0.0, 5.0, 7.0, 4.0, 8.0, 12.0, 8.0, 19.0, 17.0, 20.0, 21.0, 27.0, 44.0, 30.0, 35.0, 44.0, 64.0, 46.0, 43.0, 59.0, 45.0, 44.0, 42.0, 42.0, 41.0, 48.0, 45.0, 29.0, 29.0, 25.0, 22.0, 16.0, 17.0, 13.0, 9.0, 5.0, 9.0, 3.0, 5.0, 7.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003205537796020508, -0.000311482697725296, -0.00030241161584854126, -0.0002933405339717865, -0.00028426945209503174, -0.000275198370218277, -0.0002661272883415222, -0.00025705620646476746, -0.0002479851245880127, -0.00023891404271125793, -0.00022984296083450317, -0.0002207718789577484, -0.00021170079708099365, -0.0002026297152042389, -0.00019355863332748413, -0.00018448755145072937, -0.0001754164695739746, -0.00016634538769721985, -0.0001572743058204651, -0.00014820322394371033, -0.00013913214206695557, -0.0001300610601902008, -0.00012098997831344604, -0.00011191889643669128, -0.00010284781455993652, -9.377673268318176e-05, -8.4705650806427e-05, -7.563456892967224e-05, -6.656348705291748e-05, -5.749240517616272e-05, -4.842132329940796e-05, -3.93502414226532e-05, -3.0279159545898438e-05, -2.1208077669143677e-05, -1.2136995792388916e-05, -3.0659139156341553e-06, 6.0051679611206055e-06, 1.5076249837875366e-05, 2.4147331714630127e-05, 3.321841359138489e-05, 4.228949546813965e-05, 5.136057734489441e-05, 6.043165922164917e-05, 6.950274109840393e-05, 7.857382297515869e-05, 8.764490485191345e-05, 9.671598672866821e-05, 0.00010578706860542297, 0.00011485815048217773, 0.0001239292323589325, 0.00013300031423568726, 0.00014207139611244202, 0.00015114247798919678, 0.00016021355986595154, 0.0001692846417427063, 0.00017835572361946106, 0.00018742680549621582, 0.00019649788737297058, 0.00020556896924972534, 0.0002146400511264801, 0.00022371113300323486, 0.00023278221487998962, 0.00024185329675674438, 0.00025092437863349915, 0.0002599954605102539]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 7.0, 10.0, 4.0, 11.0, 18.0, 27.0, 45.0, 71.0, 104.0, 155.0, 290.0, 456.0, 764.0, 1319.0, 2187.0, 4177.0, 8203.0, 19107.0, 51041.0, 202549.0, 589034.0, 
108508.0, 33218.0, 13332.0, 6234.0, 3223.0, 1800.0, 1031.0, 637.0, 365.0, 237.0, 152.0, 93.0, 56.0, 34.0, 18.0, 17.0, 9.0, 7.0, 3.0, 4.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017595291137695312, -0.0001697354018688202, -0.00016351789236068726, -0.00015730038285255432, -0.0001510828733444214, -0.00014486536383628845, -0.00013864785432815552, -0.00013243034482002258, -0.00012621283531188965, -0.00011999532580375671, -0.00011377781629562378, -0.00010756030678749084, -0.00010134279727935791, -9.512528777122498e-05, -8.890777826309204e-05, -8.26902687549591e-05, -7.647275924682617e-05, -7.025524973869324e-05, -6.40377402305603e-05, -5.782023072242737e-05, -5.1602721214294434e-05, -4.53852117061615e-05, -3.9167702198028564e-05, -3.295019268989563e-05, -2.6732683181762695e-05, -2.051517367362976e-05, -1.4297664165496826e-05, -8.080154657363892e-06, -1.862645149230957e-06, 4.3548643589019775e-06, 1.0572373867034912e-05, 1.6789883375167847e-05, 2.300739288330078e-05, 2.9224902391433716e-05, 3.544241189956665e-05, 4.1659921407699585e-05, 4.787743091583252e-05, 5.4094940423965454e-05, 6.031244993209839e-05, 6.652995944023132e-05, 7.274746894836426e-05, 7.896497845649719e-05, 8.518248796463013e-05, 9.139999747276306e-05, 9.7617506980896e-05, 0.00010383501648902893, 0.00011005252599716187, 0.0001162700355052948, 0.00012248754501342773, 0.00012870505452156067, 0.0001349225640296936, 0.00014114007353782654, 0.00014735758304595947, 0.0001535750925540924, 0.00015979260206222534, 0.00016601011157035828, 0.0001722276210784912, 0.00017844513058662415, 0.00018466264009475708, 0.00019088014960289001, 0.00019709765911102295, 0.00020331516861915588, 0.00020953267812728882, 0.00021575018763542175, 0.0002219676971435547]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 5.0, 3.0, 6.0, 9.0, 9.0, 13.0, 14.0, 23.0, 33.0, 27.0, 60.0, 60.0, 73.0, 67.0, 101.0, 94.0, 76.0, 80.0, 61.0, 39.0, 27.0, 34.0, 14.0, 14.0, 12.0, 10.0, 6.0, 6.0, 5.0, 4.0, 2.0, 6.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.3603439331054688e-05, -2.2862106561660767e-05, -2.2120773792266846e-05, -2.1379441022872925e-05, -2.0638108253479004e-05, -1.9896775484085083e-05, -1.9155442714691162e-05, -1.841410994529724e-05, -1.767277717590332e-05, -1.69314444065094e-05, -1.619011163711548e-05, -1.5448778867721558e-05, -1.4707446098327637e-05, -1.3966113328933716e-05, -1.3224780559539795e-05, -1.2483447790145874e-05, -1.1742115020751953e-05, -1.1000782251358032e-05, -1.0259449481964111e-05, -9.51811671257019e-06, -8.77678394317627e-06, -8.035451173782349e-06, -7.294118404388428e-06, -6.552785634994507e-06, -5.811452865600586e-06, -5.070120096206665e-06, -4.328787326812744e-06, -3.5874545574188232e-06, -2.8461217880249023e-06, -2.1047890186309814e-06, -1.3634562492370605e-06, -6.221234798431396e-07, 1.1920928955078125e-07, 8.605420589447021e-07, 1.601874828338623e-06, 2.343207597732544e-06, 3.084540367126465e-06, 3.825873136520386e-06, 4.567205905914307e-06, 5.3085386753082275e-06, 6.0498714447021484e-06, 6.791204214096069e-06, 7.53253698348999e-06, 8.273869752883911e-06, 9.015202522277832e-06, 9.756535291671753e-06, 1.0497868061065674e-05, 1.1239200830459595e-05, 1.1980533599853516e-05, 1.2721866369247437e-05, 1.3463199138641357e-05, 1.4204531908035278e-05, 1.49458646774292e-05, 1.568719744682312e-05, 1.642853021621704e-05, 
1.7169862985610962e-05, 1.7911195755004883e-05, 1.8652528524398804e-05, 1.9393861293792725e-05, 2.0135194063186646e-05, 2.0876526832580566e-05, 2.1617859601974487e-05, 2.2359192371368408e-05, 2.310052514076233e-05, 2.384185791015625e-05]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 6.0, 8.0, 11.0, 8.0, 20.0, 15.0, 34.0, 55.0, 60.0, 75.0, 141.0, 181.0, 261.0, 368.0, 516.0, 809.0, 1203.0, 1743.0, 2826.0, 4225.0, 6994.0, 11470.0, 19207.0, 34463.0, 64491.0, 133040.0, 361601.0, 204386.0, 90430.0, 45925.0, 25008.0, 14832.0, 8746.0, 5286.0, 3364.0, 2191.0, 1493.0, 977.0, 616.0, 447.0, 309.0, 206.0, 144.0, 96.0, 89.0, 61.0, 32.0, 22.0, 18.0, 17.0, 10.0, 12.0, 8.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-9.274482727050781e-05, -8.972734212875366e-05, -8.670985698699951e-05, -8.369237184524536e-05, -8.067488670349121e-05, -7.765740156173706e-05, -7.463991641998291e-05, -7.162243127822876e-05, -6.860494613647461e-05, -6.558746099472046e-05, -6.256997585296631e-05, -5.955249071121216e-05, -5.653500556945801e-05, -5.351752042770386e-05, -5.050003528594971e-05, -4.748255014419556e-05, -4.4465065002441406e-05, -4.1447579860687256e-05, -3.8430094718933105e-05, -3.5412609577178955e-05, -3.2395124435424805e-05, -2.9377639293670654e-05, -2.6360154151916504e-05, -2.3342669010162354e-05, -2.0325183868408203e-05, -1.7307698726654053e-05, -1.4290213584899902e-05, -1.1272728443145752e-05, -8.255243301391602e-06, -5.237758159637451e-06, -2.2202730178833008e-06, 7.972121238708496e-07, 3.814697265625e-06, 6.83218240737915e-06, 9.8496675491333e-06, 1.2867152690887451e-05, 1.58846378326416e-05, 1.8902122974395752e-05, 2.1919608116149902e-05, 2.4937093257904053e-05, 2.7954578399658203e-05, 3.0972063541412354e-05, 3.3989548683166504e-05, 3.7007033824920654e-05, 4.0024518966674805e-05, 4.3042004108428955e-05, 4.6059489250183105e-05, 4.9076974391937256e-05, 5.2094459533691406e-05, 5.511194467544556e-05, 5.812942981719971e-05, 6.114691495895386e-05, 6.416440010070801e-05, 6.718188524246216e-05, 7.019937038421631e-05, 7.321685552597046e-05, 7.623434066772461e-05, 7.925182580947876e-05, 8.226931095123291e-05, 8.528679609298706e-05, 8.830428123474121e-05, 9.132176637649536e-05, 9.433925151824951e-05, 9.735673666000366e-05, 0.00010037422180175781]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 7.0, 1.0, 3.0, 7.0, 9.0, 8.0, 6.0, 12.0, 14.0, 20.0, 23.0, 28.0, 33.0, 29.0, 19.0, 29.0, 30.0, 43.0, 49.0, 35.0, 45.0, 42.0, 51.0, 37.0, 38.0, 50.0, 38.0, 36.0, 38.0, 36.0, 29.0, 30.0, 29.0, 17.0, 19.0, 11.0, 10.0, 8.0, 4.0, 6.0, 7.0, 8.0, 4.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-5.02467155456543e-05, -4.872586578130722e-05, -4.7205016016960144e-05, -4.568416625261307e-05, -4.416331648826599e-05, -4.2642466723918915e-05, -4.112161695957184e-05, -3.960076719522476e-05, -3.8079917430877686e-05, -3.655906766653061e-05, -3.503821790218353e-05, -3.3517368137836456e-05, -3.199651837348938e-05, -3.0475668609142303e-05, -2.8954818844795227e-05, -2.743396908044815e-05, -2.5913119316101074e-05, -2.4392269551753998e-05, -2.287141978740692e-05, -2.1350570023059845e-05, -1.982972025871277e-05, -1.8308870494365692e-05, -1.6788020730018616e-05, -1.526717096567154e-05, -1.3746321201324463e-05, -1.2225471436977386e-05, -1.070462167263031e-05, -9.183771908283234e-06, -7.662922143936157e-06, -6.142072379589081e-06, -4.621222615242004e-06, 
-3.100372850894928e-06, -1.5795230865478516e-06, -5.8673322200775146e-08, 1.4621764421463013e-06, 2.9830262064933777e-06, 4.503875970840454e-06, 6.0247257351875305e-06, 7.545575499534607e-06, 9.066425263881683e-06, 1.058727502822876e-05, 1.2108124792575836e-05, 1.3628974556922913e-05, 1.5149824321269989e-05, 1.6670674085617065e-05, 1.8191523849964142e-05, 1.9712373614311218e-05, 2.1233223378658295e-05, 2.275407314300537e-05, 2.4274922907352448e-05, 2.5795772671699524e-05, 2.73166224360466e-05, 2.8837472200393677e-05, 3.0358321964740753e-05, 3.187917172908783e-05, 3.3400021493434906e-05, 3.492087125778198e-05, 3.644172102212906e-05, 3.7962570786476135e-05, 3.948342055082321e-05, 4.100427031517029e-05, 4.2525120079517365e-05, 4.404596984386444e-05, 4.556681960821152e-05, 4.7087669372558594e-05]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 3.0, 9.0, 13.0, 9.0, 19.0, 25.0, 28.0, 42.0, 56.0, 72.0, 117.0, 161.0, 235.0, 297.0, 451.0, 708.0, 1212.0, 1971.0, 3332.0, 6432.0, 14958.0, 33268.0, 99821.0, 509709.0, 274006.0, 57483.0, 21889.0, 9873.0, 5256.0, 2647.0, 1507.0, 953.0, 683.0, 396.0, 259.0, 199.0, 131.0, 82.0, 68.0, 39.0, 32.0, 27.0, 19.0, 17.0, 11.0, 13.0, 6.0, 0.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 0.0, 1.0], "bins": [-2.5093555450439453e-05, -2.4303793907165527e-05, -2.35140323638916e-05, -2.2724270820617676e-05, -2.193450927734375e-05, -2.1144747734069824e-05, -2.03549861907959e-05, -1.9565224647521973e-05, -1.8775463104248047e-05, -1.798570156097412e-05, -1.7195940017700195e-05, -1.640617847442627e-05, -1.5616416931152344e-05, -1.4826655387878418e-05, -1.4036893844604492e-05, -1.3247132301330566e-05, -1.245737075805664e-05, -1.1667609214782715e-05, -1.0877847671508789e-05, -1.0088086128234863e-05, -9.298324584960938e-06, -8.508563041687012e-06, -7.718801498413086e-06, -6.92903995513916e-06, -6.139278411865234e-06, -5.349516868591309e-06, -4.559755325317383e-06, -3.769993782043457e-06, -2.9802322387695312e-06, -2.1904706954956055e-06, -1.4007091522216797e-06, -6.109476089477539e-07, 1.7881393432617188e-07, 9.685754776000977e-07, 1.7583370208740234e-06, 2.5480985641479492e-06, 3.337860107421875e-06, 4.127621650695801e-06, 4.9173831939697266e-06, 5.707144737243652e-06, 6.496906280517578e-06, 7.286667823791504e-06, 8.07642936706543e-06, 8.866190910339355e-06, 9.655952453613281e-06, 1.0445713996887207e-05, 1.1235475540161133e-05, 1.2025237083435059e-05, 1.2814998626708984e-05, 1.360476016998291e-05, 1.4394521713256836e-05, 1.5184283256530762e-05, 1.5974044799804688e-05, 1.6763806343078613e-05, 1.755356788635254e-05, 1.8343329429626465e-05, 1.913309097290039e-05, 1.9922852516174316e-05, 2.0712614059448242e-05, 2.1502375602722168e-05, 2.2292137145996094e-05, 2.308189868927002e-05, 2.3871660232543945e-05, 2.466142177581787e-05, 2.5451183319091797e-05]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 3.0, 5.0, 2.0, 8.0, 9.0, 3.0, 4.0, 15.0, 12.0, 9.0, 23.0, 26.0, 32.0, 37.0, 46.0, 67.0, 77.0, 111.0, 115.0, 98.0, 72.0, 61.0, 34.0, 27.0, 21.0, 21.0, 15.0, 12.0, 9.0, 9.0, 1.0, 11.0, 2.0, 3.0, 3.0, 2.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.516674041748047e-06, -3.400258719921112e-06, -3.2838433980941772e-06, -3.1674280762672424e-06, -3.0510127544403076e-06, -2.934597432613373e-06, -2.818182110786438e-06, -2.701766788959503e-06, 
-2.5853514671325684e-06, -2.4689361453056335e-06, -2.3525208234786987e-06, -2.236105501651764e-06, -2.119690179824829e-06, -2.0032748579978943e-06, -1.8868595361709595e-06, -1.7704442143440247e-06, -1.6540288925170898e-06, -1.537613570690155e-06, -1.4211982488632202e-06, -1.3047829270362854e-06, -1.1883676052093506e-06, -1.0719522833824158e-06, -9.55536961555481e-07, -8.391216397285461e-07, -7.227063179016113e-07, -6.062909960746765e-07, -4.898756742477417e-07, -3.734603524208069e-07, -2.5704503059387207e-07, -1.4062970876693726e-07, -2.421438694000244e-08, 9.220093488693237e-08, 2.086162567138672e-07, 3.25031578540802e-07, 4.414469003677368e-07, 5.578622221946716e-07, 6.742775440216064e-07, 7.906928658485413e-07, 9.071081876754761e-07, 1.0235235095024109e-06, 1.1399388313293457e-06, 1.2563541531562805e-06, 1.3727694749832153e-06, 1.4891847968101501e-06, 1.605600118637085e-06, 1.7220154404640198e-06, 1.8384307622909546e-06, 1.9548460841178894e-06, 2.0712614059448242e-06, 2.187676727771759e-06, 2.304092049598694e-06, 2.4205073714256287e-06, 2.5369226932525635e-06, 2.6533380150794983e-06, 2.769753336906433e-06, 2.886168658733368e-06, 3.0025839805603027e-06, 3.1189993023872375e-06, 3.2354146242141724e-06, 3.351829946041107e-06, 3.468245267868042e-06, 3.584660589694977e-06, 3.7010759115219116e-06, 3.8174912333488464e-06, 3.933906555175781e-06]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 7.0, 7.0, 10.0, 7.0, 12.0, 18.0, 26.0, 42.0, 59.0, 72.0, 95.0, 145.0, 268.0, 381.0, 601.0, 1123.0, 1575.0, 2802.0, 5959.0, 10598.0, 22610.0, 55844.0, 231844.0, 501948.0, 129729.0, 47152.0, 16735.0, 8103.0, 4372.0, 2677.0, 1368.0, 820.0, 551.0, 296.0, 217.0, 163.0, 87.0, 76.0, 50.0, 42.0, 19.0, 16.0, 16.0, 4.0, 4.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5497207641601562e-05, -1.5002675354480743e-05, -1.4508143067359924e-05, -1.4013610780239105e-05, -1.3519078493118286e-05, -1.3024546205997467e-05, -1.2530013918876648e-05, -1.2035481631755829e-05, -1.154094934463501e-05, -1.104641705751419e-05, -1.0551884770393372e-05, -1.0057352483272552e-05, -9.562820196151733e-06, -9.068287909030914e-06, -8.573755621910095e-06, -8.079223334789276e-06, -7.584691047668457e-06, -7.090158760547638e-06, -6.595626473426819e-06, -6.101094186306e-06, -5.606561899185181e-06, -5.1120296120643616e-06, -4.6174973249435425e-06, -4.122965037822723e-06, -3.6284327507019043e-06, -3.133900463581085e-06, -2.639368176460266e-06, -2.144835889339447e-06, -1.650303602218628e-06, -1.1557713150978088e-06, -6.612390279769897e-07, -1.6670674085617065e-07, 3.2782554626464844e-07, 8.223578333854675e-07, 1.3168901205062866e-06, 1.8114224076271057e-06, 2.305954694747925e-06, 2.800486981868744e-06, 3.295019268989563e-06, 3.789551556110382e-06, 4.284083843231201e-06, 4.77861613035202e-06, 5.273148417472839e-06, 5.7676807045936584e-06, 6.2622129917144775e-06, 6.756745278835297e-06, 7.251277565956116e-06, 7.745809853076935e-06, 8.240342140197754e-06, 8.734874427318573e-06, 9.229406714439392e-06, 9.723939001560211e-06, 1.021847128868103e-05, 1.071300357580185e-05, 1.1207535862922668e-05, 1.1702068150043488e-05, 1.2196600437164307e-05, 1.2691132724285126e-05, 1.3185665011405945e-05, 1.3680197298526764e-05, 1.4174729585647583e-05, 1.4669261872768402e-05, 1.5163794159889221e-05, 1.565832644701004e-05, 1.615285873413086e-05]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 3.0, 4.0, 1.0, 7.0, 7.0, 10.0, 4.0, 12.0, 22.0, 22.0, 23.0, 34.0, 57.0, 56.0, 74.0, 68.0, 115.0, 82.0, 71.0, 72.0, 62.0, 40.0, 38.0, 27.0, 27.0, 12.0, 18.0, 9.0, 8.0, 7.0, 5.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.927417755126953e-06, -7.68713653087616e-06, -7.446855306625366e-06, -7.206574082374573e-06, -6.966292858123779e-06, -6.726011633872986e-06, -6.485730409622192e-06, -6.245449185371399e-06, -6.0051679611206055e-06, -5.764886736869812e-06, -5.5246055126190186e-06, -5.284324288368225e-06, -5.044043064117432e-06, -4.803761839866638e-06, -4.563480615615845e-06, -4.323199391365051e-06, -4.082918167114258e-06, -3.842636942863464e-06, -3.602355718612671e-06, -3.3620744943618774e-06, -3.121793270111084e-06, -2.8815120458602905e-06, -2.641230821609497e-06, -2.4009495973587036e-06, -2.16066837310791e-06, -1.9203871488571167e-06, -1.6801059246063232e-06, -1.4398247003555298e-06, -1.1995434761047363e-06, -9.592622518539429e-07, -7.189810276031494e-07, -4.78699803352356e-07, -2.384185791015625e-07, 1.862645149230957e-09, 2.421438694000244e-07, 4.824250936508179e-07, 7.227063179016113e-07, 9.629875421524048e-07, 1.2032687664031982e-06, 1.4435499906539917e-06, 1.6838312149047852e-06, 1.9241124391555786e-06, 2.164393663406372e-06, 2.4046748876571655e-06, 2.644956111907959e-06, 2.8852373361587524e-06, 3.125518560409546e-06, 3.3657997846603394e-06, 3.606081008911133e-06, 3.846362233161926e-06, 4.08664345741272e-06, 4.326924681663513e-06, 4.567205905914307e-06, 4.8074871301651e-06, 5.0477683544158936e-06, 5.288049578666687e-06, 5.5283308029174805e-06, 5.768612027168274e-06, 6.008893251419067e-06, 6.249174475669861e-06, 6.489455699920654e-06, 6.729736924171448e-06, 6.970018148422241e-06, 7.210299372673035e-06, 7.450580596923828e-06]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 9.0, 24.0, 94.0, 417.0, 314.0, 94.0, 38.0, 18.0, 11.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00030105639598332345, -0.00025688885943964124, -0.00021272135199978948, -0.0001685538300080225, -0.0001243863080162555, -8.021878602448851e-05, -3.605126403272152e-05, 8.116272510960698e-06, 5.228377995081246e-05, 9.645130194257945e-05, 0.00014061882393434644, 0.00018478634592611343, 0.00022895386791788042, 0.00027312140446156263, 0.0003172889119014144, 0.0003614564484450966, 0.00040562395588494837, 0.0004497914924286306, 0.0004939589998684824, 0.0005381265073083341, 0.0005822940729558468, 0.0006264615803956985, 0.0006706290878355503, 0.0007147965952754021, 0.0007589641027152538, 0.0008031316101551056, 0.0008472991175949574, 0.00089146668324247, 0.0009356341906823218, 0.0009798017563298345, 0.0010239692637696862, 0.001068136771209538, 0.0011123043950647116, 0.0011564719025045633, 0.001200639409944415, 0.0012448069173842669, 0.0012889744248241186, 0.0013331420486792922, 0.001377309556119144, 0.0014214770635589957, 0.0014656445709988475, 0.0015098120784386992, 0.001553979585878551, 0.0015981470933184028, 0.0016423147171735764, 0.0016864822246134281, 0.0017306497320532799, 0.0017748172394931316, 0.0018189847469329834, 0.0018631522543728352, 0.001907319761812687, 0.0019514872692525387, 0.0019956547766923904, 0.002039822284132242, 
0.002083989791572094, 0.0021281572990119457, 0.0021723248064517975, 0.0022164923138916492, 0.002260659821331501, 0.0023048273287713528, 0.0023489948362112045, 0.0023931623436510563, 0.002437329851090908, 0.00248149735853076, 0.0025256650988012552]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 12.0, 13.0, 16.0, 20.0, 29.0, 25.0, 33.0, 39.0, 32.0, 37.0, 46.0, 52.0, 45.0, 43.0, 43.0, 46.0, 55.0, 40.0, 45.0, 33.0, 54.0, 42.0, 36.0, 28.0, 25.0, 20.0, 22.0, 25.0, 15.0, 10.0, 6.0, 8.0, 6.0, 3.0, 4.0], "bins": [-0.0003363490104675293, -0.00032862741500139236, -0.00032090581953525543, -0.0003131842240691185, -0.00030546262860298157, -0.00029774103313684464, -0.0002900194376707077, -0.00028229784220457077, -0.00027457624673843384, -0.0002668546512722969, -0.00025913305580616, -0.00025141146034002304, -0.0002436898648738861, -0.00023596826940774918, -0.00022824667394161224, -0.0002205250784754753, -0.00021280348300933838, -0.00020508188754320145, -0.00019736029207706451, -0.00018963869661092758, -0.00018191710114479065, -0.00017419550567865372, -0.00016647391021251678, -0.00015875231474637985, -0.00015103071928024292, -0.000143309123814106, -0.00013558752834796906, -0.00012786593288183212, -0.00012014433741569519, -0.00011242274194955826, -0.00010470114648342133, -9.69795510172844e-05, -8.925795555114746e-05, -8.153636008501053e-05, -7.38147646188736e-05, -6.609316915273666e-05, -5.837157368659973e-05, -5.06499782204628e-05, -4.292838275432587e-05, -3.5206787288188934e-05, -2.7485191822052002e-05, -1.976359635591507e-05, -1.2042000889778137e-05, -4.320405423641205e-06, 3.4011900424957275e-06, 1.112278550863266e-05, 1.8844380974769592e-05, 2.6565976440906525e-05, 3.428757190704346e-05, 4.200916737318039e-05, 4.973076283931732e-05, 5.7452358305454254e-05, 6.517395377159119e-05, 7.289554923772812e-05, 8.061714470386505e-05, 8.833874017000198e-05, 9.606033563613892e-05, 0.00010378193110227585, 0.00011150352656841278, 0.00011922512203454971, 0.00012694671750068665, 0.00013466831296682358, 0.0001423899084329605, 0.00015011150389909744, 0.00015783309936523438]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 6.0, 3.0, 5.0, 11.0, 13.0, 17.0, 24.0, 21.0, 60.0, 56.0, 74.0, 87.0, 138.0, 185.0, 221.0, 346.0, 534.0, 876.0, 1692.0, 3185.0, 5794.0, 10882.0, 21634.0, 51491.0, 161229.0, 3416704.0, 382907.0, 76282.0, 29880.0, 14025.0, 6955.0, 3607.0, 2111.0, 1175.0, 729.0, 449.0, 291.0, 183.0, 124.0, 93.0, 56.0, 45.0, 28.0, 26.0, 16.0, 10.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00018298625946044922, -0.00017786212265491486, -0.0001727379858493805, -0.00016761384904384613, -0.00016248971223831177, -0.0001573655754327774, -0.00015224143862724304, -0.00014711730182170868, -0.00014199316501617432, -0.00013686902821063995, -0.0001317448914051056, -0.00012662075459957123, -0.00012149661779403687, -0.0001163724809885025, -0.00011124834418296814, -0.00010612420737743378, -0.00010100007057189941, -9.587593376636505e-05, -9.075179696083069e-05, -8.562766015529633e-05, -8.050352334976196e-05, -7.53793865442276e-05, -7.025524973869324e-05, -6.513111293315887e-05, -6.000697612762451e-05, -5.488283932209015e-05, -4.9758702516555786e-05, -4.463456571102142e-05, -3.951042890548706e-05, -3.43862920999527e-05, 
-2.9262155294418335e-05, -2.4138018488883972e-05, -1.901388168334961e-05, -1.3889744877815247e-05, -8.765608072280884e-06, -3.641471266746521e-06, 1.4826655387878418e-06, 6.606802344322205e-06, 1.1730939149856567e-05, 1.685507595539093e-05, 2.1979212760925293e-05, 2.7103349566459656e-05, 3.222748637199402e-05, 3.735162317752838e-05, 4.2475759983062744e-05, 4.759989678859711e-05, 5.272403359413147e-05, 5.784817039966583e-05, 6.29723072052002e-05, 6.809644401073456e-05, 7.322058081626892e-05, 7.834471762180328e-05, 8.346885442733765e-05, 8.859299123287201e-05, 9.371712803840637e-05, 9.884126484394073e-05, 0.0001039654016494751, 0.00010908953845500946, 0.00011421367526054382, 0.00011933781206607819, 0.00012446194887161255, 0.0001295860856771469, 0.00013471022248268127, 0.00013983435928821564, 0.00014495849609375]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 8.0, 6.0, 10.0, 10.0, 16.0, 17.0, 14.0, 25.0, 38.0, 37.0, 65.0, 74.0, 104.0, 106.0, 102.0, 93.0, 66.0, 59.0, 39.0, 25.0, 19.0, 18.0, 17.0, 12.0, 4.0, 8.0, 1.0, 8.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.1888484954833984e-05, -3.099720925092697e-05, -3.010593354701996e-05, -2.9214657843112946e-05, -2.8323382139205933e-05, -2.743210643529892e-05, -2.6540830731391907e-05, -2.5649555027484894e-05, -2.475827932357788e-05, -2.3867003619670868e-05, -2.2975727915763855e-05, -2.2084452211856842e-05, -2.119317650794983e-05, -2.0301900804042816e-05, -1.9410625100135803e-05, -1.851934939622879e-05, -1.7628073692321777e-05, -1.6736797988414764e-05, -1.584552228450775e-05, -1.4954246580600739e-05, -1.4062970876693726e-05, -1.3171695172786713e-05, -1.22804194688797e-05, -1.1389143764972687e-05, -1.0497868061065674e-05, -9.606592357158661e-06, -8.715316653251648e-06, -7.824040949344635e-06, -6.932765245437622e-06, -6.041489541530609e-06, -5.150213837623596e-06, -4.258938133716583e-06, -3.3676624298095703e-06, -2.4763867259025574e-06, -1.5851110219955444e-06, -6.938353180885315e-07, 1.9744038581848145e-07, 1.0887160897254944e-06, 1.9799917936325073e-06, 2.8712674975395203e-06, 3.762543201446533e-06, 4.653818905353546e-06, 5.545094609260559e-06, 6.436370313167572e-06, 7.327646017074585e-06, 8.218921720981598e-06, 9.11019742488861e-06, 1.0001473128795624e-05, 1.0892748832702637e-05, 1.178402453660965e-05, 1.2675300240516663e-05, 1.3566575944423676e-05, 1.4457851648330688e-05, 1.53491273522377e-05, 1.6240403056144714e-05, 1.7131678760051727e-05, 1.802295446395874e-05, 1.8914230167865753e-05, 1.9805505871772766e-05, 2.069678157567978e-05, 2.1588057279586792e-05, 2.2479332983493805e-05, 2.3370608687400818e-05, 2.426188439130783e-05, 2.5153160095214844e-05]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 6.0, 3.0, 2.0, 7.0, 11.0, 10.0, 15.0, 15.0, 44.0, 47.0, 59.0, 164.0, 287.0, 690.0, 1561.0, 4000.0, 12097.0, 42089.0, 216100.0, 3449731.0, 382629.0, 60303.0, 16037.0, 4963.0, 1874.0, 744.0, 364.0, 168.0, 85.0, 63.0, 34.0, 19.0, 13.0, 14.0, 11.0, 4.0, 12.0, 2.0, 5.0, 2.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00018596649169921875, -0.00017870962619781494, -0.00017145276069641113, -0.00016419589519500732, -0.00015693902969360352, -0.0001496821641921997, -0.0001424252986907959, 
-0.0001351684331893921, -0.00012791156768798828, -0.00012065470218658447, -0.00011339783668518066, -0.00010614097118377686, -9.888410568237305e-05, -9.162724018096924e-05, -8.437037467956543e-05, -7.711350917816162e-05, -6.985664367675781e-05, -6.2599778175354e-05, -5.5342912673950195e-05, -4.808604717254639e-05, -4.082918167114258e-05, -3.357231616973877e-05, -2.631545066833496e-05, -1.9058585166931152e-05, -1.1801719665527344e-05, -4.544854164123535e-06, 2.7120113372802734e-06, 9.968876838684082e-06, 1.722574234008789e-05, 2.44826078414917e-05, 3.173947334289551e-05, 3.8996338844299316e-05, 4.6253204345703125e-05, 5.3510069847106934e-05, 6.076693534851074e-05, 6.802380084991455e-05, 7.528066635131836e-05, 8.253753185272217e-05, 8.979439735412598e-05, 9.705126285552979e-05, 0.0001043081283569336, 0.0001115649938583374, 0.00011882185935974121, 0.00012607872486114502, 0.00013333559036254883, 0.00014059245586395264, 0.00014784932136535645, 0.00015510618686676025, 0.00016236305236816406, 0.00016961991786956787, 0.00017687678337097168, 0.0001841336488723755, 0.0001913905143737793, 0.0001986473798751831, 0.00020590424537658691, 0.00021316111087799072, 0.00022041797637939453, 0.00022767484188079834, 0.00023493170738220215, 0.00024218857288360596, 0.00024944543838500977, 0.0002567023038864136, 0.0002639591693878174, 0.0002712160348892212, 0.000278472900390625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 1.0, 5.0, 4.0, 8.0, 18.0, 20.0, 18.0, 45.0, 44.0, 49.0, 65.0, 99.0, 122.0, 156.0, 309.0, 793.0, 1126.0, 418.0, 210.0, 125.0, 102.0, 72.0, 77.0, 40.0, 32.0, 32.0, 17.0, 26.0, 18.0, 4.0, 13.0, 5.0, 5.0, 1.0, 2.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.424022674560547e-05, -5.179736763238907e-05, -4.935450851917267e-05, -4.691164940595627e-05, -4.446879029273987e-05, -4.202593117952347e-05, -3.958307206630707e-05, -3.714021295309067e-05, -3.469735383987427e-05, -3.225449472665787e-05, -2.9811635613441467e-05, -2.7368776500225067e-05, -2.4925917387008667e-05, -2.2483058273792267e-05, -2.0040199160575867e-05, -1.7597340047359467e-05, -1.5154480934143066e-05, -1.2711621820926666e-05, -1.0268762707710266e-05, -7.825903594493866e-06, -5.383044481277466e-06, -2.9401853680610657e-06, -4.973262548446655e-07, 1.9455328583717346e-06, 4.388391971588135e-06, 6.831251084804535e-06, 9.274110198020935e-06, 1.1716969311237335e-05, 1.4159828424453735e-05, 1.6602687537670135e-05, 1.9045546650886536e-05, 2.1488405764102936e-05, 2.3931264877319336e-05, 2.6374123990535736e-05, 2.8816983103752136e-05, 3.1259842216968536e-05, 3.3702701330184937e-05, 3.614556044340134e-05, 3.858841955661774e-05, 4.103127866983414e-05, 4.347413778305054e-05, 4.591699689626694e-05, 4.835985600948334e-05, 5.080271512269974e-05, 5.324557423591614e-05, 5.568843334913254e-05, 5.813129246234894e-05, 6.057415157556534e-05, 6.301701068878174e-05, 6.545986980199814e-05, 6.790272891521454e-05, 7.034558802843094e-05, 7.278844714164734e-05, 7.523130625486374e-05, 7.767416536808014e-05, 8.011702448129654e-05, 8.255988359451294e-05, 8.500274270772934e-05, 8.744560182094574e-05, 8.988846093416214e-05, 9.233132004737854e-05, 9.477417916059494e-05, 9.721703827381134e-05, 9.965989738702774e-05, 0.00010210275650024414]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 1.0, 4.0, 8.0, 18.0, 47.0, 123.0, 321.0, 267.0, 127.0, 
58.0, 28.0, 4.0, 7.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00031933627906255424, -0.00027886059251613915, -0.00023838490596972406, -0.00019790920487139374, -0.00015743351832497865, -0.00011695783177856356, -7.648213068023324e-05, -3.600644413381815e-05, 4.469242412596941e-06, 4.494493259699084e-05, 8.542062278138474e-05, 0.00012589631660375744, 0.00016637200315017253, 0.00020684768969658762, 0.00024732339079491794, 0.00028779907734133303, 0.0003282747638877481, 0.0003687504504341632, 0.0004092261369805783, 0.00044970185263082385, 0.0004901775391772389, 0.000530653225723654, 0.0005711289122700691, 0.0006116045988164842, 0.0006520802853628993, 0.0006925559719093144, 0.0007330316584557295, 0.0007735073450021446, 0.0008139830315485597, 0.0008544587180949748, 0.0008949344046413898, 0.0009354101493954659, 0.00097588577773422, 0.001016361522488296, 0.0010568371508270502, 0.0010973128955811262, 0.0011377885239198804, 0.0011782642686739564, 0.0012187398970127106, 0.0012592156417667866, 0.0012996912701055408, 0.0013401670148596168, 0.001380642643198371, 0.001421118387952447, 0.0014615940162912011, 0.0015020697610452771, 0.0015425453893840313, 0.0015830211341381073, 0.0016234968788921833, 0.0016639726236462593, 0.0017044482519850135, 0.0017449239967390895, 0.0017853996250778437, 0.0018258753698319197, 0.0018663509981706738, 0.0019068267429247499, 0.001947302371263504, 0.00198777811601758, 0.002028253860771656, 0.0020687293726950884, 0.0021092051174491644, 0.0021496808622032404, 0.0021901566069573164, 0.0022306321188807487, 0.0022711078636348248]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 8.0, 4.0, 6.0, 11.0, 11.0, 14.0, 10.0, 14.0, 26.0, 18.0, 21.0, 24.0, 33.0, 30.0, 32.0, 33.0, 56.0, 47.0, 52.0, 44.0, 53.0, 45.0, 57.0, 50.0, 40.0, 36.0, 29.0, 31.0, 24.0, 17.0, 18.0, 16.0, 23.0, 12.0, 8.0, 9.0, 10.0, 7.0, 4.0, 8.0, 2.0, 5.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.00023829936981201172, -0.00023134611546993256, -0.0002243928611278534, -0.00021743960678577423, -0.00021048635244369507, -0.0002035330981016159, -0.00019657984375953674, -0.00018962658941745758, -0.00018267333507537842, -0.00017572008073329926, -0.0001687668263912201, -0.00016181357204914093, -0.00015486031770706177, -0.0001479070633649826, -0.00014095380902290344, -0.00013400055468082428, -0.00012704730033874512, -0.00012009404599666595, -0.00011314079165458679, -0.00010618753731250763, -9.923428297042847e-05, -9.22810286283493e-05, -8.532777428627014e-05, -7.837451994419098e-05, -7.142126560211182e-05, -6.446801126003265e-05, -5.751475691795349e-05, -5.056150257587433e-05, -4.3608248233795166e-05, -3.6654993891716003e-05, -2.970173954963684e-05, -2.2748485207557678e-05, -1.5795230865478516e-05, -8.841976523399353e-06, -1.8887221813201904e-06, 5.064532160758972e-06, 1.2017786502838135e-05, 1.8971040844917297e-05, 2.592429518699646e-05, 3.287754952907562e-05, 3.9830803871154785e-05, 4.678405821323395e-05, 5.373731255531311e-05, 6.069056689739227e-05, 6.764382123947144e-05, 7.45970755815506e-05, 8.155032992362976e-05, 8.850358426570892e-05, 9.545683860778809e-05, 0.00010241009294986725, 0.00010936334729194641, 0.00011631660163402557, 0.00012326985597610474, 0.0001302231103181839, 
0.00013717636466026306, 0.00014412961900234222, 0.0001510828733444214, 0.00015803612768650055, 0.0001649893820285797, 0.00017194263637065887, 0.00017889589071273804, 0.0001858491450548172, 0.00019280239939689636, 0.00019975565373897552, 0.0002067089080810547]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 5.0, 3.0, 11.0, 13.0, 19.0, 37.0, 42.0, 75.0, 114.0, 159.0, 265.0, 504.0, 829.0, 1610.0, 2784.0, 5541.0, 12164.0, 29789.0, 90494.0, 561841.0, 245885.0, 57449.0, 20719.0, 8722.0, 4358.0, 2181.0, 1181.0, 687.0, 410.0, 244.0, 151.0, 98.0, 52.0, 33.0, 37.0, 16.0, 13.0, 10.0, 5.0, 5.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00023448467254638672, -0.00022733770310878754, -0.00022019073367118835, -0.00021304376423358917, -0.00020589679479599, -0.0001987498253583908, -0.00019160285592079163, -0.00018445588648319244, -0.00017730891704559326, -0.00017016194760799408, -0.0001630149781703949, -0.00015586800873279572, -0.00014872103929519653, -0.00014157406985759735, -0.00013442710041999817, -0.000127280130982399, -0.0001201331615447998, -0.00011298619210720062, -0.00010583922266960144, -9.869225323200226e-05, -9.154528379440308e-05, -8.43983143568039e-05, -7.725134491920471e-05, -7.010437548160553e-05, -6.295740604400635e-05, -5.5810436606407166e-05, -4.8663467168807983e-05, -4.15164977312088e-05, -3.436952829360962e-05, -2.7222558856010437e-05, -2.0075589418411255e-05, -1.2928619980812073e-05, -5.781650543212891e-06, 1.3653188943862915e-06, 8.512288331985474e-06, 1.5659257769584656e-05, 2.2806227207183838e-05, 2.995319664478302e-05, 3.71001660823822e-05, 4.4247135519981384e-05, 5.1394104957580566e-05, 5.854107439517975e-05, 6.568804383277893e-05, 7.283501327037811e-05, 7.99819827079773e-05, 8.712895214557648e-05, 9.427592158317566e-05, 0.00010142289102077484, 0.00010856986045837402, 0.0001157168298959732, 0.0001228637993335724, 0.00013001076877117157, 0.00013715773820877075, 0.00014430470764636993, 0.00015145167708396912, 0.0001585986465215683, 0.00016574561595916748, 0.00017289258539676666, 0.00018003955483436584, 0.00018718652427196503, 0.0001943334937095642, 0.0002014804631471634, 0.00020862743258476257, 0.00021577440202236176, 0.00022292137145996094]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 2.0, 2.0, 3.0, 7.0, 6.0, 6.0, 5.0, 11.0, 17.0, 24.0, 19.0, 30.0, 44.0, 54.0, 51.0, 80.0, 91.0, 98.0, 97.0, 83.0, 59.0, 52.0, 41.0, 25.0, 26.0, 16.0, 14.0, 8.0, 9.0, 5.0, 8.0, 4.0, 5.0, 1.0, 3.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6285648345947266e-05, -2.5472603738307953e-05, -2.465955913066864e-05, -2.3846514523029327e-05, -2.3033469915390015e-05, -2.2220425307750702e-05, -2.140738070011139e-05, -2.0594336092472076e-05, -1.9781291484832764e-05, -1.896824687719345e-05, -1.8155202269554138e-05, -1.7342157661914825e-05, -1.6529113054275513e-05, -1.57160684466362e-05, -1.4903023838996887e-05, -1.4089979231357574e-05, -1.3276934623718262e-05, -1.2463890016078949e-05, -1.1650845408439636e-05, -1.0837800800800323e-05, -1.002475619316101e-05, -9.211711585521698e-06, -8.398666977882385e-06, -7.5856223702430725e-06, -6.77257776260376e-06, -5.959533154964447e-06, -5.146488547325134e-06, -4.3334439396858215e-06, -3.520399332046509e-06, -2.707354724407196e-06, -1.8943101167678833e-06, 
-1.0812655091285706e-06, -2.682209014892578e-07, 5.448237061500549e-07, 1.3578683137893677e-06, 2.1709129214286804e-06, 2.983957529067993e-06, 3.797002136707306e-06, 4.610046744346619e-06, 5.423091351985931e-06, 6.236135959625244e-06, 7.049180567264557e-06, 7.86222517490387e-06, 8.675269782543182e-06, 9.488314390182495e-06, 1.0301358997821808e-05, 1.111440360546112e-05, 1.1927448213100433e-05, 1.2740492820739746e-05, 1.3553537428379059e-05, 1.4366582036018372e-05, 1.5179626643657684e-05, 1.5992671251296997e-05, 1.680571585893631e-05, 1.7618760466575623e-05, 1.8431805074214935e-05, 1.9244849681854248e-05, 2.005789428949356e-05, 2.0870938897132874e-05, 2.1683983504772186e-05, 2.24970281124115e-05, 2.3310072720050812e-05, 2.4123117327690125e-05, 2.4936161935329437e-05, 2.574920654296875e-05]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 8.0, 10.0, 8.0, 10.0, 17.0, 28.0, 35.0, 45.0, 80.0, 97.0, 141.0, 186.0, 285.0, 459.0, 625.0, 1009.0, 1512.0, 2314.0, 3677.0, 5892.0, 9649.0, 16780.0, 29312.0, 54229.0, 104910.0, 253595.0, 310393.0, 116182.0, 59249.0, 32211.0, 17853.0, 10411.0, 6337.0, 3851.0, 2531.0, 1548.0, 997.0, 650.0, 439.0, 314.0, 224.0, 128.0, 96.0, 67.0, 50.0, 42.0, 22.0, 18.0, 16.0, 7.0, 6.0, 0.0, 5.0, 1.0, 0.0, 5.0], "bins": [-0.00010162591934204102, -9.864289313554764e-05, -9.565986692905426e-05, -9.267684072256088e-05, -8.96938145160675e-05, -8.671078830957413e-05, -8.372776210308075e-05, -8.074473589658737e-05, -7.7761709690094e-05, -7.477868348360062e-05, -7.179565727710724e-05, -6.881263107061386e-05, -6.582960486412048e-05, -6.28465786576271e-05, -5.986355245113373e-05, -5.688052624464035e-05, -5.389750003814697e-05, -5.0914473831653595e-05, -4.793144762516022e-05, -4.494842141866684e-05, -4.196539521217346e-05, -3.8982369005680084e-05, -3.5999342799186707e-05, -3.301631659269333e-05, -3.003329038619995e-05, -2.7050264179706573e-05, -2.4067237973213196e-05, -2.1084211766719818e-05, -1.810118556022644e-05, -1.5118159353733063e-05, -1.2135133147239685e-05, -9.152106940746307e-06, -6.16908073425293e-06, -3.186054527759552e-06, -2.0302832126617432e-07, 2.7799978852272034e-06, 5.763024091720581e-06, 8.746050298213959e-06, 1.1729076504707336e-05, 1.4712102711200714e-05, 1.7695128917694092e-05, 2.067815512418747e-05, 2.3661181330680847e-05, 2.6644207537174225e-05, 2.9627233743667603e-05, 3.261025995016098e-05, 3.559328615665436e-05, 3.8576312363147736e-05, 4.155933856964111e-05, 4.454236477613449e-05, 4.752539098262787e-05, 5.0508417189121246e-05, 5.3491443395614624e-05, 5.6474469602108e-05, 5.945749580860138e-05, 6.244052201509476e-05, 6.542354822158813e-05, 6.840657442808151e-05, 7.138960063457489e-05, 7.437262684106827e-05, 7.735565304756165e-05, 8.033867925405502e-05, 8.33217054605484e-05, 8.630473166704178e-05, 8.928775787353516e-05]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 5.0, 5.0, 6.0, 9.0, 7.0, 12.0, 9.0, 10.0, 22.0, 23.0, 21.0, 21.0, 30.0, 36.0, 38.0, 22.0, 37.0, 35.0, 31.0, 43.0, 37.0, 37.0, 36.0, 41.0, 39.0, 44.0, 32.0, 26.0, 40.0, 35.0, 32.0, 28.0, 21.0, 22.0, 16.0, 19.0, 10.0, 10.0, 8.0, 12.0, 7.0, 5.0, 7.0, 4.0, 5.0, 5.0, 2.0, 4.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.178285598754883e-05, -4.035327583551407e-05, -3.892369568347931e-05, -3.749411553144455e-05, -3.606453537940979e-05, -3.463495522737503e-05, -3.320537507534027e-05, -3.177579492330551e-05, 
-3.0346214771270752e-05, -2.8916634619235992e-05, -2.7487054467201233e-05, -2.6057474315166473e-05, -2.4627894163131714e-05, -2.3198314011096954e-05, -2.1768733859062195e-05, -2.0339153707027435e-05, -1.8909573554992676e-05, -1.7479993402957916e-05, -1.6050413250923157e-05, -1.4620833098888397e-05, -1.3191252946853638e-05, -1.1761672794818878e-05, -1.0332092642784119e-05, -8.902512490749359e-06, -7.4729323387146e-06, -6.04335218667984e-06, -4.6137720346450806e-06, -3.184191882610321e-06, -1.7546117305755615e-06, -3.25031578540802e-07, 1.1045485734939575e-06, 2.534128725528717e-06, 3.9637088775634766e-06, 5.393289029598236e-06, 6.822869181632996e-06, 8.252449333667755e-06, 9.682029485702515e-06, 1.1111609637737274e-05, 1.2541189789772034e-05, 1.3970769941806793e-05, 1.5400350093841553e-05, 1.6829930245876312e-05, 1.8259510397911072e-05, 1.968909054994583e-05, 2.111867070198059e-05, 2.254825085401535e-05, 2.397783100605011e-05, 2.540741115808487e-05, 2.683699131011963e-05, 2.826657146215439e-05, 2.9696151614189148e-05, 3.112573176622391e-05, 3.255531191825867e-05, 3.3984892070293427e-05, 3.5414472222328186e-05, 3.6844052374362946e-05, 3.8273632526397705e-05, 3.9703212678432465e-05, 4.1132792830467224e-05, 4.2562372982501984e-05, 4.399195313453674e-05, 4.54215332865715e-05, 4.685111343860626e-05, 4.828069359064102e-05, 4.971027374267578e-05]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 8.0, 7.0, 6.0, 14.0, 11.0, 21.0, 23.0, 50.0, 88.0, 105.0, 116.0, 224.0, 334.0, 536.0, 792.0, 1500.0, 2610.0, 4921.0, 8364.0, 20628.0, 57594.0, 232305.0, 485227.0, 156305.0, 43425.0, 16803.0, 7602.0, 3493.0, 2091.0, 1288.0, 773.0, 412.0, 320.0, 189.0, 116.0, 81.0, 54.0, 46.0, 19.0, 15.0, 19.0, 8.0, 7.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.3589859008789062e-05, -1.312699168920517e-05, -1.2664124369621277e-05, -1.2201257050037384e-05, -1.1738389730453491e-05, -1.1275522410869598e-05, -1.0812655091285706e-05, -1.0349787771701813e-05, -9.88692045211792e-06, -9.424053132534027e-06, -8.961185812950134e-06, -8.498318493366241e-06, -8.035451173782349e-06, -7.572583854198456e-06, -7.109716534614563e-06, -6.64684921503067e-06, -6.183981895446777e-06, -5.7211145758628845e-06, -5.258247256278992e-06, -4.795379936695099e-06, -4.332512617111206e-06, -3.869645297527313e-06, -3.4067779779434204e-06, -2.9439106583595276e-06, -2.4810433387756348e-06, -2.018176019191742e-06, -1.5553086996078491e-06, -1.0924413800239563e-06, -6.295740604400635e-07, -1.6670674085617065e-07, 2.9616057872772217e-07, 7.59027898311615e-07, 1.2218952178955078e-06, 1.6847625374794006e-06, 2.1476298570632935e-06, 2.6104971766471863e-06, 3.073364496231079e-06, 3.536231815814972e-06, 3.999099135398865e-06, 4.4619664549827576e-06, 4.92483377456665e-06, 5.387701094150543e-06, 5.850568413734436e-06, 6.313435733318329e-06, 6.776303052902222e-06, 7.2391703724861145e-06, 7.702037692070007e-06, 8.1649050116539e-06, 8.627772331237793e-06, 9.090639650821686e-06, 9.553506970405579e-06, 1.0016374289989471e-05, 1.0479241609573364e-05, 1.0942108929157257e-05, 1.140497624874115e-05, 1.1867843568325043e-05, 1.2330710887908936e-05, 1.2793578207492828e-05, 1.3256445527076721e-05, 1.3719312846660614e-05, 1.4182180166244507e-05, 1.46450474858284e-05, 1.5107914805412292e-05, 1.5570782124996185e-05, 1.6033649444580078e-05]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 
1.0, 0.0, 2.0, 5.0, 1.0, 3.0, 2.0, 3.0, 3.0, 0.0, 10.0, 29.0, 12.0, 20.0, 28.0, 29.0, 29.0, 47.0, 46.0, 57.0, 45.0, 46.0, 67.0, 61.0, 62.0, 67.0, 48.0, 73.0, 35.0, 36.0, 23.0, 26.0, 21.0, 13.0, 9.0, 9.0, 5.0, 7.0, 8.0, 6.0, 7.0, 1.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7881393432617188e-06, -1.7248094081878662e-06, -1.6614794731140137e-06, -1.5981495380401611e-06, -1.5348196029663086e-06, -1.471489667892456e-06, -1.4081597328186035e-06, -1.344829797744751e-06, -1.2814998626708984e-06, -1.218169927597046e-06, -1.1548399925231934e-06, -1.0915100574493408e-06, -1.0281801223754883e-06, -9.648501873016357e-07, -9.015202522277832e-07, -8.381903171539307e-07, -7.748603820800781e-07, -7.115304470062256e-07, -6.48200511932373e-07, -5.848705768585205e-07, -5.21540641784668e-07, -4.5821070671081543e-07, -3.948807716369629e-07, -3.3155083656311035e-07, -2.682209014892578e-07, -2.0489096641540527e-07, -1.4156103134155273e-07, -7.82310962677002e-08, -1.4901161193847656e-08, 4.842877388000488e-08, 1.1175870895385742e-07, 1.7508864402770996e-07, 2.384185791015625e-07, 3.0174851417541504e-07, 3.650784492492676e-07, 4.284083843231201e-07, 4.917383193969727e-07, 5.550682544708252e-07, 6.183981895446777e-07, 6.817281246185303e-07, 7.450580596923828e-07, 8.083879947662354e-07, 8.717179298400879e-07, 9.350478649139404e-07, 9.98377799987793e-07, 1.0617077350616455e-06, 1.125037670135498e-06, 1.1883676052093506e-06, 1.2516975402832031e-06, 1.3150274753570557e-06, 1.3783574104309082e-06, 1.4416873455047607e-06, 1.5050172805786133e-06, 1.5683472156524658e-06, 1.6316771507263184e-06, 1.695007085800171e-06, 1.7583370208740234e-06, 1.821666955947876e-06, 1.8849968910217285e-06, 1.948326826095581e-06, 2.0116567611694336e-06, 2.074986696243286e-06, 2.1383166313171387e-06, 2.201646566390991e-06, 2.2649765014648438e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 1.0, 2.0, 12.0, 3.0, 16.0, 14.0, 24.0, 35.0, 67.0, 97.0, 105.0, 212.0, 280.0, 417.0, 733.0, 955.0, 2015.0, 3581.0, 4846.0, 10872.0, 16012.0, 40308.0, 69754.0, 230009.0, 415503.0, 125762.0, 66739.0, 24905.0, 16213.0, 8322.0, 3869.0, 2818.0, 1345.0, 1021.0, 632.0, 291.0, 277.0, 145.0, 123.0, 70.0, 39.0, 45.0, 24.0, 15.0, 14.0, 8.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0], "bins": [-9.119510650634766e-06, -8.845701813697815e-06, -8.571892976760864e-06, -8.298084139823914e-06, -8.024275302886963e-06, -7.750466465950012e-06, -7.4766576290130615e-06, -7.202848792076111e-06, -6.92903995513916e-06, -6.6552311182022095e-06, -6.381422281265259e-06, -6.107613444328308e-06, -5.833804607391357e-06, -5.559995770454407e-06, -5.286186933517456e-06, -5.012378096580505e-06, -4.738569259643555e-06, -4.464760422706604e-06, -4.190951585769653e-06, -3.917142748832703e-06, -3.643333911895752e-06, -3.3695250749588013e-06, -3.0957162380218506e-06, -2.8219074010849e-06, -2.5480985641479492e-06, -2.2742897272109985e-06, -2.000480890274048e-06, -1.7266720533370972e-06, -1.4528632164001465e-06, -1.1790543794631958e-06, -9.052455425262451e-07, -6.314367055892944e-07, -3.5762786865234375e-07, -8.381903171539307e-08, 1.8998980522155762e-07, 4.637986421585083e-07, 7.37607479095459e-07, 1.0114163160324097e-06, 1.2852251529693604e-06, 1.559033989906311e-06, 1.8328428268432617e-06, 2.1066516637802124e-06, 2.380460500717163e-06, 2.6542693376541138e-06, 2.9280781745910645e-06, 3.201887011528015e-06, 3.475695848464966e-06, 
3.7495046854019165e-06, 4.023313522338867e-06, 4.297122359275818e-06, 4.5709311962127686e-06, 4.844740033149719e-06, 5.11854887008667e-06, 5.392357707023621e-06, 5.666166543960571e-06, 5.939975380897522e-06, 6.213784217834473e-06, 6.487593054771423e-06, 6.761401891708374e-06, 7.035210728645325e-06, 7.309019565582275e-06, 7.582828402519226e-06, 7.856637239456177e-06, 8.130446076393127e-06, 8.404254913330078e-06]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [4.0, 0.0, 0.0, 0.0, 2.0, 5.0, 0.0, 8.0, 4.0, 5.0, 4.0, 8.0, 5.0, 11.0, 17.0, 11.0, 15.0, 15.0, 30.0, 35.0, 26.0, 58.0, 43.0, 34.0, 36.0, 47.0, 52.0, 53.0, 71.0, 58.0, 40.0, 46.0, 41.0, 32.0, 31.0, 46.0, 20.0, 20.0, 12.0, 10.0, 9.0, 17.0, 10.0, 9.0, 9.0, 4.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.4570693969726562e-06, -3.3294782042503357e-06, -3.201887011528015e-06, -3.0742958188056946e-06, -2.946704626083374e-06, -2.8191134333610535e-06, -2.691522240638733e-06, -2.5639310479164124e-06, -2.436339855194092e-06, -2.3087486624717712e-06, -2.1811574697494507e-06, -2.05356627702713e-06, -1.9259750843048096e-06, -1.798383891582489e-06, -1.6707926988601685e-06, -1.543201506137848e-06, -1.4156103134155273e-06, -1.2880191206932068e-06, -1.1604279279708862e-06, -1.0328367352485657e-06, -9.052455425262451e-07, -7.776543498039246e-07, -6.50063157081604e-07, -5.224719643592834e-07, -3.948807716369629e-07, -2.6728957891464233e-07, -1.3969838619232178e-07, -1.210719347000122e-08, 1.1548399925231934e-07, 2.430751919746399e-07, 3.7066638469696045e-07, 4.98257577419281e-07, 6.258487701416016e-07, 7.534399628639221e-07, 8.810311555862427e-07, 1.0086223483085632e-06, 1.1362135410308838e-06, 1.2638047337532043e-06, 1.391395926475525e-06, 1.5189871191978455e-06, 1.646578311920166e-06, 1.7741695046424866e-06, 1.9017606973648071e-06, 2.0293518900871277e-06, 2.1569430828094482e-06, 2.284534275531769e-06, 2.4121254682540894e-06, 2.53971666097641e-06, 2.6673078536987305e-06, 2.794899046421051e-06, 2.9224902391433716e-06, 3.050081431865692e-06, 3.1776726245880127e-06, 3.3052638173103333e-06, 3.432855010032654e-06, 3.5604462027549744e-06, 3.688037395477295e-06, 3.8156285881996155e-06, 3.943219780921936e-06, 4.070810973644257e-06, 4.198402166366577e-06, 4.325993359088898e-06, 4.453584551811218e-06, 4.581175744533539e-06, 4.708766937255859e-06]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 7.0, 10.0, 19.0, 29.0, 65.0, 139.0, 353.0, 180.0, 97.0, 47.0, 28.0, 9.0, 9.0, 7.0, 4.0, 3.0, 2.0, 2.0, 1.0], "bins": [-0.001171031966805458, -0.0011483730049803853, -0.0011257140431553125, -0.0011030550813302398, -0.001080396119505167, -0.0010577370412647724, -0.0010350781958550215, -0.0010124191176146269, -0.0009897601557895541, -0.0009671011939644814, -0.0009444422321394086, -0.0009217832703143358, -0.0008991242502816021, -0.0008764652884565294, -0.0008538063266314566, -0.0008311473648063838, -0.0008084884029813111, -0.0007858294411562383, -0.0007631704793311656, -0.0007405114592984319, -0.0007178524974733591, -0.0006951935356482863, -0.0006725345738232136, -0.0006498756119981408, -0.000627216650173068, -0.0006045576883479953, -0.0005818987265229225, -0.0005592397646978498, -0.0005365807446651161, 
-0.0005139217828400433, -0.0004912628210149705, -0.0004686038591898978, -0.0004459447809495032, -0.0004232858191244304, -0.0004006268281955272, -0.00037796786637045443, -0.0003553088754415512, -0.00033264991361647844, -0.0003099909517914057, -0.0002873319899663329, -0.0002646729990374297, -0.0002420140226604417, -0.0002193550462834537, -0.00019669608445838094, -0.00017403710808139294, -0.00015137813170440495, -0.00012871916987933218, -0.00010606019350234419, -8.34012171253562e-05, -6.074224438634701e-05, -3.8083271647337824e-05, -1.5424302546307445e-05, 7.234673830680549e-06, 2.9893650207668543e-05, 5.255261203274131e-05, 7.52115884097293e-05, 9.78705647867173e-05, 0.00012052954116370529, 0.00014318851754069328, 0.00016584747936576605, 0.00018850645574275404, 0.00021116543211974204, 0.0002338243939448148, 0.000256483384873718, 0.0002791423466987908]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 6.0, 4.0, 6.0, 5.0, 7.0, 13.0, 12.0, 17.0, 24.0, 17.0, 23.0, 19.0, 35.0, 35.0, 34.0, 49.0, 40.0, 39.0, 40.0, 51.0, 42.0, 46.0, 47.0, 39.0, 39.0, 49.0, 38.0, 25.0, 27.0, 25.0, 24.0, 18.0, 20.0, 18.0, 14.0, 12.0, 11.0, 9.0, 5.0, 5.0, 3.0, 3.0, 3.0, 4.0, 4.0, 0.0, 2.0], "bins": [-0.00022238492965698242, -0.00021641608327627182, -0.00021044723689556122, -0.00020447839051485062, -0.00019850954413414001, -0.0001925406977534294, -0.0001865718513727188, -0.0001806030049920082, -0.0001746341586112976, -0.000168665312230587, -0.0001626964658498764, -0.0001567276194691658, -0.0001507587730884552, -0.0001447899267077446, -0.000138821080327034, -0.0001328522339463234, -0.0001268833875656128, -0.00012091454118490219, -0.00011494569480419159, -0.00010897684842348099, -0.00010300800204277039, -9.703915566205978e-05, -9.107030928134918e-05, -8.510146290063858e-05, -7.913261651992798e-05, -7.316377013921738e-05, -6.719492375850677e-05, -6.122607737779617e-05, -5.525723099708557e-05, -4.928838461637497e-05, -4.331953823566437e-05, -3.7350691854953766e-05, -3.1381845474243164e-05, -2.5412999093532562e-05, -1.944415271282196e-05, -1.3475306332111359e-05, -7.506459951400757e-06, -1.537613570690155e-06, 4.431232810020447e-06, 1.0400079190731049e-05, 1.636892557144165e-05, 2.2337771952152252e-05, 2.8306618332862854e-05, 3.4275464713573456e-05, 4.024431109428406e-05, 4.621315747499466e-05, 5.218200385570526e-05, 5.815085023641586e-05, 6.411969661712646e-05, 7.008854299783707e-05, 7.605738937854767e-05, 8.202623575925827e-05, 8.799508213996887e-05, 9.396392852067947e-05, 9.993277490139008e-05, 0.00010590162128210068, 0.00011187046766281128, 0.00011783931404352188, 0.00012380816042423248, 0.00012977700680494308, 0.0001357458531856537, 0.0001417146995663643, 0.0001476835459470749, 0.0001536523923277855, 0.0001596212387084961]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [4.0, 3.0, 5.0, 2.0, 7.0, 9.0, 20.0, 16.0, 40.0, 62.0, 74.0, 106.0, 129.0, 154.0, 234.0, 273.0, 375.0, 488.0, 645.0, 821.0, 1197.0, 1814.0, 2655.0, 4176.0, 6948.0, 11375.0, 19422.0, 34888.0, 73752.0, 215165.0, 3206763.0, 430637.0, 94260.0, 38377.0, 18939.0, 10494.0, 6355.0, 4046.0, 2644.0, 1925.0, 1314.0, 889.0, 688.0, 558.0, 337.0, 300.0, 237.0, 156.0, 127.0, 110.0, 83.0, 50.0, 47.0, 32.0, 28.0, 16.0, 5.0, 6.0, 6.0, 5.0, 4.0, 4.0, 1.0, 2.0], "bins": [-0.00010466575622558594, -0.0001012515276670456, -9.783729910850525e-05, -9.44230705499649e-05, -9.100884199142456e-05, 
-8.759461343288422e-05, -8.418038487434387e-05, -8.076615631580353e-05, -7.735192775726318e-05, -7.393769919872284e-05, -7.05234706401825e-05, -6.710924208164215e-05, -6.36950135231018e-05, -6.028078496456146e-05, -5.686655640602112e-05, -5.3452327847480774e-05, -5.003809928894043e-05, -4.6623870730400085e-05, -4.320964217185974e-05, -3.97954136133194e-05, -3.638118505477905e-05, -3.296695649623871e-05, -2.9552727937698364e-05, -2.613849937915802e-05, -2.2724270820617676e-05, -1.931004226207733e-05, -1.5895813703536987e-05, -1.2481585144996643e-05, -9.067356586456299e-06, -5.653128027915955e-06, -2.2388994693756104e-06, 1.1753290891647339e-06, 4.589557647705078e-06, 8.003786206245422e-06, 1.1418014764785767e-05, 1.483224332332611e-05, 1.8246471881866455e-05, 2.16607004404068e-05, 2.5074928998947144e-05, 2.8489157557487488e-05, 3.190338611602783e-05, 3.5317614674568176e-05, 3.873184323310852e-05, 4.2146071791648865e-05, 4.556030035018921e-05, 4.897452890872955e-05, 5.23887574672699e-05, 5.580298602581024e-05, 5.9217214584350586e-05, 6.263144314289093e-05, 6.604567170143127e-05, 6.945990025997162e-05, 7.287412881851196e-05, 7.628835737705231e-05, 7.970258593559265e-05, 8.3116814494133e-05, 8.653104305267334e-05, 8.994527161121368e-05, 9.335950016975403e-05, 9.677372872829437e-05, 0.00010018795728683472, 0.00010360218584537506, 0.0001070164144039154, 0.00011043064296245575, 0.0001138448715209961]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 4.0, 5.0, 7.0, 7.0, 10.0, 18.0, 20.0, 29.0, 24.0, 31.0, 44.0, 35.0, 56.0, 61.0, 80.0, 82.0, 83.0, 92.0, 66.0, 58.0, 50.0, 24.0, 25.0, 18.0, 18.0, 6.0, 7.0, 9.0, 8.0, 3.0, 4.0, 1.0, 2.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-2.1338462829589844e-05, -2.064276486635208e-05, -1.994706690311432e-05, -1.9251368939876556e-05, -1.8555670976638794e-05, -1.785997301340103e-05, -1.716427505016327e-05, -1.6468577086925507e-05, -1.5772879123687744e-05, -1.5077181160449982e-05, -1.438148319721222e-05, -1.3685785233974457e-05, -1.2990087270736694e-05, -1.2294389307498932e-05, -1.159869134426117e-05, -1.0902993381023407e-05, -1.0207295417785645e-05, -9.511597454547882e-06, -8.81589949131012e-06, -8.120201528072357e-06, -7.424503564834595e-06, -6.728805601596832e-06, -6.03310763835907e-06, -5.337409675121307e-06, -4.641711711883545e-06, -3.9460137486457825e-06, -3.25031578540802e-06, -2.5546178221702576e-06, -1.8589198589324951e-06, -1.1632218956947327e-06, -4.675239324569702e-07, 2.2817403078079224e-07, 9.238719940185547e-07, 1.6195699572563171e-06, 2.3152679204940796e-06, 3.010965883731842e-06, 3.7066638469696045e-06, 4.402361810207367e-06, 5.098059773445129e-06, 5.793757736682892e-06, 6.489455699920654e-06, 7.185153663158417e-06, 7.88085162639618e-06, 8.576549589633942e-06, 9.272247552871704e-06, 9.967945516109467e-06, 1.0663643479347229e-05, 1.1359341442584991e-05, 1.2055039405822754e-05, 1.2750737369060516e-05, 1.3446435332298279e-05, 1.4142133295536041e-05, 1.4837831258773804e-05, 1.5533529222011566e-05, 1.622922718524933e-05, 1.692492514848709e-05, 1.7620623111724854e-05, 1.8316321074962616e-05, 1.901201903820038e-05, 1.970771700143814e-05, 2.0403414964675903e-05, 2.1099112927913666e-05, 2.1794810891151428e-05, 2.249050885438919e-05, 2.3186206817626953e-05]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 
1.0, 1.0, 6.0, 3.0, 5.0, 5.0, 16.0, 27.0, 33.0, 45.0, 61.0, 94.0, 167.0, 209.0, 337.0, 510.0, 756.0, 1160.0, 1898.0, 3293.0, 5143.0, 8670.0, 15780.0, 29446.0, 60882.0, 135815.0, 439024.0, 2749830.0, 466583.0, 141522.0, 62850.0, 30892.0, 16207.0, 8975.0, 5375.0, 3072.0, 1994.0, 1219.0, 769.0, 523.0, 358.0, 253.0, 138.0, 104.0, 77.0, 52.0, 40.0, 26.0, 15.0, 14.0, 5.0, 7.0, 6.0, 3.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.577108383178711e-05, -8.286535739898682e-05, -7.995963096618652e-05, -7.705390453338623e-05, -7.414817810058594e-05, -7.124245166778564e-05, -6.833672523498535e-05, -6.543099880218506e-05, -6.252527236938477e-05, -5.961954593658447e-05, -5.671381950378418e-05, -5.380809307098389e-05, -5.0902366638183594e-05, -4.79966402053833e-05, -4.509091377258301e-05, -4.2185187339782715e-05, -3.927946090698242e-05, -3.637373447418213e-05, -3.3468008041381836e-05, -3.056228160858154e-05, -2.765655517578125e-05, -2.4750828742980957e-05, -2.1845102310180664e-05, -1.893937587738037e-05, -1.6033649444580078e-05, -1.3127923011779785e-05, -1.0222196578979492e-05, -7.316470146179199e-06, -4.410743713378906e-06, -1.5050172805786133e-06, 1.4007091522216797e-06, 4.306435585021973e-06, 7.212162017822266e-06, 1.0117888450622559e-05, 1.3023614883422852e-05, 1.5929341316223145e-05, 1.8835067749023438e-05, 2.174079418182373e-05, 2.4646520614624023e-05, 2.7552247047424316e-05, 3.045797348022461e-05, 3.33636999130249e-05, 3.6269426345825195e-05, 3.917515277862549e-05, 4.208087921142578e-05, 4.4986605644226074e-05, 4.789233207702637e-05, 5.079805850982666e-05, 5.370378494262695e-05, 5.6609511375427246e-05, 5.951523780822754e-05, 6.242096424102783e-05, 6.532669067382812e-05, 6.823241710662842e-05, 7.113814353942871e-05, 7.4043869972229e-05, 7.69495964050293e-05, 7.985532283782959e-05, 8.276104927062988e-05, 8.566677570343018e-05, 8.857250213623047e-05, 9.147822856903076e-05, 9.438395500183105e-05, 9.728968143463135e-05, 0.00010019540786743164]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 5.0, 1.0, 6.0, 5.0, 2.0, 6.0, 16.0, 11.0, 19.0, 24.0, 27.0, 34.0, 36.0, 50.0, 46.0, 88.0, 87.0, 144.0, 179.0, 335.0, 702.0, 755.0, 450.0, 237.0, 152.0, 116.0, 78.0, 77.0, 59.0, 61.0, 41.0, 40.0, 30.0, 29.0, 21.0, 19.0, 22.0, 14.0, 12.0, 13.0, 10.0, 3.0, 3.0, 2.0, 6.0, 7.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.684925079345703e-05, -4.518870264291763e-05, -4.3528154492378235e-05, -4.186760634183884e-05, -4.020705819129944e-05, -3.854651004076004e-05, -3.688596189022064e-05, -3.5225413739681244e-05, -3.3564865589141846e-05, -3.190431743860245e-05, -3.024376928806305e-05, -2.858322113752365e-05, -2.6922672986984253e-05, -2.5262124836444855e-05, -2.3601576685905457e-05, -2.194102853536606e-05, -2.028048038482666e-05, -1.8619932234287262e-05, -1.6959384083747864e-05, -1.5298835933208466e-05, -1.3638287782669067e-05, -1.197773963212967e-05, -1.0317191481590271e-05, -8.656643331050873e-06, -6.996095180511475e-06, -5.335547029972076e-06, -3.6749988794326782e-06, -2.01445072889328e-06, -3.5390257835388184e-07, 1.3066455721855164e-06, 2.9671937227249146e-06, 4.627741873264313e-06, 6.288290023803711e-06, 7.948838174343109e-06, 9.609386324882507e-06, 1.1269934475421906e-05, 1.2930482625961304e-05, 1.4591030776500702e-05, 1.62515789270401e-05, 1.7912127077579498e-05, 1.9572675228118896e-05, 2.1233223378658295e-05, 2.2893771529197693e-05, 2.455431967973709e-05, 2.621486783027649e-05, 
2.7875415980815887e-05, 2.9535964131355286e-05, 3.1196512281894684e-05, 3.285706043243408e-05, 3.451760858297348e-05, 3.617815673351288e-05, 3.7838704884052277e-05, 3.9499253034591675e-05, 4.115980118513107e-05, 4.282034933567047e-05, 4.448089748620987e-05, 4.614144563674927e-05, 4.7801993787288666e-05, 4.9462541937828064e-05, 5.112309008836746e-05, 5.278363823890686e-05, 5.444418638944626e-05, 5.610473453998566e-05, 5.7765282690525055e-05, 5.942583084106445e-05]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 8.0, 10.0, 38.0, 90.0, 219.0, 282.0, 173.0, 92.0, 45.0, 26.0, 12.0, 7.0, 6.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005014660418964922, -0.0004635138902813196, -0.00042556176776997745, -0.0003876096161548048, -0.00034965749364346266, -0.00031170534202829003, -0.0002737531904131174, -0.00023580106790177524, -0.00019784891628660262, -0.00015989677922334522, -0.00012194463488413021, -8.39924905449152e-05, -4.60403534816578e-05, -8.088216418400407e-06, 2.9863935196772218e-05, 6.781605770811439e-05, 0.00010576820932328701, 0.0001437203463865444, 0.0001816724834498018, 0.00021962463506497443, 0.0002575767575763166, 0.0002955289091914892, 0.00033348106080666184, 0.000371433183318004, 0.00040938533493317664, 0.00044733748654834926, 0.00048528960905969143, 0.000523241760674864, 0.0005611939122900367, 0.0005991460056975484, 0.0006370982155203819, 0.0006750503089278936, 0.0007130025187507272, 0.0007509546703658998, 0.0007889068219810724, 0.0008268589153885841, 0.0008648110670037568, 0.0009027632186189294, 0.000940715370234102, 0.0009786675218492746, 0.0010166196152567863, 0.001054571708664298, 0.0010925239184871316, 0.0011304760118946433, 0.0011684282217174768, 0.0012063803151249886, 0.0012443324085325003, 0.0012822846183553338, 0.0013202368281781673, 0.001358188921585679, 0.0013961411314085126, 0.0014340932248160243, 0.0014720454346388578, 0.0015099975280463696, 0.0015479496214538813, 0.0015859018312767148, 0.0016238539246842265, 0.0016618060180917382, 0.0016997582279145718, 0.0017377103213220835, 0.001775662531144917, 0.0018136146245524287, 0.0018515668343752623, 0.001889518927782774, 0.0019274710211902857]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 0.0, 3.0, 8.0, 8.0, 18.0, 5.0, 10.0, 12.0, 20.0, 27.0, 30.0, 21.0, 29.0, 27.0, 32.0, 33.0, 53.0, 44.0, 49.0, 54.0, 42.0, 39.0, 49.0, 40.0, 40.0, 37.0, 37.0, 32.0, 31.0, 23.0, 21.0, 22.0, 20.0, 18.0, 13.0, 13.0, 17.0, 7.0, 8.0, 4.0, 4.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002334117889404297, -0.00022569391876459122, -0.00021797604858875275, -0.00021025817841291428, -0.0002025403082370758, -0.00019482243806123734, -0.00018710456788539886, -0.0001793866977095604, -0.00017166882753372192, -0.00016395095735788345, -0.00015623308718204498, -0.0001485152170062065, -0.00014079734683036804, -0.00013307947665452957, -0.0001253616064786911, -0.00011764373630285263, -0.00010992586612701416, -0.00010220799595117569, -9.449012577533722e-05, -8.677225559949875e-05, -7.905438542366028e-05, -7.133651524782181e-05, -6.361864507198334e-05, -5.590077489614487e-05, -4.8182904720306396e-05, -4.0465034544467926e-05, -3.2747164368629456e-05, 
-2.5029294192790985e-05, -1.7311424016952515e-05, -9.593553841114044e-06, -1.8756836652755737e-06, 5.842186510562897e-06, 1.3560056686401367e-05, 2.1277926862239838e-05, 2.8995797038078308e-05, 3.671366721391678e-05, 4.443153738975525e-05, 5.214940756559372e-05, 5.986727774143219e-05, 6.758514791727066e-05, 7.530301809310913e-05, 8.30208882689476e-05, 9.073875844478607e-05, 9.845662862062454e-05, 0.00010617449879646301, 0.00011389236897230148, 0.00012161023914813995, 0.00012932810932397842, 0.0001370459794998169, 0.00014476384967565536, 0.00015248171985149384, 0.0001601995900273323, 0.00016791746020317078, 0.00017563533037900925, 0.00018335320055484772, 0.0001910710707306862, 0.00019878894090652466, 0.00020650681108236313, 0.0002142246812582016, 0.00022194255143404007, 0.00022966042160987854, 0.000237378291785717, 0.0002450961619615555, 0.00025281403213739395, 0.0002605319023132324]}, "gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 2.0, 2.0, 5.0, 6.0, 10.0, 6.0, 20.0, 15.0, 32.0, 29.0, 60.0, 68.0, 101.0, 179.0, 276.0, 481.0, 843.0, 1592.0, 3300.0, 7356.0, 17916.0, 49714.0, 184606.0, 609719.0, 112694.0, 34958.0, 13269.0, 5485.0, 2614.0, 1378.0, 708.0, 416.0, 226.0, 155.0, 100.0, 65.0, 42.0, 33.0, 22.0, 22.0, 13.0, 6.0, 6.0, 7.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00023245811462402344, -0.00022548437118530273, -0.00021851062774658203, -0.00021153688430786133, -0.00020456314086914062, -0.00019758939743041992, -0.00019061565399169922, -0.00018364191055297852, -0.0001766681671142578, -0.0001696944236755371, -0.0001627206802368164, -0.0001557469367980957, -0.000148773193359375, -0.0001417994499206543, -0.0001348257064819336, -0.0001278519630432129, -0.00012087821960449219, -0.00011390447616577148, -0.00010693073272705078, -9.995698928833008e-05, -9.298324584960938e-05, -8.600950241088867e-05, -7.903575897216797e-05, -7.206201553344727e-05, -6.508827209472656e-05, -5.811452865600586e-05, -5.1140785217285156e-05, -4.416704177856445e-05, -3.719329833984375e-05, -3.0219554901123047e-05, -2.3245811462402344e-05, -1.627206802368164e-05, -9.298324584960938e-06, -2.3245811462402344e-06, 4.649162292480469e-06, 1.1622905731201172e-05, 1.8596649169921875e-05, 2.5570392608642578e-05, 3.254413604736328e-05, 3.9517879486083984e-05, 4.649162292480469e-05, 5.346536636352539e-05, 6.0439109802246094e-05, 6.74128532409668e-05, 7.43865966796875e-05, 8.13603401184082e-05, 8.83340835571289e-05, 9.530782699584961e-05, 0.00010228157043457031, 0.00010925531387329102, 0.00011622905731201172, 0.00012320280075073242, 0.00013017654418945312, 0.00013715028762817383, 0.00014412403106689453, 0.00015109777450561523, 0.00015807151794433594, 0.00016504526138305664, 0.00017201900482177734, 0.00017899274826049805, 0.00018596649169921875, 0.00019294023513793945, 0.00019991397857666016, 0.00020688772201538086, 0.00021386146545410156]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 7.0, 6.0, 7.0, 15.0, 9.0, 18.0, 26.0, 35.0, 48.0, 64.0, 68.0, 95.0, 123.0, 137.0, 85.0, 48.0, 64.0, 45.0, 30.0, 20.0, 18.0, 15.0, 7.0, 6.0, 4.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.491474151611328e-05, -2.3914501070976257e-05, -2.2914260625839233e-05, -2.191402018070221e-05, -2.0913779735565186e-05, 
-1.991353929042816e-05, -1.8913298845291138e-05, -1.7913058400154114e-05, -1.691281795501709e-05, -1.5912577509880066e-05, -1.4912337064743042e-05, -1.3912096619606018e-05, -1.2911856174468994e-05, -1.191161572933197e-05, -1.0911375284194946e-05, -9.911134839057922e-06, -8.910894393920898e-06, -7.910653948783875e-06, -6.910413503646851e-06, -5.910173058509827e-06, -4.909932613372803e-06, -3.909692168235779e-06, -2.909451723098755e-06, -1.909211277961731e-06, -9.08970832824707e-07, 9.12696123123169e-08, 1.0915100574493408e-06, 2.0917505025863647e-06, 3.0919909477233887e-06, 4.092231392860413e-06, 5.0924718379974365e-06, 6.0927122831344604e-06, 7.092952728271484e-06, 8.093193173408508e-06, 9.093433618545532e-06, 1.0093674063682556e-05, 1.109391450881958e-05, 1.2094154953956604e-05, 1.3094395399093628e-05, 1.4094635844230652e-05, 1.5094876289367676e-05, 1.60951167345047e-05, 1.7095357179641724e-05, 1.8095597624778748e-05, 1.909583806991577e-05, 2.0096078515052795e-05, 2.109631896018982e-05, 2.2096559405326843e-05, 2.3096799850463867e-05, 2.409704029560089e-05, 2.5097280740737915e-05, 2.609752118587494e-05, 2.7097761631011963e-05, 2.8098002076148987e-05, 2.909824252128601e-05, 3.0098482966423035e-05, 3.109872341156006e-05, 3.209896385669708e-05, 3.3099204301834106e-05, 3.409944474697113e-05, 3.5099685192108154e-05, 3.609992563724518e-05, 3.71001660823822e-05, 3.8100406527519226e-05, 3.910064697265625e-05]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 1.0, 9.0, 15.0, 13.0, 18.0, 41.0, 58.0, 76.0, 107.0, 180.0, 307.0, 440.0, 636.0, 1035.0, 1559.0, 2533.0, 3977.0, 6545.0, 10906.0, 18111.0, 31098.0, 53718.0, 100567.0, 249482.0, 312567.0, 111588.0, 58953.0, 33444.0, 19703.0, 11835.0, 7143.0, 4339.0, 2748.0, 1631.0, 1171.0, 713.0, 421.0, 300.0, 198.0, 120.0, 82.0, 80.0, 30.0, 17.0, 17.0, 9.0, 5.0, 10.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-8.612871170043945e-05, -8.344370871782303e-05, -8.07587057352066e-05, -7.807370275259018e-05, -7.538869976997375e-05, -7.270369678735733e-05, -7.00186938047409e-05, -6.733369082212448e-05, -6.464868783950806e-05, -6.196368485689163e-05, -5.927868187427521e-05, -5.659367889165878e-05, -5.390867590904236e-05, -5.1223672926425934e-05, -4.853866994380951e-05, -4.5853666961193085e-05, -4.316866397857666e-05, -4.0483660995960236e-05, -3.779865801334381e-05, -3.5113655030727386e-05, -3.242865204811096e-05, -2.9743649065494537e-05, -2.7058646082878113e-05, -2.4373643100261688e-05, -2.1688640117645264e-05, -1.900363713502884e-05, -1.6318634152412415e-05, -1.363363116979599e-05, -1.0948628187179565e-05, -8.263625204563141e-06, -5.578622221946716e-06, -2.8936192393302917e-06, -2.086162567138672e-07, 2.4763867259025574e-06, 5.161389708518982e-06, 7.846392691135406e-06, 1.0531395673751831e-05, 1.3216398656368256e-05, 1.590140163898468e-05, 1.8586404621601105e-05, 2.127140760421753e-05, 2.3956410586833954e-05, 2.664141356945038e-05, 2.9326416552066803e-05, 3.201141953468323e-05, 3.469642251729965e-05, 3.738142549991608e-05, 4.00664284825325e-05, 4.2751431465148926e-05, 4.543643444776535e-05, 4.8121437430381775e-05, 5.08064404129982e-05, 5.3491443395614624e-05, 5.617644637823105e-05, 5.886144936084747e-05, 6.15464523434639e-05, 6.423145532608032e-05, 6.691645830869675e-05, 6.960146129131317e-05, 7.22864642739296e-05, 7.497146725654602e-05, 7.765647023916245e-05, 8.034147322177887e-05, 8.30264762043953e-05, 8.571147918701172e-05]}, 
"gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 1.0, 4.0, 6.0, 10.0, 10.0, 7.0, 7.0, 15.0, 10.0, 17.0, 20.0, 21.0, 13.0, 27.0, 23.0, 24.0, 38.0, 36.0, 29.0, 41.0, 42.0, 39.0, 49.0, 36.0, 44.0, 41.0, 33.0, 44.0, 34.0, 25.0, 25.0, 29.0, 22.0, 28.0, 21.0, 20.0, 15.0, 21.0, 19.0, 19.0, 15.0, 4.0, 5.0, 5.0, 6.0, 0.0, 2.0, 2.0, 5.0, 3.0], "bins": [-5.751848220825195e-05, -5.5966898798942566e-05, -5.441531538963318e-05, -5.286373198032379e-05, -5.1312148571014404e-05, -4.976056516170502e-05, -4.820898175239563e-05, -4.665739834308624e-05, -4.5105814933776855e-05, -4.355423152446747e-05, -4.200264811515808e-05, -4.0451064705848694e-05, -3.889948129653931e-05, -3.734789788722992e-05, -3.579631447792053e-05, -3.4244731068611145e-05, -3.269314765930176e-05, -3.114156424999237e-05, -2.9589980840682983e-05, -2.8038397431373596e-05, -2.648681402206421e-05, -2.4935230612754822e-05, -2.3383647203445435e-05, -2.1832063794136047e-05, -2.028048038482666e-05, -1.8728896975517273e-05, -1.7177313566207886e-05, -1.56257301568985e-05, -1.4074146747589111e-05, -1.2522563338279724e-05, -1.0970979928970337e-05, -9.41939651966095e-06, -7.867813110351562e-06, -6.316229701042175e-06, -4.764646291732788e-06, -3.213062882423401e-06, -1.6614794731140137e-06, -1.0989606380462646e-07, 1.4416873455047607e-06, 2.993270754814148e-06, 4.544854164123535e-06, 6.096437573432922e-06, 7.64802098274231e-06, 9.199604392051697e-06, 1.0751187801361084e-05, 1.2302771210670471e-05, 1.3854354619979858e-05, 1.5405938029289246e-05, 1.6957521438598633e-05, 1.850910484790802e-05, 2.0060688257217407e-05, 2.1612271666526794e-05, 2.316385507583618e-05, 2.471543848514557e-05, 2.6267021894454956e-05, 2.7818605303764343e-05, 2.937018871307373e-05, 3.092177212238312e-05, 3.2473355531692505e-05, 3.402493894100189e-05, 3.557652235031128e-05, 3.7128105759620667e-05, 3.8679689168930054e-05, 4.023127257823944e-05, 4.178285598754883e-05]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 3.0, 2.0, 3.0, 6.0, 6.0, 10.0, 13.0, 21.0, 17.0, 50.0, 33.0, 75.0, 72.0, 138.0, 166.0, 253.0, 287.0, 576.0, 725.0, 1230.0, 1592.0, 3321.0, 4349.0, 9561.0, 14645.0, 39293.0, 75850.0, 313153.0, 353925.0, 139354.0, 40883.0, 23322.0, 9442.0, 6766.0, 3131.0, 2302.0, 1189.0, 974.0, 521.0, 423.0, 241.0, 196.0, 115.0, 98.0, 49.0, 55.0, 31.0, 39.0, 11.0, 16.0, 10.0, 10.0, 6.0, 4.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0], "bins": [-8.344650268554688e-06, -8.07642936706543e-06, -7.808208465576172e-06, -7.539987564086914e-06, -7.271766662597656e-06, -7.0035457611083984e-06, -6.735324859619141e-06, -6.467103958129883e-06, -6.198883056640625e-06, -5.930662155151367e-06, -5.662441253662109e-06, -5.3942203521728516e-06, -5.125999450683594e-06, -4.857778549194336e-06, -4.589557647705078e-06, -4.32133674621582e-06, -4.0531158447265625e-06, -3.7848949432373047e-06, -3.516674041748047e-06, -3.248453140258789e-06, -2.9802322387695312e-06, -2.7120113372802734e-06, -2.4437904357910156e-06, -2.175569534301758e-06, -1.9073486328125e-06, -1.6391277313232422e-06, -1.3709068298339844e-06, -1.1026859283447266e-06, -8.344650268554688e-07, -5.662441253662109e-07, -2.980232238769531e-07, -2.9802322387695312e-08, 2.384185791015625e-07, 5.066394805908203e-07, 7.748603820800781e-07, 1.043081283569336e-06, 1.3113021850585938e-06, 1.5795230865478516e-06, 1.8477439880371094e-06, 2.115964889526367e-06, 2.384185791015625e-06, 
2.652406692504883e-06, 2.9206275939941406e-06, 3.1888484954833984e-06, 3.4570693969726562e-06, 3.725290298461914e-06, 3.993511199951172e-06, 4.26173210144043e-06, 4.5299530029296875e-06, 4.798173904418945e-06, 5.066394805908203e-06, 5.334615707397461e-06, 5.602836608886719e-06, 5.8710575103759766e-06, 6.139278411865234e-06, 6.407499313354492e-06, 6.67572021484375e-06, 6.943941116333008e-06, 7.212162017822266e-06, 7.4803829193115234e-06, 7.748603820800781e-06, 8.016824722290039e-06, 8.285045623779297e-06, 8.553266525268555e-06, 8.821487426757812e-06]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 6.0, 8.0, 7.0, 13.0, 15.0, 16.0, 24.0, 67.0, 31.0, 46.0, 57.0, 66.0, 61.0, 69.0, 127.0, 61.0, 56.0, 38.0, 39.0, 34.0, 21.0, 45.0, 13.0, 10.0, 20.0, 10.0, 6.0, 9.0, 10.0, 3.0, 5.0, 1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.384185791015625e-06, -2.316199243068695e-06, -2.248212695121765e-06, -2.180226147174835e-06, -2.1122395992279053e-06, -2.0442530512809753e-06, -1.9762665033340454e-06, -1.9082799553871155e-06, -1.8402934074401855e-06, -1.7723068594932556e-06, -1.7043203115463257e-06, -1.6363337635993958e-06, -1.5683472156524658e-06, -1.5003606677055359e-06, -1.432374119758606e-06, -1.364387571811676e-06, -1.296401023864746e-06, -1.2284144759178162e-06, -1.1604279279708862e-06, -1.0924413800239563e-06, -1.0244548320770264e-06, -9.564682841300964e-07, -8.884817361831665e-07, -8.204951882362366e-07, -7.525086402893066e-07, -6.845220923423767e-07, -6.165355443954468e-07, -5.485489964485168e-07, -4.805624485015869e-07, -4.12575900554657e-07, -3.4458935260772705e-07, -2.766028046607971e-07, -2.086162567138672e-07, -1.4062970876693726e-07, -7.264316082000732e-08, -4.6566128730773926e-09, 6.332993507385254e-08, 1.3131648302078247e-07, 1.993030309677124e-07, 2.6728957891464233e-07, 3.3527612686157227e-07, 4.032626748085022e-07, 4.7124922275543213e-07, 5.392357707023621e-07, 6.07222318649292e-07, 6.752088665962219e-07, 7.431954145431519e-07, 8.111819624900818e-07, 8.791685104370117e-07, 9.471550583839417e-07, 1.0151416063308716e-06, 1.0831281542778015e-06, 1.1511147022247314e-06, 1.2191012501716614e-06, 1.2870877981185913e-06, 1.3550743460655212e-06, 1.4230608940124512e-06, 1.491047441959381e-06, 1.559033989906311e-06, 1.627020537853241e-06, 1.695007085800171e-06, 1.7629936337471008e-06, 1.8309801816940308e-06, 1.8989667296409607e-06, 1.9669532775878906e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 3.0, 2.0, 11.0, 14.0, 14.0, 37.0, 42.0, 56.0, 58.0, 105.0, 160.0, 220.0, 308.0, 414.0, 776.0, 941.0, 1582.0, 2360.0, 3810.0, 6263.0, 11223.0, 21879.0, 45774.0, 113885.0, 385517.0, 283368.0, 88939.0, 37692.0, 18182.0, 9606.0, 5560.0, 3319.0, 2113.0, 1394.0, 864.0, 784.0, 372.0, 256.0, 186.0, 128.0, 100.0, 72.0, 53.0, 33.0, 26.0, 18.0, 16.0, 13.0, 6.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-7.68899917602539e-06, -7.447786629199982e-06, -7.206574082374573e-06, -6.965361535549164e-06, -6.724148988723755e-06, -6.482936441898346e-06, -6.241723895072937e-06, -6.000511348247528e-06, -5.759298801422119e-06, -5.51808625459671e-06, -5.276873707771301e-06, -5.035661160945892e-06, -4.794448614120483e-06, -4.5532360672950745e-06, -4.3120235204696655e-06, -4.070810973644257e-06, -3.829598426818848e-06, -3.5883858799934387e-06, 
-3.3471733331680298e-06, -3.105960786342621e-06, -2.864748239517212e-06, -2.623535692691803e-06, -2.382323145866394e-06, -2.141110599040985e-06, -1.8998980522155762e-06, -1.6586855053901672e-06, -1.4174729585647583e-06, -1.1762604117393494e-06, -9.350478649139404e-07, -6.938353180885315e-07, -4.5262277126312256e-07, -2.1141022443771362e-07, 2.9802322387695312e-08, 2.7101486921310425e-07, 5.122274160385132e-07, 7.534399628639221e-07, 9.94652509689331e-07, 1.23586505651474e-06, 1.477077603340149e-06, 1.7182901501655579e-06, 1.959502696990967e-06, 2.2007152438163757e-06, 2.4419277906417847e-06, 2.6831403374671936e-06, 2.9243528842926025e-06, 3.1655654311180115e-06, 3.4067779779434204e-06, 3.6479905247688293e-06, 3.889203071594238e-06, 4.130415618419647e-06, 4.371628165245056e-06, 4.612840712070465e-06, 4.854053258895874e-06, 5.095265805721283e-06, 5.336478352546692e-06, 5.577690899372101e-06, 5.81890344619751e-06, 6.060115993022919e-06, 6.301328539848328e-06, 6.5425410866737366e-06, 6.7837536334991455e-06, 7.0249661803245544e-06, 7.266178727149963e-06, 7.507391273975372e-06, 7.748603820800781e-06]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 7.0, 0.0, 2.0, 0.0, 6.0, 1.0, 1.0, 6.0, 8.0, 5.0, 7.0, 16.0, 14.0, 21.0, 24.0, 38.0, 28.0, 55.0, 41.0, 85.0, 50.0, 53.0, 99.0, 55.0, 75.0, 43.0, 52.0, 33.0, 41.0, 29.0, 26.0, 19.0, 18.0, 6.0, 7.0, 8.0, 13.0, 1.0, 2.0, 2.0, 2.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0], "bins": [-4.76837158203125e-06, -4.621222615242004e-06, -4.474073648452759e-06, -4.326924681663513e-06, -4.179775714874268e-06, -4.032626748085022e-06, -3.885477781295776e-06, -3.7383288145065308e-06, -3.591179847717285e-06, -3.4440308809280396e-06, -3.296881914138794e-06, -3.1497329473495483e-06, -3.0025839805603027e-06, -2.855435013771057e-06, -2.7082860469818115e-06, -2.561137080192566e-06, -2.4139881134033203e-06, -2.2668391466140747e-06, -2.119690179824829e-06, -1.9725412130355835e-06, -1.8253922462463379e-06, -1.6782432794570923e-06, -1.5310943126678467e-06, -1.383945345878601e-06, -1.2367963790893555e-06, -1.0896474123001099e-06, -9.424984455108643e-07, -7.953494787216187e-07, -6.48200511932373e-07, -5.010515451431274e-07, -3.5390257835388184e-07, -2.0675361156463623e-07, -5.960464477539063e-08, 8.754432201385498e-08, 2.3469328880310059e-07, 3.818422555923462e-07, 5.289912223815918e-07, 6.761401891708374e-07, 8.23289155960083e-07, 9.704381227493286e-07, 1.1175870895385742e-06, 1.2647360563278198e-06, 1.4118850231170654e-06, 1.559033989906311e-06, 1.7061829566955566e-06, 1.8533319234848022e-06, 2.000480890274048e-06, 2.1476298570632935e-06, 2.294778823852539e-06, 2.4419277906417847e-06, 2.5890767574310303e-06, 2.736225724220276e-06, 2.8833746910095215e-06, 3.030523657798767e-06, 3.1776726245880127e-06, 3.3248215913772583e-06, 3.471970558166504e-06, 3.6191195249557495e-06, 3.766268491744995e-06, 3.913417458534241e-06, 4.060566425323486e-06, 4.207715392112732e-06, 4.3548643589019775e-06, 4.502013325691223e-06, 4.649162292480469e-06]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 4.0, 4.0, 10.0, 14.0, 21.0, 35.0, 83.0, 143.0, 315.0, 183.0, 84.0, 41.0, 23.0, 25.0, 12.0, 5.0, 7.0, 4.0, 1.0, 1.0, 1.0], "bins": 
[-0.0010581646347418427, -0.001037664245814085, -0.0010171637404710054, -0.0009966633515432477, -0.0009761628462001681, -0.0009556624572724104, -0.0009351620101369917, -0.0009146615630015731, -0.0008941611158661544, -0.0008736606687307358, -0.0008531602215953171, -0.0008326597744598985, -0.0008121593855321407, -0.0007916589383967221, -0.0007711584912613034, -0.0007506580441258848, -0.0007301575969904661, -0.0007096571498550475, -0.0006891567027196288, -0.0006686562555842102, -0.0006481558084487915, -0.0006276554195210338, -0.0006071549723856151, -0.0005866545252501965, -0.0005661540781147778, -0.0005456536309793591, -0.0005251531838439405, -0.0005046527367085218, -0.00048415231867693365, -0.000463651871541515, -0.0004431514535099268, -0.00042265100637450814, -0.0004021505010314286, -0.0003816500538960099, -0.00036114960676059127, -0.00034064918872900307, -0.0003201487415935844, -0.00029964829445816576, -0.00027914787642657757, -0.0002586474292911589, -0.00023814698215574026, -0.0002176465350203216, -0.00019714610243681818, -0.00017664566985331476, -0.0001561452227178961, -0.00013564477558247745, -0.00011514434299897403, -9.46439104154706e-05, -7.414346328005195e-05, -5.364302342059091e-05, -3.314258356112987e-05, -1.2642143701668829e-05, 7.85829615779221e-06, 2.835873601725325e-05, 4.885917587671429e-05, 6.935960846021771e-05, 8.986005559563637e-05, 0.00011036049545509741, 0.00013086093531455845, 0.00015136136789806187, 0.00017186181503348053, 0.00019236226216889918, 0.0002128626947524026, 0.00023336312733590603, 0.0002538635744713247]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 8.0, 3.0, 4.0, 2.0, 8.0, 7.0, 9.0, 9.0, 14.0, 17.0, 20.0, 28.0, 30.0, 24.0, 29.0, 31.0, 35.0, 33.0, 48.0, 37.0, 50.0, 42.0, 49.0, 48.0, 39.0, 48.0, 32.0, 41.0, 40.0, 26.0, 28.0, 22.0, 15.0, 25.0, 18.0, 15.0, 16.0, 13.0, 9.0, 10.0, 3.0, 6.0, 7.0, 2.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0], "bins": [-0.00019490718841552734, -0.0001890193670988083, -0.00018313154578208923, -0.00017724372446537018, -0.00017135590314865112, -0.00016546808183193207, -0.000159580260515213, -0.00015369243919849396, -0.0001478046178817749, -0.00014191679656505585, -0.0001360289752483368, -0.00013014115393161774, -0.00012425333261489868, -0.00011836551129817963, -0.00011247768998146057, -0.00010658986866474152, -0.00010070204734802246, -9.48142260313034e-05, -8.892640471458435e-05, -8.30385833978653e-05, -7.715076208114624e-05, -7.126294076442719e-05, -6.537511944770813e-05, -5.9487298130989075e-05, -5.359947681427002e-05, -4.7711655497550964e-05, -4.182383418083191e-05, -3.5936012864112854e-05, -3.00481915473938e-05, -2.4160370230674744e-05, -1.827254891395569e-05, -1.2384727597236633e-05, -6.496906280517578e-06, -6.09084963798523e-07, 5.278736352920532e-06, 1.1166557669639587e-05, 1.7054378986358643e-05, 2.2942200303077698e-05, 2.8830021619796753e-05, 3.471784293651581e-05, 4.060566425323486e-05, 4.649348556995392e-05, 5.2381306886672974e-05, 5.826912820339203e-05, 6.415694952011108e-05, 7.004477083683014e-05, 7.59325921535492e-05, 8.182041347026825e-05, 8.77082347869873e-05, 9.359605610370636e-05, 9.948387742042542e-05, 0.00010537169873714447, 0.00011125952005386353, 0.00011714734137058258, 0.00012303516268730164, 0.0001289229840040207, 0.00013481080532073975, 0.0001406986266374588, 0.00014658644795417786, 0.0001524742692708969, 0.00015836209058761597, 0.00016424991190433502, 0.00017013773322105408, 0.00017602555453777313, 
0.0001819133758544922]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 5.0, 11.0, 13.0, 26.0, 49.0, 62.0, 104.0, 139.0, 257.0, 371.0, 575.0, 1020.0, 1753.0, 3319.0, 6075.0, 12581.0, 29432.0, 87131.0, 764793.0, 3086731.0, 124658.0, 40430.0, 16999.0, 7949.0, 3983.0, 2158.0, 1267.0, 727.0, 496.0, 339.0, 201.0, 172.0, 93.0, 84.0, 71.0, 48.0, 34.0, 22.0, 17.0, 18.0, 21.0, 6.0, 11.0, 6.0, 6.0, 7.0, 5.0, 2.0, 6.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.0001348257064819336, -0.0001294594258069992, -0.00012409314513206482, -0.00011872686445713043, -0.00011336058378219604, -0.00010799430310726166, -0.00010262802243232727, -9.726174175739288e-05, -9.18954610824585e-05, -8.652918040752411e-05, -8.116289973258972e-05, -7.579661905765533e-05, -7.043033838272095e-05, -6.506405770778656e-05, -5.969777703285217e-05, -5.4331496357917786e-05, -4.89652156829834e-05, -4.359893500804901e-05, -3.8232654333114624e-05, -3.286637365818024e-05, -2.750009298324585e-05, -2.2133812308311462e-05, -1.6767531633377075e-05, -1.1401250958442688e-05, -6.034970283508301e-06, -6.686896085739136e-07, 4.697591066360474e-06, 1.006387174129486e-05, 1.5430152416229248e-05, 2.0796433091163635e-05, 2.6162713766098022e-05, 3.152899444103241e-05, 3.68952751159668e-05, 4.2261555790901184e-05, 4.762783646583557e-05, 5.299411714076996e-05, 5.8360397815704346e-05, 6.372667849063873e-05, 6.909295916557312e-05, 7.445923984050751e-05, 7.98255205154419e-05, 8.519180119037628e-05, 9.055808186531067e-05, 9.592436254024506e-05, 0.00010129064321517944, 0.00010665692389011383, 0.00011202320456504822, 0.0001173894852399826, 0.000122755765914917, 0.00012812204658985138, 0.00013348832726478577, 0.00013885460793972015, 0.00014422088861465454, 0.00014958716928958893, 0.00015495344996452332, 0.0001603197306394577, 0.0001656860113143921, 0.00017105229198932648, 0.00017641857266426086, 0.00018178485333919525, 0.00018715113401412964, 0.00019251741468906403, 0.0001978836953639984, 0.0002032499760389328, 0.0002086162567138672]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 6.0, 5.0, 6.0, 11.0, 15.0, 19.0, 17.0, 34.0, 46.0, 71.0, 83.0, 120.0, 121.0, 117.0, 97.0, 60.0, 41.0, 35.0, 25.0, 24.0, 15.0, 12.0, 9.0, 2.0, 4.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4259090423583984e-05, -2.3225322365760803e-05, -2.2191554307937622e-05, -2.115778625011444e-05, -2.012401819229126e-05, -1.909025013446808e-05, -1.8056482076644897e-05, -1.7022714018821716e-05, -1.5988945960998535e-05, -1.4955177903175354e-05, -1.3921409845352173e-05, -1.2887641787528992e-05, -1.185387372970581e-05, -1.082010567188263e-05, -9.786337614059448e-06, -8.752569556236267e-06, -7.718801498413086e-06, -6.685033440589905e-06, -5.651265382766724e-06, -4.6174973249435425e-06, -3.5837292671203613e-06, -2.54996120929718e-06, -1.516193151473999e-06, -4.824250936508179e-07, 5.513429641723633e-07, 1.5851110219955444e-06, 2.6188790798187256e-06, 3.6526471376419067e-06, 4.686415195465088e-06, 5.720183253288269e-06, 6.75395131111145e-06, 7.787719368934631e-06, 8.821487426757812e-06, 9.855255484580994e-06, 1.0889023542404175e-05, 1.1922791600227356e-05, 1.2956559658050537e-05, 1.3990327715873718e-05, 1.50240957736969e-05, 1.605786383152008e-05, 1.7091631889343262e-05, 
1.8125399947166443e-05, 1.9159168004989624e-05, 2.0192936062812805e-05, 2.1226704120635986e-05, 2.2260472178459167e-05, 2.329424023628235e-05, 2.432800829410553e-05, 2.536177635192871e-05, 2.6395544409751892e-05, 2.7429312467575073e-05, 2.8463080525398254e-05, 2.9496848583221436e-05, 3.053061664104462e-05, 3.15643846988678e-05, 3.259815275669098e-05, 3.363192081451416e-05, 3.466568887233734e-05, 3.569945693016052e-05, 3.6733224987983704e-05, 3.7766993045806885e-05, 3.8800761103630066e-05, 3.983452916145325e-05, 4.086829721927643e-05, 4.190206527709961e-05]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 6.0, 5.0, 12.0, 14.0, 25.0, 31.0, 58.0, 75.0, 88.0, 138.0, 241.0, 406.0, 546.0, 828.0, 1250.0, 1998.0, 3117.0, 5294.0, 8565.0, 14571.0, 25661.0, 47194.0, 93424.0, 213734.0, 881558.0, 2326303.0, 310510.0, 125005.0, 58419.0, 31092.0, 17324.0, 10111.0, 6158.0, 3742.0, 2361.0, 1507.0, 977.0, 645.0, 442.0, 306.0, 161.0, 138.0, 72.0, 52.0, 41.0, 23.0, 19.0, 15.0, 8.0, 8.0, 5.0, 5.0, 4.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-7.56382942199707e-05, -7.313117384910583e-05, -7.062405347824097e-05, -6.81169331073761e-05, -6.560981273651123e-05, -6.310269236564636e-05, -6.0595571994781494e-05, -5.8088451623916626e-05, -5.558133125305176e-05, -5.307421088218689e-05, -5.056709051132202e-05, -4.805997014045715e-05, -4.5552849769592285e-05, -4.304572939872742e-05, -4.053860902786255e-05, -3.803148865699768e-05, -3.552436828613281e-05, -3.3017247915267944e-05, -3.0510127544403076e-05, -2.8003007173538208e-05, -2.549588680267334e-05, -2.298876643180847e-05, -2.0481646060943604e-05, -1.7974525690078735e-05, -1.5467405319213867e-05, -1.2960284948348999e-05, -1.0453164577484131e-05, -7.946044206619263e-06, -5.4389238357543945e-06, -2.9318034648895264e-06, -4.246830940246582e-07, 2.08243727684021e-06, 4.589557647705078e-06, 7.096678018569946e-06, 9.603798389434814e-06, 1.2110918760299683e-05, 1.461803913116455e-05, 1.712515950202942e-05, 1.9632279872894287e-05, 2.2139400243759155e-05, 2.4646520614624023e-05, 2.715364098548889e-05, 2.966076135635376e-05, 3.216788172721863e-05, 3.4675002098083496e-05, 3.7182122468948364e-05, 3.968924283981323e-05, 4.21963632106781e-05, 4.470348358154297e-05, 4.721060395240784e-05, 4.9717724323272705e-05, 5.222484469413757e-05, 5.473196506500244e-05, 5.723908543586731e-05, 5.974620580673218e-05, 6.225332617759705e-05, 6.476044654846191e-05, 6.726756691932678e-05, 6.977468729019165e-05, 7.228180766105652e-05, 7.478892803192139e-05, 7.729604840278625e-05, 7.980316877365112e-05, 8.231028914451599e-05, 8.481740951538086e-05]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 1.0, 3.0, 3.0, 3.0, 3.0, 1.0, 5.0, 11.0, 13.0, 12.0, 15.0, 13.0, 17.0, 27.0, 33.0, 43.0, 29.0, 47.0, 62.0, 68.0, 85.0, 100.0, 145.0, 183.0, 341.0, 695.0, 778.0, 380.0, 216.0, 120.0, 102.0, 85.0, 69.0, 64.0, 49.0, 29.0, 36.0, 36.0, 28.0, 24.0, 24.0, 17.0, 7.0, 11.0, 16.0, 15.0, 5.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0], "bins": [-4.798173904418945e-05, -4.633236676454544e-05, -4.468299448490143e-05, -4.3033622205257416e-05, -4.13842499256134e-05, -3.973487764596939e-05, -3.808550536632538e-05, -3.6436133086681366e-05, -3.4786760807037354e-05, -3.313738852739334e-05, -3.148801624774933e-05, -2.9838643968105316e-05, -2.8189271688461304e-05, -2.653989940881729e-05, -2.489052712917328e-05, -2.3241154849529266e-05, 
-2.1591782569885254e-05, -1.994241029024124e-05, -1.829303801059723e-05, -1.6643665730953217e-05, -1.4994293451309204e-05, -1.3344921171665192e-05, -1.169554889202118e-05, -1.0046176612377167e-05, -8.396804332733154e-06, -6.747432053089142e-06, -5.098059773445129e-06, -3.448687493801117e-06, -1.7993152141571045e-06, -1.4994293451309204e-07, 1.4994293451309204e-06, 3.148801624774933e-06, 4.798173904418945e-06, 6.447546184062958e-06, 8.09691846370697e-06, 9.746290743350983e-06, 1.1395663022994995e-05, 1.3045035302639008e-05, 1.469440758228302e-05, 1.6343779861927032e-05, 1.7993152141571045e-05, 1.9642524421215057e-05, 2.129189670085907e-05, 2.2941268980503082e-05, 2.4590641260147095e-05, 2.6240013539791107e-05, 2.788938581943512e-05, 2.9538758099079132e-05, 3.1188130378723145e-05, 3.283750265836716e-05, 3.448687493801117e-05, 3.613624721765518e-05, 3.7785619497299194e-05, 3.943499177694321e-05, 4.108436405658722e-05, 4.273373633623123e-05, 4.4383108615875244e-05, 4.6032480895519257e-05, 4.768185317516327e-05, 4.933122545480728e-05, 5.0980597734451294e-05, 5.2629970014095306e-05, 5.427934229373932e-05, 5.592871457338333e-05, 5.7578086853027344e-05]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 4.0, 8.0, 7.0, 10.0, 16.0, 27.0, 38.0, 55.0, 86.0, 142.0, 121.0, 125.0, 99.0, 66.0, 50.0, 47.0, 20.0, 23.0, 15.0, 11.0, 9.0, 13.0, 2.0, 1.0, 4.0, 1.0, 3.0, 2.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00038310178206302226, -0.0003661571827251464, -0.000349212612491101, -0.0003322680131532252, -0.00031532341381534934, -0.0002983788144774735, -0.0002814342442434281, -0.00026448964490555227, -0.0002475450746715069, -0.00023060048988554627, -0.00021365589054767042, -0.0001967113057617098, -0.00017976670642383397, -0.00016282212163787335, -0.00014587753685191274, -0.0001289329375140369, -0.00011198833817616105, -9.504374611424282e-05, -7.809915405232459e-05, -6.115456926636398e-05, -4.420997720444575e-05, -2.726538514252752e-05, -1.0320800356566906e-05, 6.623791705351323e-06, 2.3568383767269552e-05, 4.051297582918778e-05, 5.74575642531272e-05, 7.440215267706662e-05, 9.134674473898485e-05, 0.00010829133680090308, 0.0001252359215868637, 0.0001421805063728243, 0.0001591250766068697, 0.0001760696613928303, 0.00019301426073070616, 0.00020995884551666677, 0.0002269034448545426, 0.00024384802964050323, 0.00026079261442646384, 0.0002777372137643397, 0.00029468181310221553, 0.00031162641244009137, 0.00032857098267413676, 0.0003455155820120126, 0.00036246018134988844, 0.00037940475158393383, 0.0003963493509218097, 0.0004132939502596855, 0.0004302385204937309, 0.00044718311983160675, 0.00046412769006565213, 0.000481072289403528, 0.0004980168887414038, 0.0005149614880792797, 0.0005319060292094946, 0.0005488506285473704, 0.0005657952278852463, 0.0005827398272231221, 0.000599684426560998, 0.0006166290258988738, 0.0006335735670290887, 0.0006505181663669646, 0.0006674627657048404, 0.0006844073650427163, 0.0007013519643805921]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 7.0, 4.0, 5.0, 13.0, 11.0, 9.0, 18.0, 18.0, 21.0, 22.0, 26.0, 28.0, 34.0, 44.0, 43.0, 35.0, 49.0, 56.0, 50.0, 38.0, 48.0, 40.0, 43.0, 37.0, 38.0, 32.0, 34.0, 27.0, 28.0, 26.0, 25.0, 15.0, 19.0, 11.0, 8.0, 10.0, 8.0, 4.0, 2.0, 7.0, 1.0, 
2.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00025027990341186523, -0.00024265144020318985, -0.00023502297699451447, -0.00022739451378583908, -0.0002197660505771637, -0.0002121375873684883, -0.00020450912415981293, -0.00019688066095113754, -0.00018925219774246216, -0.00018162373453378677, -0.0001739952713251114, -0.000166366808116436, -0.00015873834490776062, -0.00015110988169908524, -0.00014348141849040985, -0.00013585295528173447, -0.00012822449207305908, -0.0001205960288643837, -0.00011296756565570831, -0.00010533910244703293, -9.771063923835754e-05, -9.008217602968216e-05, -8.245371282100677e-05, -7.482524961233139e-05, -6.7196786403656e-05, -5.956832319498062e-05, -5.193985998630524e-05, -4.431139677762985e-05, -3.668293356895447e-05, -2.9054470360279083e-05, -2.14260071516037e-05, -1.3797543942928314e-05, -6.16908073425293e-06, 1.4593824744224548e-06, 9.08784568309784e-06, 1.6716308891773224e-05, 2.434477210044861e-05, 3.197323530912399e-05, 3.960169851779938e-05, 4.723016172647476e-05, 5.4858624935150146e-05, 6.248708814382553e-05, 7.011555135250092e-05, 7.77440145611763e-05, 8.537247776985168e-05, 9.300094097852707e-05, 0.00010062940418720245, 0.00010825786739587784, 0.00011588633060455322, 0.0001235147938132286, 0.000131143257021904, 0.00013877172023057938, 0.00014640018343925476, 0.00015402864664793015, 0.00016165710985660553, 0.00016928557306528091, 0.0001769140362739563, 0.00018454249948263168, 0.00019217096269130707, 0.00019979942589998245, 0.00020742788910865784, 0.00021505635231733322, 0.0002226848155260086, 0.000230313278734684, 0.00023794174194335938]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 2.0, 7.0, 7.0, 5.0, 5.0, 12.0, 24.0, 16.0, 25.0, 40.0, 77.0, 82.0, 109.0, 172.0, 242.0, 302.0, 440.0, 550.0, 769.0, 1130.0, 1572.0, 2329.0, 3457.0, 5012.0, 8290.0, 13953.0, 25586.0, 50808.0, 116878.0, 451436.0, 205525.0, 75191.0, 35342.0, 18756.0, 10415.0, 6498.0, 4100.0, 2711.0, 1845.0, 1404.0, 959.0, 679.0, 487.0, 373.0, 264.0, 196.0, 132.0, 95.0, 67.0, 53.0, 34.0, 38.0, 21.0, 8.0, 14.0, 14.0, 4.0, 1.0, 3.0, 2.0, 4.0], "bins": [-0.00013315677642822266, -0.00012909062206745148, -0.0001250244677066803, -0.00012095831334590912, -0.00011689215898513794, -0.00011282600462436676, -0.00010875985026359558, -0.0001046936959028244, -0.00010062754154205322, -9.656138718128204e-05, -9.249523282051086e-05, -8.842907845973969e-05, -8.43629240989685e-05, -8.029676973819733e-05, -7.623061537742615e-05, -7.216446101665497e-05, -6.809830665588379e-05, -6.403215229511261e-05, -5.996599793434143e-05, -5.589984357357025e-05, -5.183368921279907e-05, -4.776753485202789e-05, -4.3701380491256714e-05, -3.9635226130485535e-05, -3.5569071769714355e-05, -3.1502917408943176e-05, -2.7436763048171997e-05, -2.3370608687400818e-05, -1.930445432662964e-05, -1.523829996585846e-05, -1.117214560508728e-05, -7.105991244316101e-06, -3.039836883544922e-06, 1.0263174772262573e-06, 5.0924718379974365e-06, 9.158626198768616e-06, 1.3224780559539795e-05, 1.7290934920310974e-05, 2.1357089281082153e-05, 2.5423243641853333e-05, 2.9489398002624512e-05, 3.355555236339569e-05, 3.762170672416687e-05, 4.168786108493805e-05, 4.575401544570923e-05, 4.982016980648041e-05, 5.388632416725159e-05, 5.7952478528022766e-05, 6.201863288879395e-05, 6.608478724956512e-05, 7.01509416103363e-05, 7.421709597110748e-05, 7.828325033187866e-05, 8.234940469264984e-05, 8.641555905342102e-05, 9.04817134141922e-05, 9.454786777496338e-05, 9.861402213573456e-05, 
0.00010268017649650574, 0.00010674633085727692, 0.0001108124852180481, 0.00011487863957881927, 0.00011894479393959045, 0.00012301094830036163, 0.0001270771026611328]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0, 4.0, 4.0, 13.0, 6.0, 3.0, 6.0, 10.0, 10.0, 17.0, 19.0, 31.0, 45.0, 42.0, 51.0, 55.0, 57.0, 76.0, 85.0, 88.0, 67.0, 48.0, 44.0, 42.0, 39.0, 24.0, 22.0, 12.0, 20.0, 10.0, 8.0, 7.0, 5.0, 2.0, 3.0, 4.0, 4.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0], "bins": [-2.1696090698242188e-05, -2.1047890186309814e-05, -2.039968967437744e-05, -1.975148916244507e-05, -1.9103288650512695e-05, -1.8455088138580322e-05, -1.780688762664795e-05, -1.7158687114715576e-05, -1.6510486602783203e-05, -1.586228609085083e-05, -1.5214085578918457e-05, -1.4565885066986084e-05, -1.3917684555053711e-05, -1.3269484043121338e-05, -1.2621283531188965e-05, -1.1973083019256592e-05, -1.1324882507324219e-05, -1.0676681995391846e-05, -1.0028481483459473e-05, -9.3802809715271e-06, -8.732080459594727e-06, -8.083879947662354e-06, -7.4356794357299805e-06, -6.787478923797607e-06, -6.139278411865234e-06, -5.491077899932861e-06, -4.842877388000488e-06, -4.194676876068115e-06, -3.546476364135742e-06, -2.898275852203369e-06, -2.250075340270996e-06, -1.601874828338623e-06, -9.5367431640625e-07, -3.0547380447387695e-07, 3.427267074584961e-07, 9.909272193908691e-07, 1.6391277313232422e-06, 2.2873282432556152e-06, 2.9355287551879883e-06, 3.5837292671203613e-06, 4.231929779052734e-06, 4.880130290985107e-06, 5.5283308029174805e-06, 6.1765313148498535e-06, 6.8247318267822266e-06, 7.4729323387146e-06, 8.121132850646973e-06, 8.769333362579346e-06, 9.417533874511719e-06, 1.0065734386444092e-05, 1.0713934898376465e-05, 1.1362135410308838e-05, 1.2010335922241211e-05, 1.2658536434173584e-05, 1.3306736946105957e-05, 1.395493745803833e-05, 1.4603137969970703e-05, 1.5251338481903076e-05, 1.589953899383545e-05, 1.6547739505767822e-05, 1.7195940017700195e-05, 1.784414052963257e-05, 1.849234104156494e-05, 1.9140541553497314e-05, 1.9788742065429688e-05]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 4.0, 3.0, 6.0, 2.0, 11.0, 16.0, 23.0, 34.0, 49.0, 79.0, 107.0, 152.0, 223.0, 280.0, 414.0, 606.0, 887.0, 1289.0, 1812.0, 2749.0, 3953.0, 6042.0, 9355.0, 13986.0, 21697.0, 34563.0, 55854.0, 97205.0, 200826.0, 288964.0, 123760.0, 67770.0, 40839.0, 26016.0, 16554.0, 10629.0, 7174.0, 4665.0, 3085.0, 2130.0, 1494.0, 936.0, 673.0, 509.0, 354.0, 226.0, 183.0, 106.0, 87.0, 68.0, 40.0, 29.0, 23.0, 10.0, 1.0, 6.0, 4.0, 2.0, 5.0, 1.0, 2.0], "bins": [-6.717443466186523e-05, -6.508734077215195e-05, -6.300024688243866e-05, -6.091315299272537e-05, -5.8826059103012085e-05, -5.67389652132988e-05, -5.465187132358551e-05, -5.256477743387222e-05, -5.0477683544158936e-05, -4.839058965444565e-05, -4.630349576473236e-05, -4.4216401875019073e-05, -4.2129307985305786e-05, -4.00422140955925e-05, -3.795512020587921e-05, -3.5868026316165924e-05, -3.378093242645264e-05, -3.169383853673935e-05, -2.9606744647026062e-05, -2.7519650757312775e-05, -2.5432556867599487e-05, -2.33454629778862e-05, -2.1258369088172913e-05, -1.9171275198459625e-05, -1.7084181308746338e-05, -1.499708741903305e-05, -1.2909993529319763e-05, -1.0822899639606476e-05, -8.735805749893188e-06, -6.648711860179901e-06, -4.561617970466614e-06, -2.4745240807533264e-06, -3.8743019104003906e-07, 
1.6996636986732483e-06, 3.7867575883865356e-06, 5.873851478099823e-06, 7.96094536781311e-06, 1.0048039257526398e-05, 1.2135133147239685e-05, 1.4222227036952972e-05, 1.630932092666626e-05, 1.8396414816379547e-05, 2.0483508706092834e-05, 2.2570602595806122e-05, 2.465769648551941e-05, 2.6744790375232697e-05, 2.8831884264945984e-05, 3.091897815465927e-05, 3.300607204437256e-05, 3.5093165934085846e-05, 3.718025982379913e-05, 3.926735371351242e-05, 4.135444760322571e-05, 4.3441541492938995e-05, 4.552863538265228e-05, 4.761572927236557e-05, 4.970282316207886e-05, 5.1789917051792145e-05, 5.387701094150543e-05, 5.596410483121872e-05, 5.805119872093201e-05, 6.0138292610645294e-05, 6.222538650035858e-05, 6.431248039007187e-05, 6.639957427978516e-05]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 4.0, 6.0, 5.0, 8.0, 10.0, 8.0, 14.0, 11.0, 15.0, 25.0, 21.0, 36.0, 25.0, 21.0, 34.0, 34.0, 27.0, 33.0, 36.0, 41.0, 47.0, 37.0, 34.0, 35.0, 33.0, 44.0, 46.0, 34.0, 34.0, 33.0, 25.0, 19.0, 29.0, 17.0, 25.0, 23.0, 10.0, 13.0, 8.0, 8.0, 14.0, 5.0, 8.0, 3.0, 4.0, 2.0, 4.0, 5.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.571676254272461e-05, -4.4218264520168304e-05, -4.2719766497612e-05, -4.1221268475055695e-05, -3.972277045249939e-05, -3.8224272429943085e-05, -3.672577440738678e-05, -3.5227276384830475e-05, -3.372877836227417e-05, -3.2230280339717865e-05, -3.073178231716156e-05, -2.9233284294605255e-05, -2.773478627204895e-05, -2.6236288249492645e-05, -2.473779022693634e-05, -2.3239292204380035e-05, -2.174079418182373e-05, -2.0242296159267426e-05, -1.874379813671112e-05, -1.7245300114154816e-05, -1.574680209159851e-05, -1.4248304069042206e-05, -1.2749806046485901e-05, -1.1251308023929596e-05, -9.752810001373291e-06, -8.254311978816986e-06, -6.755813956260681e-06, -5.257315933704376e-06, -3.7588179111480713e-06, -2.2603198885917664e-06, -7.618218660354614e-07, 7.366761565208435e-07, 2.2351741790771484e-06, 3.7336722016334534e-06, 5.232170224189758e-06, 6.730668246746063e-06, 8.229166269302368e-06, 9.727664291858673e-06, 1.1226162314414978e-05, 1.2724660336971283e-05, 1.4223158359527588e-05, 1.5721656382083893e-05, 1.7220154404640198e-05, 1.8718652427196503e-05, 2.0217150449752808e-05, 2.1715648472309113e-05, 2.3214146494865417e-05, 2.4712644517421722e-05, 2.6211142539978027e-05, 2.7709640562534332e-05, 2.9208138585090637e-05, 3.070663660764694e-05, 3.220513463020325e-05, 3.370363265275955e-05, 3.520213067531586e-05, 3.670062869787216e-05, 3.819912672042847e-05, 3.969762474298477e-05, 4.119612276554108e-05, 4.269462078809738e-05, 4.4193118810653687e-05, 4.569161683320999e-05, 4.7190114855766296e-05, 4.86886128783226e-05, 5.0187110900878906e-05]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 3.0, 5.0, 4.0, 4.0, 12.0, 13.0, 20.0, 26.0, 42.0, 47.0, 89.0, 156.0, 283.0, 335.0, 686.0, 1276.0, 2357.0, 4595.0, 9750.0, 22598.0, 44990.0, 164038.0, 489113.0, 206133.0, 60015.0, 22350.0, 9759.0, 3961.0, 2646.0, 1342.0, 760.0, 451.0, 276.0, 154.0, 73.0, 81.0, 40.0, 26.0, 11.0, 11.0, 15.0, 2.0, 6.0, 3.0, 7.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.225440979003906e-06, -7.876195013523102e-06, -7.526949048042297e-06, -7.177703082561493e-06, -6.8284571170806885e-06, -6.479211151599884e-06, -6.12996518611908e-06, -5.780719220638275e-06, -5.431473255157471e-06, -5.082227289676666e-06, -4.732981324195862e-06, 
-4.383735358715057e-06, -4.034489393234253e-06, -3.6852434277534485e-06, -3.335997462272644e-06, -2.9867514967918396e-06, -2.637505531311035e-06, -2.2882595658302307e-06, -1.9390136003494263e-06, -1.5897676348686218e-06, -1.2405216693878174e-06, -8.912757039070129e-07, -5.420297384262085e-07, -1.9278377294540405e-07, 1.564621925354004e-07, 5.057081580162048e-07, 8.549541234970093e-07, 1.2042000889778137e-06, 1.5534460544586182e-06, 1.9026920199394226e-06, 2.251937985420227e-06, 2.6011839509010315e-06, 2.950429916381836e-06, 3.2996758818626404e-06, 3.648921847343445e-06, 3.998167812824249e-06, 4.347413778305054e-06, 4.696659743785858e-06, 5.045905709266663e-06, 5.395151674747467e-06, 5.7443976402282715e-06, 6.093643605709076e-06, 6.44288957118988e-06, 6.792135536670685e-06, 7.141381502151489e-06, 7.490627467632294e-06, 7.839873433113098e-06, 8.189119398593903e-06, 8.538365364074707e-06, 8.887611329555511e-06, 9.236857295036316e-06, 9.58610326051712e-06, 9.935349225997925e-06, 1.028459519147873e-05, 1.0633841156959534e-05, 1.0983087122440338e-05, 1.1332333087921143e-05, 1.1681579053401947e-05, 1.2030825018882751e-05, 1.2380070984363556e-05, 1.272931694984436e-05, 1.3078562915325165e-05, 1.342780888080597e-05, 1.3777054846286774e-05, 1.4126300811767578e-05]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 2.0, 0.0, 2.0, 7.0, 7.0, 7.0, 12.0, 14.0, 10.0, 27.0, 36.0, 29.0, 39.0, 38.0, 48.0, 52.0, 50.0, 0.0, 61.0, 58.0, 62.0, 57.0, 53.0, 46.0, 47.0, 47.0, 29.0, 29.0, 27.0, 21.0, 20.0, 15.0, 16.0, 0.0, 12.0, 4.0, 3.0, 9.0, 3.0, 2.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8477439880371094e-06, -1.7918646335601807e-06, -1.735985279083252e-06, -1.6801059246063232e-06, -1.6242265701293945e-06, -1.5683472156524658e-06, -1.5124678611755371e-06, -1.4565885066986084e-06, -1.4007091522216797e-06, -1.344829797744751e-06, -1.2889504432678223e-06, -1.2330710887908936e-06, -1.1771917343139648e-06, -1.1213123798370361e-06, -1.0654330253601074e-06, -1.0095536708831787e-06, -9.5367431640625e-07, -8.977949619293213e-07, -8.419156074523926e-07, -7.860362529754639e-07, -7.301568984985352e-07, -6.742775440216064e-07, -6.183981895446777e-07, -5.62518835067749e-07, -5.066394805908203e-07, -4.507601261138916e-07, -3.948807716369629e-07, -3.390014171600342e-07, -2.8312206268310547e-07, -2.2724270820617676e-07, -1.7136335372924805e-07, -1.1548399925231934e-07, -5.960464477539063e-08, -3.725290298461914e-09, 5.21540641784668e-08, 1.0803341865539551e-07, 1.6391277313232422e-07, 2.1979212760925293e-07, 2.7567148208618164e-07, 3.3155083656311035e-07, 3.8743019104003906e-07, 4.4330954551696777e-07, 4.991888999938965e-07, 5.550682544708252e-07, 6.109476089477539e-07, 6.668269634246826e-07, 7.227063179016113e-07, 7.7858567237854e-07, 8.344650268554688e-07, 8.903443813323975e-07, 9.462237358093262e-07, 1.0021030902862549e-06, 1.0579824447631836e-06, 1.1138617992401123e-06, 1.169741153717041e-06, 1.2256205081939697e-06, 1.2814998626708984e-06, 1.3373792171478271e-06, 1.3932585716247559e-06, 1.4491379261016846e-06, 1.5050172805786133e-06, 1.560896635055542e-06, 1.6167759895324707e-06, 1.6726553440093994e-06, 1.7285346984863281e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 7.0, 3.0, 15.0, 13.0, 16.0, 22.0, 26.0, 53.0, 53.0, 98.0, 145.0, 107.0, 234.0, 334.0, 517.0, 452.0, 1010.0, 1533.0, 1364.0, 3076.0, 4980.0, 
4758.0, 10847.0, 18488.0, 32651.0, 35778.0, 98781.0, 244093.0, 238688.0, 176595.0, 76994.0, 39861.0, 16102.0, 15476.0, 9253.0, 4106.0, 4058.0, 2736.0, 1255.0, 1293.0, 869.0, 565.0, 261.0, 305.0, 214.0, 97.0, 111.0, 77.0, 54.0, 51.0, 30.0, 19.0, 14.0, 11.0, 15.0, 1.0, 2.0, 2.0, 3.0], "bins": [-5.364418029785156e-06, -5.2032992243766785e-06, -5.042180418968201e-06, -4.881061613559723e-06, -4.719942808151245e-06, -4.558824002742767e-06, -4.3977051973342896e-06, -4.236586391925812e-06, -4.075467586517334e-06, -3.914348781108856e-06, -3.7532299757003784e-06, -3.5921111702919006e-06, -3.430992364883423e-06, -3.269873559474945e-06, -3.1087547540664673e-06, -2.9476359486579895e-06, -2.7865171432495117e-06, -2.625398337841034e-06, -2.464279532432556e-06, -2.3031607270240784e-06, -2.1420419216156006e-06, -1.980923116207123e-06, -1.819804310798645e-06, -1.6586855053901672e-06, -1.4975666999816895e-06, -1.3364478945732117e-06, -1.1753290891647339e-06, -1.014210283756256e-06, -8.530914783477783e-07, -6.919726729393005e-07, -5.308538675308228e-07, -3.6973506212234497e-07, -2.086162567138672e-07, -4.7497451305389404e-08, 1.1362135410308838e-07, 2.7474015951156616e-07, 4.3585896492004395e-07, 5.969777703285217e-07, 7.580965757369995e-07, 9.192153811454773e-07, 1.080334186553955e-06, 1.2414529919624329e-06, 1.4025717973709106e-06, 1.5636906027793884e-06, 1.7248094081878662e-06, 1.885928213596344e-06, 2.0470470190048218e-06, 2.2081658244132996e-06, 2.3692846298217773e-06, 2.530403435230255e-06, 2.691522240638733e-06, 2.8526410460472107e-06, 3.0137598514556885e-06, 3.1748786568641663e-06, 3.335997462272644e-06, 3.497116267681122e-06, 3.6582350730895996e-06, 3.819353878498077e-06, 3.980472683906555e-06, 4.141591489315033e-06, 4.302710294723511e-06, 4.4638291001319885e-06, 4.624947905540466e-06, 4.786066710948944e-06, 4.947185516357422e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 5.0, 3.0, 3.0, 2.0, 9.0, 5.0, 5.0, 16.0, 20.0, 3.0, 11.0, 13.0, 28.0, 27.0, 36.0, 18.0, 37.0, 36.0, 43.0, 35.0, 60.0, 33.0, 53.0, 49.0, 53.0, 54.0, 46.0, 43.0, 18.0, 33.0, 33.0, 31.0, 24.0, 23.0, 11.0, 16.0, 15.0, 9.0, 12.0, 6.0, 11.0, 3.0, 2.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-3.635883331298828e-06, -3.5259872674942017e-06, -3.416091203689575e-06, -3.3061951398849487e-06, -3.1962990760803223e-06, -3.086403012275696e-06, -2.9765069484710693e-06, -2.866610884666443e-06, -2.7567148208618164e-06, -2.64681875705719e-06, -2.5369226932525635e-06, -2.427026629447937e-06, -2.3171305656433105e-06, -2.207234501838684e-06, -2.0973384380340576e-06, -1.987442374229431e-06, -1.8775463104248047e-06, -1.7676502466201782e-06, -1.6577541828155518e-06, -1.5478581190109253e-06, -1.4379620552062988e-06, -1.3280659914016724e-06, -1.218169927597046e-06, -1.1082738637924194e-06, -9.98377799987793e-07, -8.884817361831665e-07, -7.7858567237854e-07, -6.686896085739136e-07, -5.587935447692871e-07, -4.4889748096466064e-07, -3.390014171600342e-07, -2.2910535335540771e-07, -1.1920928955078125e-07, -9.313225746154785e-09, 1.0058283805847168e-07, 2.1047890186309814e-07, 3.203749656677246e-07, 4.302710294723511e-07, 5.401670932769775e-07, 6.50063157081604e-07, 7.599592208862305e-07, 8.698552846908569e-07, 9.797513484954834e-07, 1.0896474123001099e-06, 1.1995434761047363e-06, 1.3094395399093628e-06, 1.4193356037139893e-06, 1.5292316675186157e-06, 1.6391277313232422e-06, 1.7490237951278687e-06, 1.8589198589324951e-06, 
1.9688159227371216e-06, 2.078711986541748e-06, 2.1886080503463745e-06, 2.298504114151001e-06, 2.4084001779556274e-06, 2.518296241760254e-06, 2.6281923055648804e-06, 2.738088369369507e-06, 2.8479844331741333e-06, 2.9578804969787598e-06, 3.0677765607833862e-06, 3.1776726245880127e-06, 3.287568688392639e-06, 3.3974647521972656e-06]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 6.0, 4.0, 6.0, 7.0, 10.0, 8.0, 13.0, 24.0, 27.0, 43.0, 40.0, 70.0, 96.0, 150.0, 126.0, 88.0, 64.0, 45.0, 25.0, 27.0, 30.0, 11.0, 11.0, 13.0, 12.0, 5.0, 8.0, 7.0, 7.0, 3.0, 1.0, 4.0, 3.0, 2.0, 0.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00024307223793584853, -0.00023490611056331545, -0.00022673996863886714, -0.00021857384126633406, -0.00021040771389380097, -0.0002022415865212679, -0.0001940754591487348, -0.0001859093172242865, -0.00017774318985175341, -0.00016957706247922033, -0.00016141092055477202, -0.00015324479318223894, -0.00014507866580970585, -0.00013691253843717277, -0.0001287464110646397, -0.00012058026914019138, -0.0001124141417676583, -0.00010424801439512521, -9.608187974663451e-05, -8.791574509814382e-05, -7.974961772561073e-05, -7.158349035307765e-05, -6.341735570458695e-05, -5.525122469407506e-05, -4.708509368356317e-05, -3.891896267305128e-05, -3.075283166253939e-05, -2.2586700652027503e-05, -1.4420569641515613e-05, -6.2544386310037225e-06, 1.9116923795081675e-06, 1.0077823390020058e-05, 1.8243968952447176e-05, 2.6410099962959066e-05, 3.4576230973470956e-05, 4.2742361983982846e-05, 5.0908492994494736e-05, 5.9074624005006626e-05, 6.724075501551852e-05, 7.540688966400921e-05, 8.35730170365423e-05, 9.173914440907538e-05, 9.990527905756608e-05, 0.00010807141370605677, 0.00011623754107858986, 0.00012440366845112294, 0.00013256981037557125, 0.00014073593774810433, 0.00014890206512063742, 0.0001570681924931705, 0.00016523431986570358, 0.0001734004617901519, 0.00018156658916268498, 0.00018973271653521806, 0.00019789885845966637, 0.00020606498583219945, 0.00021423111320473254, 0.00022239724057726562, 0.0002305633679497987, 0.00023872950987424701, 0.0002468956517986953, 0.0002550617791712284, 0.0002632279065437615, 0.0002713940339162946, 0.00027956016128882766]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 1.0, 8.0, 5.0, 3.0, 8.0, 5.0, 12.0, 11.0, 10.0, 15.0, 9.0, 12.0, 29.0, 19.0, 29.0, 32.0, 38.0, 35.0, 48.0, 34.0, 45.0, 44.0, 41.0, 44.0, 40.0, 33.0, 32.0, 40.0, 31.0, 27.0, 39.0, 24.0, 35.0, 28.0, 18.0, 18.0, 23.0, 17.0, 12.0, 10.0, 4.0, 9.0, 6.0, 9.0, 4.0, 3.0, 1.0, 1.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00016021728515625, -0.00015490595251321793, -0.00014959461987018585, -0.00014428328722715378, -0.0001389719545841217, -0.00013366062194108963, -0.00012834928929805756, -0.00012303795665502548, -0.00011772662401199341, -0.00011241529136896133, -0.00010710395872592926, -0.00010179262608289719, -9.648129343986511e-05, -9.116996079683304e-05, -8.585862815380096e-05, -8.054729551076889e-05, -7.523596286773682e-05, -6.992463022470474e-05, -6.461329758167267e-05, -5.9301964938640594e-05, -5.399063229560852e-05, -4.8679299652576447e-05, -4.336796700954437e-05, -3.80566343665123e-05, -3.2745301723480225e-05, -2.743396908044815e-05, -2.2122636437416077e-05, -1.6811303794384003e-05, -1.1499971151351929e-05, -6.188638508319855e-06, -8.773058652877808e-07, 
4.434026777744293e-06, 9.745359420776367e-06, 1.5056692063808441e-05, 2.0368024706840515e-05, 2.567935734987259e-05, 3.099068999290466e-05, 3.630202263593674e-05, 4.161335527896881e-05, 4.6924687922000885e-05, 5.223602056503296e-05, 5.754735320806503e-05, 6.285868585109711e-05, 6.817001849412918e-05, 7.348135113716125e-05, 7.879268378019333e-05, 8.41040164232254e-05, 8.941534906625748e-05, 9.472668170928955e-05, 0.00010003801435232162, 0.0001053493469953537, 0.00011066067963838577, 0.00011597201228141785, 0.00012128334492444992, 0.000126594677567482, 0.00013190601021051407, 0.00013721734285354614, 0.00014252867549657822, 0.0001478400081396103, 0.00015315134078264236, 0.00015846267342567444, 0.0001637740060687065, 0.00016908533871173859, 0.00017439667135477066, 0.00017970800399780273]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 5.0, 11.0, 10.0, 7.0, 9.0, 18.0, 28.0, 44.0, 43.0, 62.0, 87.0, 138.0, 221.0, 310.0, 540.0, 880.0, 1457.0, 2461.0, 4486.0, 8978.0, 19959.0, 49852.0, 172186.0, 3547599.0, 270360.0, 63921.0, 24926.0, 11439.0, 5940.0, 3437.0, 2000.0, 1203.0, 665.0, 360.0, 238.0, 132.0, 80.0, 59.0, 38.0, 22.0, 27.0, 15.0, 15.0, 4.0, 5.0, 3.0, 3.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014650821685791016, -0.0001420155167579651, -0.00013752281665802002, -0.00013303011655807495, -0.00012853741645812988, -0.00012404471635818481, -0.00011955201625823975, -0.00011505931615829468, -0.00011056661605834961, -0.00010607391595840454, -0.00010158121585845947, -9.70885157585144e-05, -9.259581565856934e-05, -8.810311555862427e-05, -8.36104154586792e-05, -7.911771535873413e-05, -7.462501525878906e-05, -7.0132315158844e-05, -6.563961505889893e-05, -6.114691495895386e-05, -5.665421485900879e-05, -5.216151475906372e-05, -4.766881465911865e-05, -4.3176114559173584e-05, -3.8683414459228516e-05, -3.419071435928345e-05, -2.969801425933838e-05, -2.520531415939331e-05, -2.0712614059448242e-05, -1.6219913959503174e-05, -1.1727213859558105e-05, -7.234513759613037e-06, -2.7418136596679688e-06, 1.7508864402770996e-06, 6.243586540222168e-06, 1.0736286640167236e-05, 1.5228986740112305e-05, 1.9721686840057373e-05, 2.421438694000244e-05, 2.870708703994751e-05, 3.319978713989258e-05, 3.7692487239837646e-05, 4.2185187339782715e-05, 4.667788743972778e-05, 5.117058753967285e-05, 5.566328763961792e-05, 6.015598773956299e-05, 6.464868783950806e-05, 6.914138793945312e-05, 7.36340880393982e-05, 7.812678813934326e-05, 8.261948823928833e-05, 8.71121883392334e-05, 9.160488843917847e-05, 9.609758853912354e-05, 0.0001005902886390686, 0.00010508298873901367, 0.00010957568883895874, 0.00011406838893890381, 0.00011856108903884888, 0.00012305378913879395, 0.00012754648923873901, 0.00013203918933868408, 0.00013653188943862915, 0.00014102458953857422]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 1.0, 4.0, 3.0, 3.0, 4.0, 8.0, 13.0, 11.0, 22.0, 28.0, 42.0, 52.0, 57.0, 92.0, 115.0, 118.0, 95.0, 71.0, 71.0, 55.0, 48.0, 17.0, 20.0, 12.0, 9.0, 3.0, 13.0, 7.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.11732292175293e-05, -3.0242837965488434e-05, -2.931244671344757e-05, -2.8382055461406708e-05, -2.7451664209365845e-05, -2.652127295732498e-05, -2.559088170528412e-05, -2.4660490453243256e-05, 
-2.3730099201202393e-05, -2.279970794916153e-05, -2.1869316697120667e-05, -2.0938925445079803e-05, -2.000853419303894e-05, -1.9078142940998077e-05, -1.8147751688957214e-05, -1.721736043691635e-05, -1.6286969184875488e-05, -1.5356577932834625e-05, -1.4426186680793762e-05, -1.34957954287529e-05, -1.2565404176712036e-05, -1.1635012924671173e-05, -1.070462167263031e-05, -9.774230420589447e-06, -8.843839168548584e-06, -7.913447916507721e-06, -6.983056664466858e-06, -6.052665412425995e-06, -5.122274160385132e-06, -4.191882908344269e-06, -3.2614916563034058e-06, -2.3311004042625427e-06, -1.4007091522216797e-06, -4.7031790018081665e-07, 4.600733518600464e-07, 1.3904646039009094e-06, 2.3208558559417725e-06, 3.2512471079826355e-06, 4.1816383600234985e-06, 5.1120296120643616e-06, 6.042420864105225e-06, 6.972812116146088e-06, 7.90320336818695e-06, 8.833594620227814e-06, 9.763985872268677e-06, 1.069437712430954e-05, 1.1624768376350403e-05, 1.2555159628391266e-05, 1.3485550880432129e-05, 1.4415942132472992e-05, 1.5346333384513855e-05, 1.6276724636554718e-05, 1.720711588859558e-05, 1.8137507140636444e-05, 1.9067898392677307e-05, 1.999828964471817e-05, 2.0928680896759033e-05, 2.1859072148799896e-05, 2.278946340084076e-05, 2.3719854652881622e-05, 2.4650245904922485e-05, 2.558063715696335e-05, 2.651102840900421e-05, 2.7441419661045074e-05, 2.8371810913085938e-05]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 0.0, 3.0, 5.0, 4.0, 9.0, 6.0, 14.0, 11.0, 32.0, 48.0, 71.0, 102.0, 147.0, 280.0, 436.0, 632.0, 1229.0, 1828.0, 3280.0, 5940.0, 10847.0, 20724.0, 43281.0, 97326.0, 292595.0, 2813388.0, 625275.0, 154655.0, 61831.0, 28108.0, 14621.0, 7591.0, 4109.0, 2341.0, 1365.0, 812.0, 481.0, 281.0, 201.0, 139.0, 77.0, 50.0, 37.0, 28.0, 13.0, 14.0, 12.0, 8.0, 0.0, 2.0, 2.0, 5.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.440017700195312e-05, -8.156057447195053e-05, -7.872097194194794e-05, -7.588136941194534e-05, -7.304176688194275e-05, -7.020216435194016e-05, -6.736256182193756e-05, -6.452295929193497e-05, -6.168335676193237e-05, -5.884375423192978e-05, -5.6004151701927185e-05, -5.316454917192459e-05, -5.0324946641922e-05, -4.74853441119194e-05, -4.464574158191681e-05, -4.1806139051914215e-05, -3.896653652191162e-05, -3.612693399190903e-05, -3.328733146190643e-05, -3.044772893190384e-05, -2.7608126401901245e-05, -2.476852387189865e-05, -2.1928921341896057e-05, -1.9089318811893463e-05, -1.624971628189087e-05, -1.3410113751888275e-05, -1.0570511221885681e-05, -7.730908691883087e-06, -4.891306161880493e-06, -2.051703631877899e-06, 7.878988981246948e-07, 3.627501428127289e-06, 6.467103958129883e-06, 9.306706488132477e-06, 1.214630901813507e-05, 1.4985911548137665e-05, 1.782551407814026e-05, 2.0665116608142853e-05, 2.3504719138145447e-05, 2.634432166814804e-05, 2.9183924198150635e-05, 3.202352672815323e-05, 3.486312925815582e-05, 3.770273178815842e-05, 4.054233431816101e-05, 4.3381936848163605e-05, 4.62215393781662e-05, 4.906114190816879e-05, 5.190074443817139e-05, 5.474034696817398e-05, 5.7579949498176575e-05, 6.041955202817917e-05, 6.325915455818176e-05, 6.609875708818436e-05, 6.893835961818695e-05, 7.177796214818954e-05, 7.461756467819214e-05, 7.745716720819473e-05, 8.029676973819733e-05, 8.313637226819992e-05, 8.597597479820251e-05, 8.881557732820511e-05, 9.16551798582077e-05, 9.44947823882103e-05, 9.733438491821289e-05]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", 
"values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 4.0, 5.0, 4.0, 5.0, 11.0, 16.0, 11.0, 18.0, 28.0, 38.0, 42.0, 68.0, 71.0, 103.0, 131.0, 182.0, 412.0, 987.0, 883.0, 323.0, 161.0, 133.0, 84.0, 88.0, 50.0, 45.0, 32.0, 26.0, 22.0, 27.0, 15.0, 13.0, 10.0, 8.0, 7.0, 4.0, 6.0, 3.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4895877838134766e-05, -5.285628139972687e-05, -5.081668496131897e-05, -4.877708852291107e-05, -4.6737492084503174e-05, -4.4697895646095276e-05, -4.265829920768738e-05, -4.061870276927948e-05, -3.857910633087158e-05, -3.6539509892463684e-05, -3.4499913454055786e-05, -3.246031701564789e-05, -3.042072057723999e-05, -2.8381124138832092e-05, -2.6341527700424194e-05, -2.4301931262016296e-05, -2.22623348236084e-05, -2.02227383852005e-05, -1.8183141946792603e-05, -1.6143545508384705e-05, -1.4103949069976807e-05, -1.2064352631568909e-05, -1.002475619316101e-05, -7.985159754753113e-06, -5.945563316345215e-06, -3.905966877937317e-06, -1.866370439529419e-06, 1.73225998878479e-07, 2.212822437286377e-06, 4.252418875694275e-06, 6.292015314102173e-06, 8.33161175251007e-06, 1.0371208190917969e-05, 1.2410804629325867e-05, 1.4450401067733765e-05, 1.6489997506141663e-05, 1.852959394454956e-05, 2.056919038295746e-05, 2.2608786821365356e-05, 2.4648383259773254e-05, 2.6687979698181152e-05, 2.872757613658905e-05, 3.076717257499695e-05, 3.2806769013404846e-05, 3.4846365451812744e-05, 3.688596189022064e-05, 3.892555832862854e-05, 4.096515476703644e-05, 4.3004751205444336e-05, 4.5044347643852234e-05, 4.708394408226013e-05, 4.912354052066803e-05, 5.116313695907593e-05, 5.3202733397483826e-05, 5.5242329835891724e-05, 5.728192627429962e-05, 5.932152271270752e-05, 6.136111915111542e-05, 6.340071558952332e-05, 6.544031202793121e-05, 6.747990846633911e-05, 6.951950490474701e-05, 7.155910134315491e-05, 7.35986977815628e-05, 7.56382942199707e-05]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 6.0, 6.0, 20.0, 18.0, 41.0, 68.0, 132.0, 201.0, 173.0, 120.0, 71.0, 53.0, 43.0, 23.0, 12.0, 7.0, 5.0, 5.0, 6.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0004053623997606337, -0.000382576952688396, -0.00035979150561615825, -0.00033700602944009006, -0.00031422058236785233, -0.0002914351352956146, -0.0002686496591195464, -0.0002458642120473087, -0.00022307876497507095, -0.00020029331790283322, -0.00017750785627868026, -0.0001547223946545273, -0.00013193694758228958, -0.00010915149323409423, -8.636603888589889e-05, -6.358057726174593e-05, -4.07951301895082e-05, -1.8009675841312855e-05, 4.775778506882489e-06, 2.7561232855077833e-05, 5.034668720327318e-05, 7.313214155146852e-05, 9.591759589966387e-05, 0.00011870305752381682, 0.00014148850459605455, 0.00016427395166829228, 0.00018705941329244524, 0.0002098448749165982, 0.00023263032198883593, 0.00025541576906107366, 0.00027820124523714185, 0.0003009866923093796, 0.0003237721975892782, 0.00034655764466151595, 0.0003693430917337537, 0.00039212856790982187, 0.0004149140149820596, 0.00043769946205429733, 0.0004604849382303655, 0.00048327038530260324, 0.000506055832374841, 0.0005288412794470787, 0.0005516267265193164, 0.0005744121735915542, 0.0005971976788714528, 0.0006199831259436905, 0.0006427685730159283, 0.000665554020088166, 0.0006883394671604037, 0.0007111249142326415, 
0.0007339103613048792, 0.0007566958083771169, 0.0007794812554493546, 0.0008022667607292533, 0.000825052207801491, 0.0008478376548737288, 0.0008706231019459665, 0.0008934085490182042, 0.0009161939960904419, 0.0009389794431626797, 0.0009617649484425783, 0.0009845503373071551, 0.0010073358425870538, 0.0010301212314516306, 0.0010529067367315292]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 2.0, 1.0, 4.0, 7.0, 4.0, 8.0, 5.0, 10.0, 10.0, 8.0, 15.0, 17.0, 23.0, 32.0, 23.0, 20.0, 29.0, 33.0, 38.0, 34.0, 47.0, 46.0, 56.0, 56.0, 35.0, 35.0, 30.0, 37.0, 44.0, 25.0, 34.0, 26.0, 21.0, 24.0, 27.0, 21.0, 24.0, 12.0, 14.0, 10.0, 7.0, 13.0, 8.0, 7.0, 6.0, 5.0, 6.0, 6.0, 3.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0001837015151977539, -0.00017777085304260254, -0.00017184019088745117, -0.0001659095287322998, -0.00015997886657714844, -0.00015404820442199707, -0.0001481175422668457, -0.00014218688011169434, -0.00013625621795654297, -0.0001303255558013916, -0.00012439489364624023, -0.00011846423149108887, -0.0001125335693359375, -0.00010660290718078613, -0.00010067224502563477, -9.47415828704834e-05, -8.881092071533203e-05, -8.288025856018066e-05, -7.69495964050293e-05, -7.101893424987793e-05, -6.508827209472656e-05, -5.9157609939575195e-05, -5.322694778442383e-05, -4.729628562927246e-05, -4.1365623474121094e-05, -3.5434961318969727e-05, -2.950429916381836e-05, -2.3573637008666992e-05, -1.7642974853515625e-05, -1.1712312698364258e-05, -5.781650543212891e-06, 1.4901161193847656e-07, 6.079673767089844e-06, 1.2010335922241211e-05, 1.7940998077392578e-05, 2.3871660232543945e-05, 2.9802322387695312e-05, 3.573298454284668e-05, 4.166364669799805e-05, 4.7594308853149414e-05, 5.352497100830078e-05, 5.945563316345215e-05, 6.538629531860352e-05, 7.131695747375488e-05, 7.724761962890625e-05, 8.317828178405762e-05, 8.910894393920898e-05, 9.503960609436035e-05, 0.00010097026824951172, 0.00010690093040466309, 0.00011283159255981445, 0.00011876225471496582, 0.0001246929168701172, 0.00013062357902526855, 0.00013655424118041992, 0.0001424849033355713, 0.00014841556549072266, 0.00015434622764587402, 0.0001602768898010254, 0.00016620755195617676, 0.00017213821411132812, 0.0001780688762664795, 0.00018399953842163086, 0.00018993020057678223, 0.0001958608627319336]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 0.0, 6.0, 5.0, 4.0, 7.0, 9.0, 10.0, 6.0, 8.0, 7.0, 12.0, 24.0, 34.0, 39.0, 57.0, 72.0, 135.0, 268.0, 582.0, 1291.0, 3520.0, 10742.0, 43946.0, 403892.0, 518531.0, 47578.0, 11386.0, 3747.0, 1390.0, 559.0, 265.0, 137.0, 65.0, 47.0, 33.0, 35.0, 22.0, 14.0, 11.0, 10.0, 7.0, 10.0, 5.0, 5.0, 9.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0], "bins": [-0.0003056526184082031, -0.0002961084246635437, -0.0002865642309188843, -0.00027702003717422485, -0.00026747584342956543, -0.000257931649684906, -0.0002483874559402466, -0.00023884326219558716, -0.00022929906845092773, -0.0002197548747062683, -0.0002102106809616089, -0.00020066648721694946, -0.00019112229347229004, -0.00018157809972763062, -0.0001720339059829712, -0.00016248971223831177, -0.00015294551849365234, -0.00014340132474899292, -0.0001338571310043335, -0.00012431293725967407, -0.00011476874351501465, -0.00010522454977035522, -9.56803560256958e-05, -8.613616228103638e-05, -7.659196853637695e-05, -6.704777479171753e-05, -5.7503581047058105e-05, -4.795938730239868e-05, 
-3.841519355773926e-05, -2.8870999813079834e-05, -1.932680606842041e-05, -9.782612323760986e-06, -2.384185791015625e-07, 9.305775165557861e-06, 1.8849968910217285e-05, 2.839416265487671e-05, 3.793835639953613e-05, 4.748255014419556e-05, 5.702674388885498e-05, 6.65709376335144e-05, 7.611513137817383e-05, 8.565932512283325e-05, 9.520351886749268e-05, 0.0001047477126121521, 0.00011429190635681152, 0.00012383610010147095, 0.00013338029384613037, 0.0001429244875907898, 0.00015246868133544922, 0.00016201287508010864, 0.00017155706882476807, 0.0001811012625694275, 0.00019064545631408691, 0.00020018965005874634, 0.00020973384380340576, 0.00021927803754806519, 0.0002288222312927246, 0.00023836642503738403, 0.00024791061878204346, 0.0002574548125267029, 0.0002669990062713623, 0.00027654320001602173, 0.00028608739376068115, 0.0002956315875053406, 0.00030517578125]}, "gradients/encoder.encoder.layers.17.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 6.0, 4.0, 10.0, 8.0, 15.0, 27.0, 37.0, 50.0, 93.0, 114.0, 142.0, 138.0, 127.0, 96.0, 38.0, 41.0, 16.0, 21.0, 10.0, 6.0, 3.0, 5.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.002716064453125e-05, -1.876894384622574e-05, -1.7510727047920227e-05, -1.6252510249614716e-05, -1.4994293451309204e-05, -1.3736076653003693e-05, -1.2477859854698181e-05, -1.121964305639267e-05, -9.961426258087158e-06, -8.703209459781647e-06, -7.444992661476135e-06, -6.186775863170624e-06, -4.928559064865112e-06, -3.670342266559601e-06, -2.4121254682540894e-06, -1.1539086699485779e-06, 1.043081283569336e-07, 1.362524926662445e-06, 2.6207417249679565e-06, 3.878958523273468e-06, 5.1371753215789795e-06, 6.395392119884491e-06, 7.653608918190002e-06, 8.911825716495514e-06, 1.0170042514801025e-05, 1.1428259313106537e-05, 1.2686476111412048e-05, 1.394469290971756e-05, 1.5202909708023071e-05, 1.6461126506328583e-05, 1.7719343304634094e-05, 1.8977560102939606e-05, 2.0235776901245117e-05, 2.149399369955063e-05, 2.275221049785614e-05, 2.401042729616165e-05, 2.5268644094467163e-05, 2.6526860892772675e-05, 2.7785077691078186e-05, 2.9043294489383698e-05, 3.030151128768921e-05, 3.155972808599472e-05, 3.281794488430023e-05, 3.4076161682605743e-05, 3.5334378480911255e-05, 3.6592595279216766e-05, 3.785081207752228e-05, 3.910902887582779e-05, 4.03672456741333e-05, 4.162546247243881e-05, 4.2883679270744324e-05, 4.4141896069049835e-05, 4.540011286735535e-05, 4.665832966566086e-05, 4.791654646396637e-05, 4.917476326227188e-05, 5.043298006057739e-05, 5.1691196858882904e-05, 5.2949413657188416e-05, 5.420763045549393e-05, 5.546584725379944e-05, 5.672406405210495e-05, 5.798228085041046e-05, 5.924049764871597e-05, 6.0498714447021484e-05]}, "gradients/encoder.encoder.layers.17.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 3.0, 2.0, 5.0, 9.0, 21.0, 21.0, 24.0, 46.0, 60.0, 69.0, 93.0, 146.0, 200.0, 300.0, 422.0, 537.0, 810.0, 1092.0, 1611.0, 2203.0, 3344.0, 4684.0, 6780.0, 10159.0, 14550.0, 22461.0, 32947.0, 53380.0, 87301.0, 167884.0, 301303.0, 129177.0, 73590.0, 43928.0, 29305.0, 18802.0, 12765.0, 8815.0, 5902.0, 4156.0, 2845.0, 2001.0, 1385.0, 950.0, 753.0, 467.0, 367.0, 253.0, 183.0, 133.0, 91.0, 68.0, 45.0, 33.0, 30.0, 23.0, 15.0, 7.0, 6.0, 2.0, 7.0], "bins": [-6.836652755737305e-05, -6.631482392549515e-05, -6.426312029361725e-05, -6.221141666173935e-05, -6.015971302986145e-05, 
-5.810800939798355e-05, -5.605630576610565e-05, -5.400460213422775e-05, -5.1952898502349854e-05, -4.9901194870471954e-05, -4.7849491238594055e-05, -4.5797787606716156e-05, -4.374608397483826e-05, -4.169438034296036e-05, -3.964267671108246e-05, -3.759097307920456e-05, -3.553926944732666e-05, -3.348756581544876e-05, -3.143586218357086e-05, -2.9384158551692963e-05, -2.7332454919815063e-05, -2.5280751287937164e-05, -2.3229047656059265e-05, -2.1177344024181366e-05, -1.9125640392303467e-05, -1.7073936760425568e-05, -1.5022233128547668e-05, -1.297052949666977e-05, -1.091882586479187e-05, -8.867122232913971e-06, -6.815418601036072e-06, -4.763714969158173e-06, -2.7120113372802734e-06, -6.603077054023743e-07, 1.391395926475525e-06, 3.443099558353424e-06, 5.494803190231323e-06, 7.546506822109222e-06, 9.598210453987122e-06, 1.164991408586502e-05, 1.370161771774292e-05, 1.575332134962082e-05, 1.7805024981498718e-05, 1.9856728613376617e-05, 2.1908432245254517e-05, 2.3960135877132416e-05, 2.6011839509010315e-05, 2.8063543140888214e-05, 3.0115246772766113e-05, 3.216695040464401e-05, 3.421865403652191e-05, 3.627035766839981e-05, 3.832206130027771e-05, 4.037376493215561e-05, 4.242546856403351e-05, 4.447717219591141e-05, 4.652887582778931e-05, 4.8580579459667206e-05, 5.0632283091545105e-05, 5.2683986723423004e-05, 5.47356903553009e-05, 5.67873939871788e-05, 5.88390976190567e-05, 6.08908012509346e-05, 6.29425048828125e-05]}, "gradients/encoder.encoder.layers.17.attention.v_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 3.0, 3.0, 3.0, 1.0, 4.0, 2.0, 7.0, 4.0, 5.0, 8.0, 8.0, 12.0, 21.0, 15.0, 17.0, 16.0, 33.0, 23.0, 26.0, 21.0, 32.0, 31.0, 32.0, 40.0, 28.0, 35.0, 38.0, 39.0, 34.0, 41.0, 35.0, 46.0, 40.0, 31.0, 34.0, 32.0, 22.0, 25.0, 23.0, 17.0, 15.0, 18.0, 14.0, 17.0, 8.0, 11.0, 9.0, 9.0, 5.0, 5.0, 3.0, 3.0, 3.0, 4.0, 2.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.7266483306884766e-05, -4.570651799440384e-05, -4.414655268192291e-05, -4.2586587369441986e-05, -4.102662205696106e-05, -3.946665674448013e-05, -3.7906691431999207e-05, -3.634672611951828e-05, -3.4786760807037354e-05, -3.322679549455643e-05, -3.16668301820755e-05, -3.0106864869594574e-05, -2.8546899557113647e-05, -2.698693424463272e-05, -2.5426968932151794e-05, -2.3867003619670868e-05, -2.230703830718994e-05, -2.0747072994709015e-05, -1.918710768222809e-05, -1.7627142369747162e-05, -1.6067177057266235e-05, -1.4507211744785309e-05, -1.2947246432304382e-05, -1.1387281119823456e-05, -9.82731580734253e-06, -8.267350494861603e-06, -6.707385182380676e-06, -5.14741986989975e-06, -3.5874545574188232e-06, -2.0274892449378967e-06, -4.675239324569702e-07, 1.0924413800239563e-06, 2.652406692504883e-06, 4.212372004985809e-06, 5.772337317466736e-06, 7.332302629947662e-06, 8.892267942428589e-06, 1.0452233254909515e-05, 1.2012198567390442e-05, 1.3572163879871368e-05, 1.5132129192352295e-05, 1.669209450483322e-05, 1.8252059817314148e-05, 1.9812025129795074e-05, 2.1371990442276e-05, 2.2931955754756927e-05, 2.4491921067237854e-05, 2.605188637971878e-05, 2.7611851692199707e-05, 2.9171817004680634e-05, 3.073178231716156e-05, 3.2291747629642487e-05, 3.385171294212341e-05, 3.541167825460434e-05, 3.6971643567085266e-05, 3.853160887956619e-05, 4.009157419204712e-05, 4.1651539504528046e-05, 4.321150481700897e-05, 4.47714701294899e-05, 4.6331435441970825e-05, 4.789140075445175e-05, 4.945136606693268e-05, 5.1011331379413605e-05, 5.257129669189453e-05]}, "gradients/encoder.encoder.layers.17.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 
0.0, 0.0, 7.0, 1.0, 2.0, 1.0, 3.0, 8.0, 5.0, 10.0, 22.0, 23.0, 39.0, 68.0, 84.0, 98.0, 156.0, 233.0, 470.0, 651.0, 919.0, 1471.0, 2598.0, 6060.0, 9674.0, 19254.0, 44407.0, 122367.0, 378876.0, 330375.0, 71846.0, 28416.0, 13396.0, 6997.0, 4588.0, 1992.0, 1214.0, 764.0, 454.0, 399.0, 210.0, 118.0, 89.0, 46.0, 44.0, 45.0, 14.0, 16.0, 13.0, 9.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0], "bins": [-7.927417755126953e-06, -7.677823305130005e-06, -7.428228855133057e-06, -7.178634405136108e-06, -6.92903995513916e-06, -6.679445505142212e-06, -6.429851055145264e-06, -6.1802566051483154e-06, -5.930662155151367e-06, -5.681067705154419e-06, -5.431473255157471e-06, -5.1818788051605225e-06, -4.932284355163574e-06, -4.682689905166626e-06, -4.433095455169678e-06, -4.1835010051727295e-06, -3.933906555175781e-06, -3.684312105178833e-06, -3.4347176551818848e-06, -3.1851232051849365e-06, -2.9355287551879883e-06, -2.68593430519104e-06, -2.436339855194092e-06, -2.1867454051971436e-06, -1.9371509552001953e-06, -1.687556505203247e-06, -1.4379620552062988e-06, -1.1883676052093506e-06, -9.387731552124023e-07, -6.891787052154541e-07, -4.3958425521850586e-07, -1.8998980522155762e-07, 5.960464477539063e-08, 3.0919909477233887e-07, 5.587935447692871e-07, 8.083879947662354e-07, 1.0579824447631836e-06, 1.3075768947601318e-06, 1.55717134475708e-06, 1.8067657947540283e-06, 2.0563602447509766e-06, 2.305954694747925e-06, 2.555549144744873e-06, 2.8051435947418213e-06, 3.0547380447387695e-06, 3.3043324947357178e-06, 3.553926944732666e-06, 3.8035213947296143e-06, 4.0531158447265625e-06, 4.302710294723511e-06, 4.552304744720459e-06, 4.801899194717407e-06, 5.0514936447143555e-06, 5.301088094711304e-06, 5.550682544708252e-06, 5.8002769947052e-06, 6.0498714447021484e-06, 6.299465894699097e-06, 6.549060344696045e-06, 6.798654794692993e-06, 7.048249244689941e-06, 7.29784369468689e-06, 7.547438144683838e-06, 7.797032594680786e-06, 8.046627044677734e-06]}, "gradients/encoder.encoder.layers.17.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 6.0, 4.0, 6.0, 12.0, 15.0, 14.0, 15.0, 26.0, 30.0, 27.0, 32.0, 35.0, 46.0, 38.0, 69.0, 42.0, 53.0, 66.0, 58.0, 57.0, 91.0, 36.0, 37.0, 37.0, 32.0, 25.0, 19.0, 17.0, 11.0, 15.0, 12.0, 9.0, 6.0, 2.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7881393432617188e-06, -1.7266720533370972e-06, -1.6652047634124756e-06, -1.603737473487854e-06, -1.5422701835632324e-06, -1.4808028936386108e-06, -1.4193356037139893e-06, -1.3578683137893677e-06, -1.296401023864746e-06, -1.2349337339401245e-06, -1.173466444015503e-06, -1.1119991540908813e-06, -1.0505318641662598e-06, -9.890645742416382e-07, -9.275972843170166e-07, -8.66129994392395e-07, -8.046627044677734e-07, -7.431954145431519e-07, -6.817281246185303e-07, -6.202608346939087e-07, -5.587935447692871e-07, -4.973262548446655e-07, -4.3585896492004395e-07, -3.7439167499542236e-07, -3.129243850708008e-07, -2.514570951461792e-07, -1.8998980522155762e-07, -1.2852251529693604e-07, -6.705522537231445e-08, -5.587935447692871e-09, 5.587935447692871e-08, 1.1734664440155029e-07, 1.7881393432617188e-07, 2.4028122425079346e-07, 3.0174851417541504e-07, 3.632158041000366e-07, 4.246830940246582e-07, 4.861503839492798e-07, 5.476176738739014e-07, 6.09084963798523e-07, 6.705522537231445e-07, 7.320195436477661e-07, 7.934868335723877e-07, 8.549541234970093e-07, 9.164214134216309e-07, 9.778887033462524e-07, 1.039355993270874e-06, 
1.1008232831954956e-06, 1.1622905731201172e-06, 1.2237578630447388e-06, 1.2852251529693604e-06, 1.346692442893982e-06, 1.4081597328186035e-06, 1.469627022743225e-06, 1.5310943126678467e-06, 1.5925616025924683e-06, 1.6540288925170898e-06, 1.7154961824417114e-06, 1.776963472366333e-06, 1.8384307622909546e-06, 1.8998980522155762e-06, 1.9613653421401978e-06, 2.0228326320648193e-06, 2.084299921989441e-06, 2.1457672119140625e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 4.0, 3.0, 3.0, 5.0, 8.0, 17.0, 24.0, 13.0, 43.0, 29.0, 61.0, 67.0, 143.0, 116.0, 241.0, 392.0, 360.0, 782.0, 720.0, 1606.0, 1617.0, 3503.0, 5842.0, 5873.0, 14182.0, 15547.0, 42105.0, 52619.0, 178473.0, 277262.0, 260661.0, 92866.0, 31093.0, 26531.0, 10441.0, 9825.0, 4170.0, 4092.0, 2561.0, 1125.0, 1195.0, 541.0, 607.0, 283.0, 301.0, 145.0, 159.0, 107.0, 45.0, 67.0, 16.0, 22.0, 12.0, 15.0, 12.0, 7.0, 4.0, 5.0, 3.0, 0.0, 0.0, 1.0], "bins": [-4.827976226806641e-06, -4.675239324569702e-06, -4.522502422332764e-06, -4.369765520095825e-06, -4.217028617858887e-06, -4.064291715621948e-06, -3.91155481338501e-06, -3.7588179111480713e-06, -3.606081008911133e-06, -3.4533441066741943e-06, -3.300607204437256e-06, -3.1478703022003174e-06, -2.995133399963379e-06, -2.8423964977264404e-06, -2.689659595489502e-06, -2.5369226932525635e-06, -2.384185791015625e-06, -2.2314488887786865e-06, -2.078711986541748e-06, -1.9259750843048096e-06, -1.773238182067871e-06, -1.6205012798309326e-06, -1.4677643775939941e-06, -1.3150274753570557e-06, -1.1622905731201172e-06, -1.0095536708831787e-06, -8.568167686462402e-07, -7.040798664093018e-07, -5.513429641723633e-07, -3.986060619354248e-07, -2.4586915969848633e-07, -9.313225746154785e-08, 5.960464477539063e-08, 2.123415470123291e-07, 3.650784492492676e-07, 5.178153514862061e-07, 6.705522537231445e-07, 8.23289155960083e-07, 9.760260581970215e-07, 1.12876296043396e-06, 1.2814998626708984e-06, 1.434236764907837e-06, 1.5869736671447754e-06, 1.7397105693817139e-06, 1.8924474716186523e-06, 2.045184373855591e-06, 2.1979212760925293e-06, 2.3506581783294678e-06, 2.5033950805664062e-06, 2.6561319828033447e-06, 2.808868885040283e-06, 2.9616057872772217e-06, 3.11434268951416e-06, 3.2670795917510986e-06, 3.419816493988037e-06, 3.5725533962249756e-06, 3.725290298461914e-06, 3.8780272006988525e-06, 4.030764102935791e-06, 4.1835010051727295e-06, 4.336237907409668e-06, 4.4889748096466064e-06, 4.641711711883545e-06, 4.794448614120483e-06, 4.947185516357422e-06]}, "gradients/encoder.encoder.layers.17.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 6.0, 6.0, 8.0, 12.0, 9.0, 14.0, 8.0, 7.0, 20.0, 17.0, 30.0, 30.0, 40.0, 46.0, 51.0, 60.0, 25.0, 73.0, 66.0, 59.0, 54.0, 57.0, 43.0, 45.0, 43.0, 22.0, 30.0, 24.0, 18.0, 16.0, 9.0, 6.0, 8.0, 9.0, 5.0, 7.0, 5.0, 1.0, 6.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.337860107421875e-06, -3.225170075893402e-06, -3.112480044364929e-06, -2.9997900128364563e-06, -2.8870999813079834e-06, -2.7744099497795105e-06, -2.6617199182510376e-06, -2.5490298867225647e-06, -2.436339855194092e-06, -2.323649823665619e-06, -2.210959792137146e-06, -2.098269760608673e-06, -1.9855797290802e-06, -1.8728896975517273e-06, -1.7601996660232544e-06, -1.6475096344947815e-06, -1.5348196029663086e-06, -1.4221295714378357e-06, -1.3094395399093628e-06, -1.1967495083808899e-06, -1.084059476852417e-06, -9.71369445323944e-07, -8.586794137954712e-07, 
-7.459893822669983e-07, -6.332993507385254e-07, -5.206093192100525e-07, -4.079192876815796e-07, -2.952292561531067e-07, -1.825392246246338e-07, -6.984919309616089e-08, 4.284083843231201e-08, 1.555308699607849e-07, 2.682209014892578e-07, 3.809109330177307e-07, 4.936009645462036e-07, 6.062909960746765e-07, 7.189810276031494e-07, 8.316710591316223e-07, 9.443610906600952e-07, 1.0570511221885681e-06, 1.169741153717041e-06, 1.282431185245514e-06, 1.3951212167739868e-06, 1.5078112483024597e-06, 1.6205012798309326e-06, 1.7331913113594055e-06, 1.8458813428878784e-06, 1.9585713744163513e-06, 2.0712614059448242e-06, 2.183951437473297e-06, 2.29664146900177e-06, 2.409331500530243e-06, 2.522021532058716e-06, 2.6347115635871887e-06, 2.7474015951156616e-06, 2.8600916266441345e-06, 2.9727816581726074e-06, 3.0854716897010803e-06, 3.1981617212295532e-06, 3.310851752758026e-06, 3.423541784286499e-06, 3.536231815814972e-06, 3.648921847343445e-06, 3.7616118788719177e-06, 3.874301910400391e-06]}, "gradients/encoder.encoder.layers.17.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 7.0, 12.0, 18.0, 42.0, 100.0, 212.0, 367.0, 127.0, 58.0, 39.0, 13.0, 8.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0], "bins": [-0.001676164218224585, -0.0016440360341221094, -0.0016119078500196338, -0.0015797795495018363, -0.0015476513653993607, -0.001515523181296885, -0.0014833949971944094, -0.001451266696676612, -0.0014191385125741363, -0.0013870103284716606, -0.001354882144369185, -0.0013227538438513875, -0.0012906256597489119, -0.0012584974756464362, -0.0012263692915439606, -0.001194240991026163, -0.0011621129233390093, -0.0011299847392365336, -0.001097856555134058, -0.0010657282546162605, -0.0010336000705137849, -0.0010014718864113092, -0.0009693437023088336, -0.000937215459998697, -0.0009050872176885605, -0.0008729590335860848, -0.0008408307912759483, -0.0008087026071734726, -0.0007765743648633361, -0.0007444461807608604, -0.0007123179966583848, -0.0006801897543482482, -0.0006480614538304508, -0.0006159332697279751, -0.0005838050274178386, -0.0005516768433153629, -0.0005195486010052264, -0.00048742041690275073, -0.00045529220369644463, -0.00042316399049013853, -0.00039103577728383243, -0.00035890756407752633, -0.00032677935087122023, -0.00029465113766491413, -0.0002625229535624385, -0.00023039472580421716, -0.0001982665271498263, -0.0001661383139435202, -0.0001340101007372141, -0.00010188188753090799, -6.97536816005595e-05, -3.762547567021102e-05, -5.497262463904917e-06, 2.6630950742401183e-05, 5.8759149396792054e-05, 9.088736260309815e-05, 0.00012301557580940425, 0.00015514378901571035, 0.00018727200222201645, 0.00021940020087640733, 0.0002515283995307982, 0.00028365664184093475, 0.0003157848259434104, 0.0003479130391497165, 0.0003800412523560226]}, "gradients/encoder.encoder.layers.17.layer_norm.bias": {"_type": "histogram", "values": [2.0, 4.0, 0.0, 4.0, 4.0, 10.0, 7.0, 8.0, 14.0, 11.0, 14.0, 26.0, 26.0, 27.0, 24.0, 38.0, 39.0, 36.0, 55.0, 58.0, 56.0, 47.0, 52.0, 63.0, 46.0, 45.0, 46.0, 42.0, 40.0, 45.0, 23.0, 17.0, 15.0, 14.0, 12.0, 14.0, 12.0, 4.0, 4.0, 8.0, 6.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00019353628158569336, -0.00018495041877031326, -0.00017636455595493317, -0.00016777869313955307, 
-0.00015919283032417297, -0.00015060696750879288, -0.00014202110469341278, -0.00013343524187803268, -0.0001248493790626526, -0.00011626351624727249, -0.0001076776534318924, -9.90917906165123e-05, -9.05059278011322e-05, -8.19200649857521e-05, -7.333420217037201e-05, -6.474833935499191e-05, -5.6162476539611816e-05, -4.757661372423172e-05, -3.8990750908851624e-05, -3.0404888093471527e-05, -2.181902527809143e-05, -1.3233162462711334e-05, -4.647299647331238e-06, 3.938563168048859e-06, 1.2524425983428955e-05, 2.111028879880905e-05, 2.9696151614189148e-05, 3.8282014429569244e-05, 4.686787724494934e-05, 5.545374006032944e-05, 6.403960287570953e-05, 7.262546569108963e-05, 8.121132850646973e-05, 8.979719132184982e-05, 9.838305413722992e-05, 0.00010696891695261002, 0.00011555477976799011, 0.0001241406425833702, 0.0001327265053987503, 0.0001413123682141304, 0.0001498982310295105, 0.0001584840938448906, 0.0001670699566602707, 0.0001756558194756508, 0.00018424168229103088, 0.00019282754510641098, 0.00020141340792179108, 0.00020999927073717117, 0.00021858513355255127, 0.00022717099636793137, 0.00023575685918331146, 0.00024434272199869156, 0.00025292858481407166, 0.00026151444762945175, 0.00027010031044483185, 0.00027868617326021194, 0.00028727203607559204, 0.00029585789889097214, 0.00030444376170635223, 0.00031302962452173233, 0.0003216154873371124, 0.0003302013501524925, 0.0003387872129678726, 0.0003473730757832527, 0.0003559589385986328]}, "gradients/encoder.encoder.layers.16.feed_forward.output_dense.weight": {"_type": "histogram", "values": [3.0, 8.0, 13.0, 32.0, 74.0, 145.0, 247.0, 337.0, 568.0, 864.0, 1641.0, 3027.0, 5366.0, 10023.0, 21602.0, 52539.0, 175219.0, 3467231.0, 326281.0, 69354.0, 27709.0, 12994.0, 6931.0, 4186.0, 2442.0, 1564.0, 1034.0, 680.0, 525.0, 366.0, 258.0, 199.0, 157.0, 103.0, 102.0, 86.0, 59.0, 43.0, 56.0, 39.0, 19.0, 21.0, 24.0, 18.0, 16.0, 15.0, 6.0, 14.0, 9.0, 6.0, 11.0, 3.0, 3.0, 2.0, 6.0, 7.0, 4.0, 2.0, 2.0, 3.0, 4.0, 0.0, 0.0, 2.0], "bins": [-6.884336471557617e-05, -6.49457797408104e-05, -6.104819476604462e-05, -5.715060979127884e-05, -5.325302481651306e-05, -4.9355439841747284e-05, -4.5457854866981506e-05, -4.156026989221573e-05, -3.766268491744995e-05, -3.3765099942684174e-05, -2.9867514967918396e-05, -2.596992999315262e-05, -2.207234501838684e-05, -1.8174760043621063e-05, -1.4277175068855286e-05, -1.0379590094089508e-05, -6.4820051193237305e-06, -2.584420144557953e-06, 1.3131648302078247e-06, 5.210749804973602e-06, 9.10833477973938e-06, 1.3005919754505157e-05, 1.6903504729270935e-05, 2.0801089704036713e-05, 2.469867467880249e-05, 2.8596259653568268e-05, 3.2493844628334045e-05, 3.639142960309982e-05, 4.02890145778656e-05, 4.418659955263138e-05, 4.8084184527397156e-05, 5.198176950216293e-05, 5.587935447692871e-05, 5.977693945169449e-05, 6.367452442646027e-05, 6.757210940122604e-05, 7.146969437599182e-05, 7.53672793507576e-05, 7.926486432552338e-05, 8.316244930028915e-05, 8.706003427505493e-05, 9.095761924982071e-05, 9.485520422458649e-05, 9.875278919935226e-05, 0.00010265037417411804, 0.00010654795914888382, 0.0001104455441236496, 0.00011434312909841537, 0.00011824071407318115, 0.00012213829904794693, 0.0001260358840227127, 0.00012993346899747849, 0.00013383105397224426, 0.00013772863894701004, 0.00014162622392177582, 0.0001455238088965416, 0.00014942139387130737, 0.00015331897884607315, 0.00015721656382083893, 0.0001611141487956047, 0.00016501173377037048, 0.00016890931874513626, 0.00017280690371990204, 0.00017670448869466782, 0.0001806020736694336]}, 
"gradients/encoder.encoder.layers.16.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 4.0, 5.0, 4.0, 3.0, 5.0, 14.0, 16.0, 23.0, 34.0, 38.0, 64.0, 68.0, 104.0, 129.0, 84.0, 108.0, 98.0, 59.0, 36.0, 30.0, 26.0, 17.0, 10.0, 9.0, 5.0, 7.0, 1.0, 4.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.710653305053711e-05, -1.6110017895698547e-05, -1.5113502740859985e-05, -1.4116987586021423e-05, -1.3120472431182861e-05, -1.21239572763443e-05, -1.1127442121505737e-05, -1.0130926966667175e-05, -9.134411811828613e-06, -8.137896656990051e-06, -7.141381502151489e-06, -6.144866347312927e-06, -5.148351192474365e-06, -4.151836037635803e-06, -3.155320882797241e-06, -2.158805727958679e-06, -1.1622905731201172e-06, -1.6577541828155518e-07, 8.307397365570068e-07, 1.8272548913955688e-06, 2.823770046234131e-06, 3.820285201072693e-06, 4.816800355911255e-06, 5.813315510749817e-06, 6.809830665588379e-06, 7.806345820426941e-06, 8.802860975265503e-06, 9.799376130104065e-06, 1.0795891284942627e-05, 1.1792406439781189e-05, 1.2788921594619751e-05, 1.3785436749458313e-05, 1.4781951904296875e-05, 1.5778467059135437e-05, 1.6774982213974e-05, 1.777149736881256e-05, 1.8768012523651123e-05, 1.9764527678489685e-05, 2.0761042833328247e-05, 2.175755798816681e-05, 2.275407314300537e-05, 2.3750588297843933e-05, 2.4747103452682495e-05, 2.5743618607521057e-05, 2.674013376235962e-05, 2.773664891719818e-05, 2.8733164072036743e-05, 2.9729679226875305e-05, 3.072619438171387e-05, 3.172270953655243e-05, 3.271922469139099e-05, 3.371573984622955e-05, 3.4712255001068115e-05, 3.570877015590668e-05, 3.670528531074524e-05, 3.77018004655838e-05, 3.869831562042236e-05, 3.9694830775260925e-05, 4.069134593009949e-05, 4.168786108493805e-05, 4.268437623977661e-05, 4.368089139461517e-05, 4.4677406549453735e-05, 4.56739217042923e-05, 4.667043685913086e-05]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 3.0, 7.0, 10.0, 10.0, 17.0, 24.0, 37.0, 43.0, 52.0, 109.0, 141.0, 249.0, 460.0, 819.0, 1413.0, 2760.0, 5468.0, 11067.0, 24183.0, 56847.0, 160434.0, 1122667.0, 2488353.0, 198827.0, 66663.0, 27663.0, 12810.0, 6220.0, 3114.0, 1552.0, 920.0, 500.0, 317.0, 168.0, 111.0, 79.0, 44.0, 37.0, 26.0, 22.0, 19.0, 9.0, 4.0, 5.0, 2.0, 0.0, 4.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.196996688842773e-05, -8.880812674760818e-05, -8.564628660678864e-05, -8.248444646596909e-05, -7.932260632514954e-05, -7.616076618432999e-05, -7.299892604351044e-05, -6.983708590269089e-05, -6.667524576187134e-05, -6.351340562105179e-05, -6.035156548023224e-05, -5.718972533941269e-05, -5.402788519859314e-05, -5.086604505777359e-05, -4.770420491695404e-05, -4.454236477613449e-05, -4.138052463531494e-05, -3.821868449449539e-05, -3.505684435367584e-05, -3.189500421285629e-05, -2.8733164072036743e-05, -2.5571323931217194e-05, -2.2409483790397644e-05, -1.9247643649578094e-05, -1.6085803508758545e-05, -1.2923963367938995e-05, -9.762123227119446e-06, -6.600283086299896e-06, -3.4384429454803467e-06, -2.766028046607971e-07, 2.8852373361587524e-06, 6.047077476978302e-06, 9.208917617797852e-06, 1.2370757758617401e-05, 1.553259789943695e-05, 1.86944380402565e-05, 2.185627818107605e-05, 2.50181183218956e-05, 2.817995846271515e-05, 3.13417986035347e-05, 3.450363874435425e-05, 3.76654788851738e-05, 4.082731902599335e-05, 
4.39891591668129e-05, 4.7150999307632446e-05, 5.0312839448451996e-05, 5.3474679589271545e-05, 5.6636519730091095e-05, 5.9798359870910645e-05, 6.29602000117302e-05, 6.612204015254974e-05, 6.928388029336929e-05, 7.244572043418884e-05, 7.560756057500839e-05, 7.876940071582794e-05, 8.193124085664749e-05, 8.509308099746704e-05, 8.825492113828659e-05, 9.141676127910614e-05, 9.457860141992569e-05, 9.774044156074524e-05, 0.00010090228170156479, 0.00010406412184238434, 0.00010722596198320389, 0.00011038780212402344]}, "gradients/encoder.encoder.layers.16.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 5.0, 10.0, 4.0, 14.0, 11.0, 16.0, 24.0, 33.0, 50.0, 59.0, 87.0, 95.0, 156.0, 254.0, 750.0, 1242.0, 492.0, 201.0, 128.0, 93.0, 83.0, 63.0, 48.0, 33.0, 24.0, 26.0, 22.0, 23.0, 9.0, 15.0, 3.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.644559860229492e-05, -5.4127536714076996e-05, -5.180947482585907e-05, -4.9491412937641144e-05, -4.717335104942322e-05, -4.485528916120529e-05, -4.2537227272987366e-05, -4.021916538476944e-05, -3.7901103496551514e-05, -3.558304160833359e-05, -3.326497972011566e-05, -3.0946917831897736e-05, -2.862885594367981e-05, -2.6310794055461884e-05, -2.3992732167243958e-05, -2.167467027902603e-05, -1.9356608390808105e-05, -1.703854650259018e-05, -1.4720484614372253e-05, -1.2402422726154327e-05, -1.0084360837936401e-05, -7.766298949718475e-06, -5.448237061500549e-06, -3.1301751732826233e-06, -8.121132850646973e-07, 1.5059486031532288e-06, 3.824010491371155e-06, 6.142072379589081e-06, 8.460134267807007e-06, 1.0778196156024933e-05, 1.3096258044242859e-05, 1.5414319932460785e-05, 1.773238182067871e-05, 2.0050443708896637e-05, 2.2368505597114563e-05, 2.468656748533249e-05, 2.7004629373550415e-05, 2.932269126176834e-05, 3.164075314998627e-05, 3.395881503820419e-05, 3.627687692642212e-05, 3.8594938814640045e-05, 4.091300070285797e-05, 4.32310625910759e-05, 4.554912447929382e-05, 4.786718636751175e-05, 5.0185248255729675e-05, 5.25033101439476e-05, 5.482137203216553e-05, 5.713943392038345e-05, 5.945749580860138e-05, 6.17755576968193e-05, 6.409361958503723e-05, 6.641168147325516e-05, 6.872974336147308e-05, 7.104780524969101e-05, 7.336586713790894e-05, 7.568392902612686e-05, 7.800199091434479e-05, 8.032005280256271e-05, 8.263811469078064e-05, 8.495617657899857e-05, 8.727423846721649e-05, 8.959230035543442e-05, 9.191036224365234e-05]}, "gradients/encoder.encoder.layers.16.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 7.0, 28.0, 90.0, 236.0, 357.0, 151.0, 91.0, 31.0, 15.0, 5.0, 5.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0003415206156205386, -0.00029373582219704986, -0.00024595099966973066, -0.00019816620624624193, -0.00015038139827083796, -0.000102596590295434, -5.481179687194526e-05, -7.026974344626069e-06, 4.075781907886267e-05, 8.854262705426663e-05, 0.0001363274350296706, 0.00018411222845315933, 0.0002318970364285633, 0.00027968184440396726, 0.000327466637827456, 0.0003752514603547752, 0.0004230362537782639, 0.00047082104720175266, 0.0005186058697290719, 0.000566390692256391, 0.0006141754565760493, 0.0006619602791033685, 0.0007097451016306877, 0.000757529865950346, 0.0008053146884776652, 
0.0008530995110049844, 0.0009008842753246427, 0.0009486690978519619, 0.000996453920379281, 0.0010442386846989393, 0.0010920234490185976, 0.0011398083297535777, 0.001187593094073236, 0.0012353778583928943, 0.0012831627391278744, 0.0013309475034475327, 0.001378732267767191, 0.001426517148502171, 0.0014743019128218293, 0.0015220867935568094, 0.0015698715578764677, 0.001617656322196126, 0.001665441202931106, 0.0017132259672507644, 0.0017610107315704226, 0.0018087956123054028, 0.001856580376625061, 0.0019043651409447193, 0.0019521499052643776, 0.001999934669584036, 0.002047719433903694, 0.0020955041982233524, 0.0021432891953736544, 0.0021910739596933126, 0.002238858724012971, 0.002286643488332629, 0.002334428485482931, 0.0023822132498025894, 0.0024299980141222477, 0.002477782778441906, 0.002525567775592208, 0.002573352539911866, 0.0026211373042315245, 0.0026689220685511827, 0.002716706832870841]}, "gradients/encoder.encoder.layers.16.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 6.0, 10.0, 14.0, 18.0, 24.0, 34.0, 43.0, 49.0, 52.0, 59.0, 72.0, 55.0, 73.0, 68.0, 69.0, 55.0, 77.0, 52.0, 38.0, 41.0, 24.0, 24.0, 15.0, 14.0, 12.0, 5.0, 4.0, 6.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0005079507827758789, -0.0004951776936650276, -0.00048240460455417633, -0.00046963151544332504, -0.00045685842633247375, -0.00044408533722162247, -0.0004313122481107712, -0.0004185391589999199, -0.0004057660698890686, -0.0003929929807782173, -0.00038021989166736603, -0.00036744680255651474, -0.00035467371344566345, -0.00034190062433481216, -0.0003291275352239609, -0.0003163544461131096, -0.0003035813570022583, -0.000290808267891407, -0.0002780351787805557, -0.00026526208966970444, -0.00025248900055885315, -0.00023971591144800186, -0.00022694282233715057, -0.00021416973322629929, -0.000201396644115448, -0.0001886235550045967, -0.00017585046589374542, -0.00016307737678289413, -0.00015030428767204285, -0.00013753119856119156, -0.00012475810945034027, -0.00011198502033948898, -9.92119312286377e-05, -8.643884211778641e-05, -7.366575300693512e-05, -6.089266389608383e-05, -4.8119574785232544e-05, -3.5346485674381256e-05, -2.2573396563529968e-05, -9.80030745267868e-06, 2.9727816581726074e-06, 1.5745870769023895e-05, 2.8518959879875183e-05, 4.129204899072647e-05, 5.406513810157776e-05, 6.683822721242905e-05, 7.961131632328033e-05, 9.238440543413162e-05, 0.00010515749454498291, 0.0001179305836558342, 0.00013070367276668549, 0.00014347676187753677, 0.00015624985098838806, 0.00016902294009923935, 0.00018179602921009064, 0.00019456911832094193, 0.0002073422074317932, 0.0002201152965426445, 0.0002328883856534958, 0.0002456614747643471, 0.00025843456387519836, 0.00027120765298604965, 0.00028398074209690094, 0.00029675383120775223, 0.0003095269203186035]}, "gradients/encoder.encoder.layers.16.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 3.0, 7.0, 7.0, 10.0, 13.0, 17.0, 35.0, 50.0, 69.0, 115.0, 159.0, 244.0, 351.0, 515.0, 756.0, 1278.0, 2123.0, 3647.0, 6618.0, 12319.0, 24941.0, 55996.0, 167401.0, 535682.0, 139131.0, 48885.0, 22312.0, 11171.0, 5992.0, 3416.0, 1993.0, 1124.0, 759.0, 435.0, 317.0, 221.0, 157.0, 106.0, 62.0, 36.0, 25.0, 16.0, 9.0, 16.0, 11.0, 5.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012934207916259766, -0.00012522563338279724, -0.00012110918760299683, -0.00011699274182319641, 
-0.000112876296043396, -0.00010875985026359558, -0.00010464340448379517, -0.00010052695870399475, -9.641051292419434e-05, -9.229406714439392e-05, -8.81776213645935e-05, -8.406117558479309e-05, -7.994472980499268e-05, -7.582828402519226e-05, -7.171183824539185e-05, -6.759539246559143e-05, -6.347894668579102e-05, -5.93625009059906e-05, -5.5246055126190186e-05, -5.112960934638977e-05, -4.7013163566589355e-05, -4.289671778678894e-05, -3.8780272006988525e-05, -3.466382622718811e-05, -3.0547380447387695e-05, -2.643093466758728e-05, -2.2314488887786865e-05, -1.819804310798645e-05, -1.4081597328186035e-05, -9.96515154838562e-06, -5.848705768585205e-06, -1.73225998878479e-06, 2.384185791015625e-06, 6.50063157081604e-06, 1.0617077350616455e-05, 1.473352313041687e-05, 1.8849968910217285e-05, 2.29664146900177e-05, 2.7082860469818115e-05, 3.119930624961853e-05, 3.5315752029418945e-05, 3.943219780921936e-05, 4.3548643589019775e-05, 4.766508936882019e-05, 5.1781535148620605e-05, 5.589798092842102e-05, 6.0014426708221436e-05, 6.413087248802185e-05, 6.824731826782227e-05, 7.236376404762268e-05, 7.64802098274231e-05, 8.059665560722351e-05, 8.471310138702393e-05, 8.882954716682434e-05, 9.294599294662476e-05, 9.706243872642517e-05, 0.00010117888450622559, 0.000105295330286026, 0.00010941177606582642, 0.00011352822184562683, 0.00011764466762542725, 0.00012176111340522766, 0.00012587755918502808, 0.0001299940049648285, 0.0001341104507446289]}, "gradients/encoder.encoder.layers.16.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 1.0, 1.0, 4.0, 6.0, 9.0, 8.0, 17.0, 21.0, 18.0, 27.0, 31.0, 38.0, 55.0, 75.0, 78.0, 88.0, 88.0, 78.0, 69.0, 60.0, 44.0, 31.0, 31.0, 23.0, 23.0, 19.0, 14.0, 14.0, 5.0, 10.0, 4.0, 2.0, 3.0, 3.0, 0.0, 4.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-2.855062484741211e-05, -2.7801841497421265e-05, -2.705305814743042e-05, -2.6304274797439575e-05, -2.555549144744873e-05, -2.4806708097457886e-05, -2.405792474746704e-05, -2.3309141397476196e-05, -2.256035804748535e-05, -2.1811574697494507e-05, -2.1062791347503662e-05, -2.0314007997512817e-05, -1.9565224647521973e-05, -1.8816441297531128e-05, -1.8067657947540283e-05, -1.731887459754944e-05, -1.6570091247558594e-05, -1.582130789756775e-05, -1.5072524547576904e-05, -1.432374119758606e-05, -1.3574957847595215e-05, -1.282617449760437e-05, -1.2077391147613525e-05, -1.132860779762268e-05, -1.0579824447631836e-05, -9.831041097640991e-06, -9.082257747650146e-06, -8.333474397659302e-06, -7.584691047668457e-06, -6.835907697677612e-06, -6.087124347686768e-06, -5.338340997695923e-06, -4.589557647705078e-06, -3.840774297714233e-06, -3.0919909477233887e-06, -2.343207597732544e-06, -1.5944242477416992e-06, -8.456408977508545e-07, -9.685754776000977e-08, 6.51925802230835e-07, 1.4007091522216797e-06, 2.1494925022125244e-06, 2.898275852203369e-06, 3.647059202194214e-06, 4.395842552185059e-06, 5.144625902175903e-06, 5.893409252166748e-06, 6.642192602157593e-06, 7.3909759521484375e-06, 8.139759302139282e-06, 8.888542652130127e-06, 9.637326002120972e-06, 1.0386109352111816e-05, 1.1134892702102661e-05, 1.1883676052093506e-05, 1.263245940208435e-05, 1.3381242752075195e-05, 1.413002610206604e-05, 1.4878809452056885e-05, 1.562759280204773e-05, 1.6376376152038574e-05, 1.712515950202942e-05, 1.7873942852020264e-05, 1.862272620201111e-05, 1.9371509552001953e-05]}, "gradients/encoder.encoder.layers.16.attention.v_proj.weight": {"_type": "histogram", "values": 
[1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 1.0, 6.0, 11.0, 8.0, 7.0, 36.0, 48.0, 76.0, 88.0, 117.0, 169.0, 296.0, 357.0, 675.0, 906.0, 1297.0, 1958.0, 3105.0, 4506.0, 7190.0, 11166.0, 18292.0, 29068.0, 50882.0, 88402.0, 200578.0, 343183.0, 122469.0, 65227.0, 36663.0, 22790.0, 13480.0, 8788.0, 5629.0, 3734.0, 2292.0, 1722.0, 1043.0, 820.0, 480.0, 319.0, 215.0, 155.0, 85.0, 72.0, 55.0, 27.0, 23.0, 19.0, 5.0, 5.0, 6.0, 6.0, 2.0, 3.0, 0.0, 3.0], "bins": [-6.03795051574707e-05, -5.8562494814395905e-05, -5.6745484471321106e-05, -5.492847412824631e-05, -5.311146378517151e-05, -5.129445344209671e-05, -4.947744309902191e-05, -4.766043275594711e-05, -4.5843422412872314e-05, -4.4026412069797516e-05, -4.220940172672272e-05, -4.039239138364792e-05, -3.857538104057312e-05, -3.675837069749832e-05, -3.494136035442352e-05, -3.3124350011348724e-05, -3.1307339668273926e-05, -2.9490329325199127e-05, -2.767331898212433e-05, -2.585630863904953e-05, -2.403929829597473e-05, -2.2222287952899933e-05, -2.0405277609825134e-05, -1.8588267266750336e-05, -1.6771256923675537e-05, -1.4954246580600739e-05, -1.313723623752594e-05, -1.1320225894451141e-05, -9.503215551376343e-06, -7.686205208301544e-06, -5.869194865226746e-06, -4.052184522151947e-06, -2.2351741790771484e-06, -4.1816383600234985e-07, 1.3988465070724487e-06, 3.2158568501472473e-06, 5.032867193222046e-06, 6.8498775362968445e-06, 8.666887879371643e-06, 1.0483898222446442e-05, 1.230090856552124e-05, 1.4117918908596039e-05, 1.5934929251670837e-05, 1.7751939594745636e-05, 1.9568949937820435e-05, 2.1385960280895233e-05, 2.3202970623970032e-05, 2.501998096704483e-05, 2.683699131011963e-05, 2.8654001653194427e-05, 3.0471011996269226e-05, 3.2288022339344025e-05, 3.410503268241882e-05, 3.592204302549362e-05, 3.773905336856842e-05, 3.955606371164322e-05, 4.137307405471802e-05, 4.3190084397792816e-05, 4.5007094740867615e-05, 4.682410508394241e-05, 4.864111542701721e-05, 5.045812577009201e-05, 5.227513611316681e-05, 5.409214645624161e-05, 5.5909156799316406e-05]}, "gradients/encoder.encoder.layers.16.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 2.0, 7.0, 3.0, 4.0, 2.0, 9.0, 4.0, 6.0, 8.0, 12.0, 17.0, 25.0, 23.0, 26.0, 34.0, 32.0, 31.0, 41.0, 43.0, 46.0, 41.0, 49.0, 52.0, 55.0, 55.0, 35.0, 44.0, 45.0, 35.0, 49.0, 29.0, 21.0, 26.0, 13.0, 20.0, 17.0, 6.0, 15.0, 7.0, 6.0, 4.0, 5.0, 8.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.0187110900878906e-05, -4.855450242757797e-05, -4.692189395427704e-05, -4.5289285480976105e-05, -4.365667700767517e-05, -4.202406853437424e-05, -4.03914600610733e-05, -3.875885158777237e-05, -3.7126243114471436e-05, -3.54936346411705e-05, -3.386102616786957e-05, -3.2228417694568634e-05, -3.05958092212677e-05, -2.8963200747966766e-05, -2.7330592274665833e-05, -2.56979838013649e-05, -2.4065375328063965e-05, -2.243276685476303e-05, -2.0800158381462097e-05, -1.9167549908161163e-05, -1.753494143486023e-05, -1.5902332961559296e-05, -1.4269724488258362e-05, -1.2637116014957428e-05, -1.1004507541656494e-05, -9.37189906835556e-06, -7.739290595054626e-06, -6.106682121753693e-06, -4.474073648452759e-06, -2.841465175151825e-06, -1.2088567018508911e-06, 4.237517714500427e-07, 2.0563602447509766e-06, 3.6889687180519104e-06, 5.321577191352844e-06, 6.954185664653778e-06, 8.586794137954712e-06, 1.0219402611255646e-05, 1.185201108455658e-05, 1.3484619557857513e-05, 1.5117228031158447e-05, 1.674983650445938e-05, 1.8382444977760315e-05, 2.001505345106125e-05, 2.1647661924362183e-05, 
2.3280270397663116e-05, 2.491287887096405e-05, 2.6545487344264984e-05, 2.8178095817565918e-05, 2.9810704290866852e-05, 3.1443312764167786e-05, 3.307592123746872e-05, 3.470852971076965e-05, 3.634113818407059e-05, 3.797374665737152e-05, 3.9606355130672455e-05, 4.123896360397339e-05, 4.287157207727432e-05, 4.4504180550575256e-05, 4.613678902387619e-05, 4.7769397497177124e-05, 4.940200597047806e-05, 5.103461444377899e-05, 5.2667222917079926e-05, 5.429983139038086e-05]}, "gradients/encoder.encoder.layers.16.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 4.0, 2.0, 5.0, 10.0, 14.0, 19.0, 32.0, 47.0, 91.0, 99.0, 200.0, 325.0, 690.0, 1066.0, 2494.0, 4885.0, 14440.0, 44118.0, 386902.0, 492865.0, 68637.0, 19458.0, 6052.0, 3180.0, 1202.0, 775.0, 364.0, 201.0, 129.0, 89.0, 49.0, 38.0, 19.0, 18.0, 10.0, 10.0, 5.0, 7.0, 5.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.341104507446289e-05, -1.2965872883796692e-05, -1.2520700693130493e-05, -1.2075528502464294e-05, -1.1630356311798096e-05, -1.1185184121131897e-05, -1.0740011930465698e-05, -1.02948397397995e-05, -9.8496675491333e-06, -9.404495358467102e-06, -8.959323167800903e-06, -8.514150977134705e-06, -8.068978786468506e-06, -7.623806595802307e-06, -7.178634405136108e-06, -6.73346221446991e-06, -6.288290023803711e-06, -5.843117833137512e-06, -5.3979456424713135e-06, -4.952773451805115e-06, -4.507601261138916e-06, -4.062429070472717e-06, -3.6172568798065186e-06, -3.17208468914032e-06, -2.726912498474121e-06, -2.2817403078079224e-06, -1.8365681171417236e-06, -1.391395926475525e-06, -9.462237358093262e-07, -5.010515451431274e-07, -5.587935447692871e-08, 3.8929283618927e-07, 8.344650268554688e-07, 1.2796372175216675e-06, 1.7248094081878662e-06, 2.169981598854065e-06, 2.6151537895202637e-06, 3.0603259801864624e-06, 3.505498170852661e-06, 3.95067036151886e-06, 4.395842552185059e-06, 4.841014742851257e-06, 5.286186933517456e-06, 5.731359124183655e-06, 6.1765313148498535e-06, 6.621703505516052e-06, 7.066875696182251e-06, 7.51204788684845e-06, 7.957220077514648e-06, 8.402392268180847e-06, 8.847564458847046e-06, 9.292736649513245e-06, 9.737908840179443e-06, 1.0183081030845642e-05, 1.062825322151184e-05, 1.107342541217804e-05, 1.1518597602844238e-05, 1.1963769793510437e-05, 1.2408941984176636e-05, 1.2854114174842834e-05, 1.3299286365509033e-05, 1.3744458556175232e-05, 1.418963074684143e-05, 1.463480293750763e-05, 1.5079975128173828e-05]}, "gradients/encoder.encoder.layers.16.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0, 5.0, 5.0, 13.0, 13.0, 20.0, 11.0, 20.0, 19.0, 31.0, 40.0, 44.0, 54.0, 48.0, 63.0, 54.0, 56.0, 45.0, 91.0, 49.0, 50.0, 40.0, 43.0, 32.0, 30.0, 25.0, 16.0, 21.0, 18.0, 10.0, 9.0, 7.0, 5.0, 2.0, 3.0, 0.0, 2.0, 2.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9073486328125e-06, -1.8458813428878784e-06, -1.7844140529632568e-06, -1.7229467630386353e-06, -1.6614794731140137e-06, -1.600012183189392e-06, -1.5385448932647705e-06, -1.477077603340149e-06, -1.4156103134155273e-06, -1.3541430234909058e-06, -1.2926757335662842e-06, -1.2312084436416626e-06, -1.169741153717041e-06, -1.1082738637924194e-06, -1.0468065738677979e-06, -9.853392839431763e-07, -9.238719940185547e-07, -8.624047040939331e-07, -8.009374141693115e-07, -7.394701242446899e-07, -6.780028343200684e-07, -6.165355443954468e-07, -5.550682544708252e-07, -4.936009645462036e-07, 
-4.3213367462158203e-07, -3.7066638469696045e-07, -3.0919909477233887e-07, -2.477318048477173e-07, -1.862645149230957e-07, -1.2479722499847412e-07, -6.332993507385254e-08, -1.862645149230957e-09, 5.960464477539063e-08, 1.210719347000122e-07, 1.825392246246338e-07, 2.4400651454925537e-07, 3.0547380447387695e-07, 3.6694109439849854e-07, 4.284083843231201e-07, 4.898756742477417e-07, 5.513429641723633e-07, 6.128102540969849e-07, 6.742775440216064e-07, 7.35744833946228e-07, 7.972121238708496e-07, 8.586794137954712e-07, 9.201467037200928e-07, 9.816139936447144e-07, 1.043081283569336e-06, 1.1045485734939575e-06, 1.166015863418579e-06, 1.2274831533432007e-06, 1.2889504432678223e-06, 1.3504177331924438e-06, 1.4118850231170654e-06, 1.473352313041687e-06, 1.5348196029663086e-06, 1.5962868928909302e-06, 1.6577541828155518e-06, 1.7192214727401733e-06, 1.780688762664795e-06, 1.8421560525894165e-06, 1.903623342514038e-06, 1.9650906324386597e-06, 2.0265579223632812e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 7.0, 8.0, 12.0, 13.0, 15.0, 22.0, 41.0, 66.0, 69.0, 81.0, 116.0, 187.0, 311.0, 516.0, 800.0, 993.0, 2194.0, 4211.0, 9359.0, 26364.0, 101728.0, 475863.0, 338516.0, 55702.0, 16874.0, 6833.0, 2701.0, 1916.0, 1135.0, 643.0, 394.0, 299.0, 145.0, 142.0, 92.0, 49.0, 41.0, 26.0, 14.0, 18.0, 11.0, 11.0, 8.0, 2.0, 2.0, 4.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.58306884765625e-06, -8.295290172100067e-06, -8.007511496543884e-06, -7.719732820987701e-06, -7.4319541454315186e-06, -7.144175469875336e-06, -6.856396794319153e-06, -6.56861811876297e-06, -6.280839443206787e-06, -5.993060767650604e-06, -5.705282092094421e-06, -5.4175034165382385e-06, -5.129724740982056e-06, -4.841946065425873e-06, -4.55416738986969e-06, -4.266388714313507e-06, -3.978610038757324e-06, -3.6908313632011414e-06, -3.4030526876449585e-06, -3.1152740120887756e-06, -2.8274953365325928e-06, -2.53971666097641e-06, -2.251937985420227e-06, -1.964159309864044e-06, -1.6763806343078613e-06, -1.3886019587516785e-06, -1.1008232831954956e-06, -8.130446076393127e-07, -5.252659320831299e-07, -2.3748725652694702e-07, 5.029141902923584e-08, 3.380700945854187e-07, 6.258487701416016e-07, 9.136274456977844e-07, 1.2014061212539673e-06, 1.4891847968101501e-06, 1.776963472366333e-06, 2.064742147922516e-06, 2.3525208234786987e-06, 2.6402994990348816e-06, 2.9280781745910645e-06, 3.2158568501472473e-06, 3.50363552570343e-06, 3.791414201259613e-06, 4.079192876815796e-06, 4.366971552371979e-06, 4.654750227928162e-06, 4.9425289034843445e-06, 5.230307579040527e-06, 5.51808625459671e-06, 5.805864930152893e-06, 6.093643605709076e-06, 6.381422281265259e-06, 6.669200956821442e-06, 6.9569796323776245e-06, 7.244758307933807e-06, 7.53253698348999e-06, 7.820315659046173e-06, 8.108094334602356e-06, 8.395873010158539e-06, 8.683651685714722e-06, 8.971430361270905e-06, 9.259209036827087e-06, 9.54698771238327e-06, 9.834766387939453e-06]}, "gradients/encoder.encoder.layers.16.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 5.0, 3.0, 4.0, 4.0, 7.0, 7.0, 11.0, 16.0, 19.0, 28.0, 60.0, 73.0, 121.0, 121.0, 114.0, 129.0, 73.0, 60.0, 51.0, 38.0, 12.0, 15.0, 8.0, 12.0, 5.0, 1.0, 0.0, 3.0, 2.0, 4.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.225440979003906e-06, -8.020550012588501e-06, -7.815659046173096e-06, 
-7.6107680797576904e-06, -7.405877113342285e-06, -7.20098614692688e-06, -6.996095180511475e-06, -6.791204214096069e-06, -6.586313247680664e-06, -6.381422281265259e-06, -6.1765313148498535e-06, -5.971640348434448e-06, -5.766749382019043e-06, -5.561858415603638e-06, -5.356967449188232e-06, -5.152076482772827e-06, -4.947185516357422e-06, -4.742294549942017e-06, -4.537403583526611e-06, -4.332512617111206e-06, -4.127621650695801e-06, -3.9227306842803955e-06, -3.7178397178649902e-06, -3.512948751449585e-06, -3.3080577850341797e-06, -3.1031668186187744e-06, -2.898275852203369e-06, -2.693384885787964e-06, -2.4884939193725586e-06, -2.2836029529571533e-06, -2.078711986541748e-06, -1.8738210201263428e-06, -1.6689300537109375e-06, -1.4640390872955322e-06, -1.259148120880127e-06, -1.0542571544647217e-06, -8.493661880493164e-07, -6.444752216339111e-07, -4.3958425521850586e-07, -2.3469328880310059e-07, -2.9802322387695312e-08, 1.7508864402770996e-07, 3.7997961044311523e-07, 5.848705768585205e-07, 7.897615432739258e-07, 9.94652509689331e-07, 1.1995434761047363e-06, 1.4044344425201416e-06, 1.6093254089355469e-06, 1.8142163753509521e-06, 2.0191073417663574e-06, 2.2239983081817627e-06, 2.428889274597168e-06, 2.6337802410125732e-06, 2.8386712074279785e-06, 3.043562173843384e-06, 3.248453140258789e-06, 3.4533441066741943e-06, 3.6582350730895996e-06, 3.863126039505005e-06, 4.06801700592041e-06, 4.2729079723358154e-06, 4.477798938751221e-06, 4.682689905166626e-06, 4.887580871582031e-06]}, "gradients/encoder.encoder.layers.16.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0, 3.0, 2.0, 4.0, 1.0, 5.0, 11.0, 7.0, 13.0, 16.0, 34.0, 41.0, 50.0, 75.0, 140.0, 184.0, 96.0, 66.0, 49.0, 50.0, 28.0, 30.0, 17.0, 13.0, 15.0, 7.0, 8.0, 4.0, 7.0, 8.0, 5.0, 6.0, 2.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00021596763690467924, -0.00020804685482289642, -0.00020012605818919837, -0.00019220527610741556, -0.00018428449402563274, -0.0001763636973919347, -0.00016844291531015188, -0.00016052211867645383, -0.000152601336594671, -0.0001446805545128882, -0.00013675975787919015, -0.00012883897579740733, -0.0001209181864396669, -0.00011299739708192647, -0.00010507661500014365, -9.715582564240322e-05, -8.923503628466278e-05, -8.131424692692235e-05, -7.339345756918192e-05, -6.54726754873991e-05, -5.755188612965867e-05, -4.963109677191824e-05, -4.171031105215661e-05, -3.378952533239499e-05, -2.5868735974654555e-05, -1.7947948435903527e-05, -1.0027160897152498e-05, -2.10637335840147e-06, 5.814414180349559e-06, 1.373520353808999e-05, 2.1655989257851616e-05, 2.957677497761324e-05, 3.74975788872689e-05, 4.541836824500933e-05, 5.333915396477096e-05, 6.125993968453258e-05, 6.918072904227301e-05, 7.710151840001345e-05, 8.502230048179626e-05, 9.29430898395367e-05, 0.00010086387919727713, 0.00010878466855501756, 0.00011670545791275799, 0.0001246262399945408, 0.00013254702207632363, 0.00014046781871002167, 0.0001483886007918045, 0.00015630939742550254, 0.00016423017950728536, 0.00017215096158906817, 0.00018007175822276622, 0.00018799254030454904, 0.00019591333693824708, 0.0002038341190200299, 0.00021175490110181272, 0.00021967568318359554, 0.00022759647981729358, 0.0002355172618990764, 0.00024343805853277445, 0.00025135884061455727, 0.0002592796226963401, 0.0002672004047781229, 0.0002751212159637362, 0.000283041998045519, 0.0002909627801273018]}, "gradients/encoder.encoder.layers.16.layer_norm.bias": {"_type": 
"histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 3.0, 0.0, 3.0, 9.0, 4.0, 6.0, 0.0, 15.0, 10.0, 7.0, 17.0, 13.0, 25.0, 27.0, 24.0, 26.0, 33.0, 32.0, 33.0, 48.0, 42.0, 39.0, 34.0, 44.0, 49.0, 47.0, 45.0, 38.0, 43.0, 41.0, 27.0, 34.0, 28.0, 30.0, 25.0, 21.0, 21.0, 15.0, 14.0, 10.0, 11.0, 9.0, 3.0, 2.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 4.0], "bins": [-0.00017625093460083008, -0.00017139501869678497, -0.00016653910279273987, -0.00016168318688869476, -0.00015682727098464966, -0.00015197135508060455, -0.00014711543917655945, -0.00014225952327251434, -0.00013740360736846924, -0.00013254769146442413, -0.00012769177556037903, -0.00012283585965633392, -0.00011797994375228882, -0.00011312402784824371, -0.00010826811194419861, -0.0001034121960401535, -9.85562801361084e-05, -9.37003642320633e-05, -8.884444832801819e-05, -8.398853242397308e-05, -7.913261651992798e-05, -7.427670061588287e-05, -6.942078471183777e-05, -6.456486880779266e-05, -5.970895290374756e-05, -5.4853036999702454e-05, -4.999712109565735e-05, -4.5141205191612244e-05, -4.028528928756714e-05, -3.5429373383522034e-05, -3.057345747947693e-05, -2.5717541575431824e-05, -2.086162567138672e-05, -1.6005709767341614e-05, -1.1149793863296509e-05, -6.293877959251404e-06, -1.4379620552062988e-06, 3.417953848838806e-06, 8.273869752883911e-06, 1.3129785656929016e-05, 1.798570156097412e-05, 2.2841617465019226e-05, 2.769753336906433e-05, 3.2553449273109436e-05, 3.740936517715454e-05, 4.2265281081199646e-05, 4.712119698524475e-05, 5.1977112889289856e-05, 5.683302879333496e-05, 6.168894469738007e-05, 6.654486060142517e-05, 7.140077650547028e-05, 7.625669240951538e-05, 8.111260831356049e-05, 8.596852421760559e-05, 9.08244401216507e-05, 9.56803560256958e-05, 0.0001005362719297409, 0.00010539218783378601, 0.00011024810373783112, 0.00011510401964187622, 0.00011995993554592133, 0.00012481585144996643, 0.00012967176735401154, 0.00013452768325805664]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 5.0, 3.0, 3.0, 1.0, 8.0, 2.0, 6.0, 10.0, 20.0, 15.0, 41.0, 35.0, 59.0, 77.0, 80.0, 133.0, 183.0, 305.0, 419.0, 581.0, 937.0, 1521.0, 2334.0, 4060.0, 6995.0, 12898.0, 25436.0, 52292.0, 136541.0, 2720204.0, 1018579.0, 111802.0, 45779.0, 22638.0, 11846.0, 6682.0, 4007.0, 2515.0, 1665.0, 1082.0, 754.0, 503.0, 355.0, 257.0, 179.0, 152.0, 84.0, 50.0, 45.0, 37.0, 25.0, 23.0, 16.0, 5.0, 4.0, 4.0, 4.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00013244152069091797, -0.0001282915472984314, -0.00012414157390594482, -0.00011999160051345825, -0.00011584162712097168, -0.00011169165372848511, -0.00010754168033599854, -0.00010339170694351196, -9.924173355102539e-05, -9.509176015853882e-05, -9.094178676605225e-05, -8.679181337356567e-05, -8.26418399810791e-05, -7.849186658859253e-05, -7.434189319610596e-05, -7.019191980361938e-05, -6.604194641113281e-05, -6.189197301864624e-05, -5.774199962615967e-05, -5.3592026233673096e-05, -4.9442052841186523e-05, -4.529207944869995e-05, -4.114210605621338e-05, -3.699213266372681e-05, -3.2842159271240234e-05, -2.8692185878753662e-05, -2.454221248626709e-05, -2.0392239093780518e-05, -1.6242265701293945e-05, -1.2092292308807373e-05, -7.9423189163208e-06, -3.7923455238342285e-06, 3.5762786865234375e-07, 4.507601261138916e-06, 8.657574653625488e-06, 1.280754804611206e-05, 1.6957521438598633e-05, 2.1107494831085205e-05, 2.5257468223571777e-05, 2.940744161605835e-05, 3.355741500854492e-05, 3.7707388401031494e-05, 4.1857361793518066e-05, 
4.600733518600464e-05, 5.015730857849121e-05, 5.430728197097778e-05, 5.8457255363464355e-05, 6.260722875595093e-05, 6.67572021484375e-05, 7.090717554092407e-05, 7.505714893341064e-05, 7.920712232589722e-05, 8.335709571838379e-05, 8.750706911087036e-05, 9.165704250335693e-05, 9.58070158958435e-05, 9.995698928833008e-05, 0.00010410696268081665, 0.00010825693607330322, 0.0001124069094657898, 0.00011655688285827637, 0.00012070685625076294, 0.0001248568296432495, 0.00012900680303573608, 0.00013315677642822266]}, "gradients/encoder.encoder.layers.15.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 2.0, 4.0, 2.0, 6.0, 9.0, 6.0, 5.0, 18.0, 16.0, 30.0, 32.0, 35.0, 42.0, 50.0, 63.0, 75.0, 79.0, 86.0, 80.0, 66.0, 66.0, 45.0, 48.0, 31.0, 17.0, 23.0, 15.0, 14.0, 9.0, 7.0, 5.0, 4.0, 7.0, 2.0, 4.0, 0.0, 0.0, 1.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.3424625396728516e-05, -2.2676773369312286e-05, -2.1928921341896057e-05, -2.1181069314479828e-05, -2.04332172870636e-05, -1.968536525964737e-05, -1.893751323223114e-05, -1.818966120481491e-05, -1.744180917739868e-05, -1.6693957149982452e-05, -1.5946105122566223e-05, -1.5198253095149994e-05, -1.4450401067733765e-05, -1.3702549040317535e-05, -1.2954697012901306e-05, -1.2206844985485077e-05, -1.1458992958068848e-05, -1.0711140930652618e-05, -9.96328890323639e-06, -9.21543687582016e-06, -8.46758484840393e-06, -7.719732820987701e-06, -6.971880793571472e-06, -6.224028766155243e-06, -5.476176738739014e-06, -4.7283247113227844e-06, -3.980472683906555e-06, -3.232620656490326e-06, -2.4847686290740967e-06, -1.7369166016578674e-06, -9.890645742416382e-07, -2.4121254682540894e-07, 5.066394805908203e-07, 1.2544915080070496e-06, 2.002343535423279e-06, 2.750195562839508e-06, 3.4980475902557373e-06, 4.2458996176719666e-06, 4.993751645088196e-06, 5.741603672504425e-06, 6.489455699920654e-06, 7.2373077273368835e-06, 7.985159754753113e-06, 8.733011782169342e-06, 9.480863809585571e-06, 1.02287158370018e-05, 1.097656786441803e-05, 1.1724419891834259e-05, 1.2472271919250488e-05, 1.3220123946666718e-05, 1.3967975974082947e-05, 1.4715828001499176e-05, 1.5463680028915405e-05, 1.6211532056331635e-05, 1.6959384083747864e-05, 1.7707236111164093e-05, 1.8455088138580322e-05, 1.920294016599655e-05, 1.995079219341278e-05, 2.069864422082901e-05, 2.144649624824524e-05, 2.219434827566147e-05, 2.2942200303077698e-05, 2.3690052330493927e-05, 2.4437904357910156e-05]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 3.0, 7.0, 13.0, 17.0, 25.0, 35.0, 56.0, 72.0, 88.0, 125.0, 211.0, 308.0, 442.0, 662.0, 1050.0, 1483.0, 2309.0, 3415.0, 5515.0, 8856.0, 14660.0, 23941.0, 43045.0, 81233.0, 169926.0, 497917.0, 2615505.0, 404472.0, 148470.0, 71164.0, 40350.0, 22846.0, 13465.0, 7919.0, 5250.0, 3294.0, 2088.0, 1320.0, 886.0, 624.0, 416.0, 272.0, 173.0, 140.0, 66.0, 55.0, 34.0, 22.0, 24.0, 7.0, 10.0, 3.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0], "bins": [-6.520748138427734e-05, -6.319582462310791e-05, -6.118416786193848e-05, -5.917251110076904e-05, -5.716085433959961e-05, -5.5149197578430176e-05, -5.313754081726074e-05, -5.112588405609131e-05, -4.9114227294921875e-05, -4.710257053375244e-05, -4.509091377258301e-05, -4.3079257011413574e-05, -4.106760025024414e-05, -3.905594348907471e-05, -3.7044286727905273e-05, -3.503262996673584e-05, -3.3020973205566406e-05, -3.100931644439697e-05, -2.899765968322754e-05, 
-2.6986002922058105e-05, -2.4974346160888672e-05, -2.2962689399719238e-05, -2.0951032638549805e-05, -1.893937587738037e-05, -1.6927719116210938e-05, -1.4916062355041504e-05, -1.290440559387207e-05, -1.0892748832702637e-05, -8.881092071533203e-06, -6.8694353103637695e-06, -4.857778549194336e-06, -2.8461217880249023e-06, -8.344650268554688e-07, 1.1771917343139648e-06, 3.1888484954833984e-06, 5.200505256652832e-06, 7.212162017822266e-06, 9.2238187789917e-06, 1.1235475540161133e-05, 1.3247132301330566e-05, 1.52587890625e-05, 1.7270445823669434e-05, 1.9282102584838867e-05, 2.12937593460083e-05, 2.3305416107177734e-05, 2.5317072868347168e-05, 2.73287296295166e-05, 2.9340386390686035e-05, 3.135204315185547e-05, 3.33636999130249e-05, 3.5375356674194336e-05, 3.738701343536377e-05, 3.93986701965332e-05, 4.141032695770264e-05, 4.342198371887207e-05, 4.5433640480041504e-05, 4.744529724121094e-05, 4.945695400238037e-05, 5.1468610763549805e-05, 5.348026752471924e-05, 5.549192428588867e-05, 5.7503581047058105e-05, 5.951523780822754e-05, 6.152689456939697e-05, 6.35385513305664e-05]}, "gradients/encoder.encoder.layers.15.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 2.0, 1.0, 2.0, 5.0, 8.0, 4.0, 14.0, 8.0, 11.0, 17.0, 22.0, 17.0, 34.0, 49.0, 55.0, 65.0, 75.0, 109.0, 146.0, 248.0, 502.0, 1050.0, 553.0, 261.0, 152.0, 89.0, 93.0, 77.0, 78.0, 63.0, 46.0, 51.0, 37.0, 21.0, 24.0, 26.0, 13.0, 15.0, 9.0, 10.0, 6.0, 5.0, 3.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.792213439941406e-05, -4.623830318450928e-05, -4.455447196960449e-05, -4.287064075469971e-05, -4.118680953979492e-05, -3.950297832489014e-05, -3.781914710998535e-05, -3.6135315895080566e-05, -3.445148468017578e-05, -3.2767653465270996e-05, -3.108382225036621e-05, -2.9399991035461426e-05, -2.771615982055664e-05, -2.6032328605651855e-05, -2.434849739074707e-05, -2.2664666175842285e-05, -2.09808349609375e-05, -1.9297003746032715e-05, -1.761317253112793e-05, -1.5929341316223145e-05, -1.424551010131836e-05, -1.2561678886413574e-05, -1.0877847671508789e-05, -9.194016456604004e-06, -7.510185241699219e-06, -5.826354026794434e-06, -4.1425228118896484e-06, -2.4586915969848633e-06, -7.748603820800781e-07, 9.08970832824707e-07, 2.592802047729492e-06, 4.276633262634277e-06, 5.9604644775390625e-06, 7.644295692443848e-06, 9.328126907348633e-06, 1.1011958122253418e-05, 1.2695789337158203e-05, 1.4379620552062988e-05, 1.6063451766967773e-05, 1.774728298187256e-05, 1.9431114196777344e-05, 2.111494541168213e-05, 2.2798776626586914e-05, 2.44826078414917e-05, 2.6166439056396484e-05, 2.785027027130127e-05, 2.9534101486206055e-05, 3.121793270111084e-05, 3.2901763916015625e-05, 3.458559513092041e-05, 3.6269426345825195e-05, 3.795325756072998e-05, 3.9637088775634766e-05, 4.132091999053955e-05, 4.3004751205444336e-05, 4.468858242034912e-05, 4.6372413635253906e-05, 4.805624485015869e-05, 4.9740076065063477e-05, 5.142390727996826e-05, 5.310773849487305e-05, 5.479156970977783e-05, 5.647540092468262e-05, 5.81592321395874e-05, 5.984306335449219e-05]}, "gradients/encoder.encoder.layers.15.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 5.0, 5.0, 11.0, 18.0, 31.0, 46.0, 110.0, 164.0, 175.0, 131.0, 91.0, 62.0, 44.0, 25.0, 28.0, 19.0, 11.0, 4.0, 8.0, 5.0, 6.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], 
"bins": [-0.0004972552414983511, -0.00047652030480094254, -0.000455785368103534, -0.00043505040230229497, -0.00041431549470871687, -0.00039358052890747786, -0.0003728455922100693, -0.00035211065551266074, -0.0003313757188152522, -0.00031064078211784363, -0.00028990584542043507, -0.0002691709087230265, -0.0002484359429217875, -0.00022770102077629417, -0.0002069660695269704, -0.00018623113282956183, -0.00016549619613215327, -0.00014476125943474472, -0.00012402632273733616, -0.00010329137148801237, -8.255643479060382e-05, -6.182149809319526e-05, -4.1086546843871474e-05, -2.0351610146462917e-05, 3.833265509456396e-07, 2.1118266886333004e-05, 4.185320722172037e-05, 6.258815119508654e-05, 8.33230878924951e-05, 0.00010405802458990365, 0.00012479297583922744, 0.000145527912536636, 0.00016626279102638364, 0.0001869977277237922, 0.00020773266442120075, 0.00022846761567052454, 0.00024920253781601787, 0.0002699375036172569, 0.00029067244031466544, 0.000311407377012074, 0.00033214231370948255, 0.0003528772504068911, 0.00037361218710429966, 0.0003943471238017082, 0.00041508208960294724, 0.00043581699719652534, 0.00045655196299776435, 0.0004772868996951729, 0.0004980218363925815, 0.0005187568021938205, 0.0005394917097873986, 0.0005602266755886376, 0.0005809615831822157, 0.0006016965489834547, 0.0006224315147846937, 0.0006431664223782718, 0.0006639013299718499, 0.0006846362957730889, 0.000705371203366667, 0.000726106169167906, 0.0007468410767614841, 0.0007675760425627232, 0.0007883110083639622, 0.0008090459159575403, 0.0008297808817587793]}, "gradients/encoder.encoder.layers.15.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 4.0, 3.0, 1.0, 4.0, 6.0, 6.0, 6.0, 13.0, 13.0, 13.0, 21.0, 27.0, 21.0, 24.0, 23.0, 18.0, 23.0, 36.0, 50.0, 40.0, 33.0, 43.0, 43.0, 41.0, 28.0, 43.0, 39.0, 41.0, 42.0, 39.0, 26.0, 25.0, 32.0, 20.0, 27.0, 26.0, 12.0, 21.0, 11.0, 4.0, 11.0, 7.0, 8.0, 10.0, 3.0, 4.0, 5.0, 5.0, 4.0, 1.0, 2.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00019913911819458008, -0.00019257422536611557, -0.00018600933253765106, -0.00017944443970918655, -0.00017287954688072205, -0.00016631465405225754, -0.00015974976122379303, -0.00015318486839532852, -0.00014661997556686401, -0.0001400550827383995, -0.000133490189909935, -0.0001269252970814705, -0.00012036040425300598, -0.00011379551142454147, -0.00010723061859607697, -0.00010066572576761246, -9.410083293914795e-05, -8.753594011068344e-05, -8.097104728221893e-05, -7.440615445375443e-05, -6.784126162528992e-05, -6.127636879682541e-05, -5.47114759683609e-05, -4.814658313989639e-05, -4.1581690311431885e-05, -3.501679748296738e-05, -2.845190465450287e-05, -2.188701182603836e-05, -1.5322118997573853e-05, -8.757226169109344e-06, -2.1923333406448364e-06, 4.372559487819672e-06, 1.093745231628418e-05, 1.7502345144748688e-05, 2.4067237973213196e-05, 3.0632130801677704e-05, 3.719702363014221e-05, 4.376191645860672e-05, 5.032680928707123e-05, 5.6891702115535736e-05, 6.345659494400024e-05, 7.002148777246475e-05, 7.658638060092926e-05, 8.315127342939377e-05, 8.971616625785828e-05, 9.628105908632278e-05, 0.00010284595191478729, 0.0001094108447432518, 0.00011597573757171631, 0.00012254063040018082, 0.00012910552322864532, 0.00013567041605710983, 0.00014223530888557434, 0.00014880020171403885, 0.00015536509454250336, 0.00016192998737096786, 0.00016849488019943237, 0.00017505977302789688, 0.0001816246658563614, 0.0001881895586848259, 0.0001947544515132904, 0.0002013193443417549, 0.00020788423717021942, 0.00021444912999868393, 
0.00022101402282714844]}, "gradients/encoder.encoder.layers.15.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 6.0, 4.0, 11.0, 11.0, 21.0, 21.0, 37.0, 44.0, 64.0, 108.0, 153.0, 236.0, 308.0, 467.0, 787.0, 1210.0, 1950.0, 3130.0, 5331.0, 9211.0, 16666.0, 32796.0, 73474.0, 255342.0, 460203.0, 98697.0, 40949.0, 20218.0, 10937.0, 6200.0, 3671.0, 2244.0, 1393.0, 865.0, 586.0, 397.0, 266.0, 163.0, 119.0, 89.0, 52.0, 39.0, 28.0, 17.0, 15.0, 8.0, 11.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.00013053417205810547, -0.00012659654021263123, -0.00012265890836715698, -0.00011872127652168274, -0.0001147836446762085, -0.00011084601283073425, -0.00010690838098526001, -0.00010297074913978577, -9.903311729431152e-05, -9.509548544883728e-05, -9.115785360336304e-05, -8.72202217578888e-05, -8.328258991241455e-05, -7.934495806694031e-05, -7.540732622146606e-05, -7.146969437599182e-05, -6.753206253051758e-05, -6.359443068504333e-05, -5.965679883956909e-05, -5.571916699409485e-05, -5.1781535148620605e-05, -4.784390330314636e-05, -4.390627145767212e-05, -3.9968639612197876e-05, -3.603100776672363e-05, -3.209337592124939e-05, -2.8155744075775146e-05, -2.4218112230300903e-05, -2.028048038482666e-05, -1.6342848539352417e-05, -1.2405216693878174e-05, -8.46758484840393e-06, -4.5299530029296875e-06, -5.923211574554443e-07, 3.345310688018799e-06, 7.282942533493042e-06, 1.1220574378967285e-05, 1.5158206224441528e-05, 1.909583806991577e-05, 2.3033469915390015e-05, 2.6971101760864258e-05, 3.09087336063385e-05, 3.4846365451812744e-05, 3.878399729728699e-05, 4.272162914276123e-05, 4.6659260988235474e-05, 5.059689283370972e-05, 5.453452467918396e-05, 5.84721565246582e-05, 6.240978837013245e-05, 6.634742021560669e-05, 7.028505206108093e-05, 7.422268390655518e-05, 7.816031575202942e-05, 8.209794759750366e-05, 8.60355794429779e-05, 8.997321128845215e-05, 9.391084313392639e-05, 9.784847497940063e-05, 0.00010178610682487488, 0.00010572373867034912, 0.00010966137051582336, 0.00011359900236129761, 0.00011753663420677185, 0.0001214742660522461]}, "gradients/encoder.encoder.layers.15.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 6.0, 2.0, 3.0, 8.0, 3.0, 11.0, 12.0, 11.0, 14.0, 23.0, 22.0, 34.0, 45.0, 49.0, 62.0, 71.0, 88.0, 101.0, 87.0, 66.0, 51.0, 45.0, 40.0, 21.0, 29.0, 19.0, 13.0, 18.0, 10.0, 11.0, 8.0, 6.0, 10.0, 2.0, 1.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-2.6047229766845703e-05, -2.53310427069664e-05, -2.4614855647087097e-05, -2.3898668587207794e-05, -2.318248152732849e-05, -2.2466294467449188e-05, -2.1750107407569885e-05, -2.1033920347690582e-05, -2.031773328781128e-05, -1.9601546227931976e-05, -1.8885359168052673e-05, -1.816917210817337e-05, -1.7452985048294067e-05, -1.6736797988414764e-05, -1.602061092853546e-05, -1.530442386865616e-05, -1.4588236808776855e-05, -1.3872049748897552e-05, -1.315586268901825e-05, -1.2439675629138947e-05, -1.1723488569259644e-05, -1.100730150938034e-05, -1.0291114449501038e-05, -9.574927389621735e-06, -8.858740329742432e-06, -8.142553269863129e-06, -7.426366209983826e-06, -6.710179150104523e-06, -5.99399209022522e-06, -5.277805030345917e-06, -4.561617970466614e-06, -3.845430910587311e-06, -3.129243850708008e-06, -2.413056790828705e-06, -1.6968697309494019e-06, -9.806826710700989e-07, -2.644956111907959e-07, 4.516914486885071e-07, 1.16787850856781e-06, 1.884065568447113e-06, 
2.600252628326416e-06, 3.316439688205719e-06, 4.032626748085022e-06, 4.748813807964325e-06, 5.465000867843628e-06, 6.181187927722931e-06, 6.897374987602234e-06, 7.613562047481537e-06, 8.32974910736084e-06, 9.045936167240143e-06, 9.762123227119446e-06, 1.0478310286998749e-05, 1.1194497346878052e-05, 1.1910684406757355e-05, 1.2626871466636658e-05, 1.334305852651596e-05, 1.4059245586395264e-05, 1.4775432646274567e-05, 1.549161970615387e-05, 1.6207806766033173e-05, 1.6923993825912476e-05, 1.764018088579178e-05, 1.835636794567108e-05, 1.9072555005550385e-05, 1.9788742065429688e-05]}, "gradients/encoder.encoder.layers.15.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 3.0, 1.0, 11.0, 12.0, 16.0, 15.0, 35.0, 41.0, 63.0, 80.0, 112.0, 169.0, 261.0, 359.0, 514.0, 708.0, 1034.0, 1519.0, 2102.0, 3034.0, 4528.0, 6402.0, 9682.0, 14181.0, 21997.0, 34522.0, 53560.0, 94732.0, 203604.0, 291775.0, 120323.0, 66240.0, 38842.0, 25991.0, 16425.0, 11291.0, 7717.0, 5101.0, 3591.0, 2389.0, 1713.0, 1167.0, 838.0, 544.0, 435.0, 269.0, 177.0, 147.0, 77.0, 69.0, 48.0, 30.0, 30.0, 17.0, 10.0, 5.0, 5.0, 6.0, 1.0, 2.0], "bins": [-5.269050598144531e-05, -5.110260099172592e-05, -4.951469600200653e-05, -4.792679101228714e-05, -4.633888602256775e-05, -4.475098103284836e-05, -4.316307604312897e-05, -4.1575171053409576e-05, -3.9987266063690186e-05, -3.8399361073970795e-05, -3.6811456084251404e-05, -3.522355109453201e-05, -3.363564610481262e-05, -3.204774111509323e-05, -3.045983612537384e-05, -2.887193113565445e-05, -2.728402614593506e-05, -2.5696121156215668e-05, -2.4108216166496277e-05, -2.2520311176776886e-05, -2.0932406187057495e-05, -1.9344501197338104e-05, -1.7756596207618713e-05, -1.6168691217899323e-05, -1.4580786228179932e-05, -1.299288123846054e-05, -1.140497624874115e-05, -9.817071259021759e-06, -8.229166269302368e-06, -6.641261279582977e-06, -5.0533562898635864e-06, -3.4654513001441956e-06, -1.8775463104248047e-06, -2.896413207054138e-07, 1.298263669013977e-06, 2.886168658733368e-06, 4.474073648452759e-06, 6.06197863817215e-06, 7.64988362789154e-06, 9.237788617610931e-06, 1.0825693607330322e-05, 1.2413598597049713e-05, 1.4001503586769104e-05, 1.5589408576488495e-05, 1.7177313566207886e-05, 1.8765218555927277e-05, 2.0353123545646667e-05, 2.194102853536606e-05, 2.352893352508545e-05, 2.511683851480484e-05, 2.670474350452423e-05, 2.8292648494243622e-05, 2.9880553483963013e-05, 3.1468458473682404e-05, 3.3056363463401794e-05, 3.4644268453121185e-05, 3.6232173442840576e-05, 3.782007843255997e-05, 3.940798342227936e-05, 4.099588841199875e-05, 4.258379340171814e-05, 4.417169839143753e-05, 4.575960338115692e-05, 4.734750837087631e-05, 4.89354133605957e-05]}, "gradients/encoder.encoder.layers.15.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 1.0, 2.0, 2.0, 7.0, 0.0, 10.0, 7.0, 10.0, 8.0, 9.0, 17.0, 15.0, 18.0, 13.0, 18.0, 22.0, 29.0, 20.0, 29.0, 51.0, 36.0, 39.0, 39.0, 32.0, 52.0, 41.0, 32.0, 61.0, 30.0, 29.0, 34.0, 37.0, 40.0, 20.0, 28.0, 22.0, 25.0, 18.0, 13.0, 21.0, 21.0, 11.0, 6.0, 13.0, 6.0, 5.0, 5.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-4.7206878662109375e-05, -4.5833177864551544e-05, -4.445947706699371e-05, -4.308577626943588e-05, -4.171207547187805e-05, -4.033837467432022e-05, -3.896467387676239e-05, -3.759097307920456e-05, -3.621727228164673e-05, -3.48435714840889e-05, -3.346987068653107e-05, -3.2096169888973236e-05, -3.0722469091415405e-05, -2.9348768293857574e-05, -2.7975067496299744e-05, -2.6601366698741913e-05, 
-2.5227665901184082e-05, -2.385396510362625e-05, -2.248026430606842e-05, -2.110656350851059e-05, -1.973286271095276e-05, -1.8359161913394928e-05, -1.6985461115837097e-05, -1.5611760318279266e-05, -1.4238059520721436e-05, -1.2864358723163605e-05, -1.1490657925605774e-05, -1.0116957128047943e-05, -8.743256330490112e-06, -7.3695555329322815e-06, -5.995854735374451e-06, -4.62215393781662e-06, -3.248453140258789e-06, -1.8747523427009583e-06, -5.010515451431274e-07, 8.726492524147034e-07, 2.246350049972534e-06, 3.620050847530365e-06, 4.993751645088196e-06, 6.367452442646027e-06, 7.741153240203857e-06, 9.114854037761688e-06, 1.0488554835319519e-05, 1.186225563287735e-05, 1.323595643043518e-05, 1.4609657227993011e-05, 1.5983358025550842e-05, 1.7357058823108673e-05, 1.8730759620666504e-05, 2.0104460418224335e-05, 2.1478161215782166e-05, 2.2851862013339996e-05, 2.4225562810897827e-05, 2.5599263608455658e-05, 2.697296440601349e-05, 2.834666520357132e-05, 2.972036600112915e-05, 3.109406679868698e-05, 3.246776759624481e-05, 3.384146839380264e-05, 3.5215169191360474e-05, 3.6588869988918304e-05, 3.7962570786476135e-05, 3.9336271584033966e-05, 4.07099723815918e-05]}, "gradients/encoder.encoder.layers.15.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 4.0, 3.0, 9.0, 3.0, 9.0, 17.0, 21.0, 29.0, 31.0, 52.0, 77.0, 110.0, 115.0, 207.0, 226.0, 377.0, 397.0, 743.0, 779.0, 1581.0, 1815.0, 3702.0, 4454.0, 10363.0, 13917.0, 36406.0, 58805.0, 210885.0, 358550.0, 210575.0, 58509.0, 30137.0, 20469.0, 8426.0, 6450.0, 2939.0, 2503.0, 1301.0, 1199.0, 608.0, 541.0, 298.0, 283.0, 159.0, 155.0, 77.0, 94.0, 44.0, 30.0, 18.0, 24.0, 8.0, 13.0, 7.0, 5.0, 1.0, 3.0, 4.0, 1.0, 1.0, 3.0, 0.0, 2.0], "bins": [-6.139278411865234e-06, -5.931593477725983e-06, -5.723908543586731e-06, -5.516223609447479e-06, -5.3085386753082275e-06, -5.100853741168976e-06, -4.893168807029724e-06, -4.685483872890472e-06, -4.477798938751221e-06, -4.270114004611969e-06, -4.062429070472717e-06, -3.8547441363334656e-06, -3.647059202194214e-06, -3.439374268054962e-06, -3.2316893339157104e-06, -3.0240043997764587e-06, -2.816319465637207e-06, -2.6086345314979553e-06, -2.4009495973587036e-06, -2.193264663219452e-06, -1.9855797290802e-06, -1.7778947949409485e-06, -1.5702098608016968e-06, -1.362524926662445e-06, -1.1548399925231934e-06, -9.471550583839417e-07, -7.394701242446899e-07, -5.317851901054382e-07, -3.241002559661865e-07, -1.1641532182693481e-07, 9.12696123123169e-08, 2.989545464515686e-07, 5.066394805908203e-07, 7.14324414730072e-07, 9.220093488693237e-07, 1.1296942830085754e-06, 1.3373792171478271e-06, 1.5450641512870789e-06, 1.7527490854263306e-06, 1.9604340195655823e-06, 2.168118953704834e-06, 2.3758038878440857e-06, 2.5834888219833374e-06, 2.791173756122589e-06, 2.998858690261841e-06, 3.2065436244010925e-06, 3.4142285585403442e-06, 3.621913492679596e-06, 3.829598426818848e-06, 4.037283360958099e-06, 4.244968295097351e-06, 4.452653229236603e-06, 4.6603381633758545e-06, 4.868023097515106e-06, 5.075708031654358e-06, 5.28339296579361e-06, 5.491077899932861e-06, 5.698762834072113e-06, 5.906447768211365e-06, 6.1141327023506165e-06, 6.321817636489868e-06, 6.52950257062912e-06, 6.737187504768372e-06, 6.944872438907623e-06, 7.152557373046875e-06]}, "gradients/encoder.encoder.layers.15.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 4.0, 3.0, 7.0, 2.0, 4.0, 3.0, 5.0, 13.0, 0.0, 19.0, 8.0, 21.0, 26.0, 19.0, 31.0, 25.0, 39.0, 0.0, 40.0, 56.0, 43.0, 55.0, 42.0, 61.0, 47.0, 58.0, 0.0, 49.0, 
49.0, 51.0, 45.0, 22.0, 28.0, 26.0, 20.0, 0.0, 22.0, 12.0, 13.0, 4.0, 9.0, 5.0, 12.0, 2.0, 0.0, 2.0, 3.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.7881393432617188e-06, -1.7350539565086365e-06, -1.6819685697555542e-06, -1.628883183002472e-06, -1.5757977962493896e-06, -1.5227124094963074e-06, -1.469627022743225e-06, -1.4165416359901428e-06, -1.3634562492370605e-06, -1.3103708624839783e-06, -1.257285475730896e-06, -1.2042000889778137e-06, -1.1511147022247314e-06, -1.0980293154716492e-06, -1.044943928718567e-06, -9.918585419654846e-07, -9.387731552124023e-07, -8.856877684593201e-07, -8.326023817062378e-07, -7.795169949531555e-07, -7.264316082000732e-07, -6.73346221446991e-07, -6.202608346939087e-07, -5.671754479408264e-07, -5.140900611877441e-07, -4.6100467443466187e-07, -4.079192876815796e-07, -3.548339009284973e-07, -3.0174851417541504e-07, -2.4866312742233276e-07, -1.955777406692505e-07, -1.424923539161682e-07, -8.940696716308594e-08, -3.632158041000366e-08, 1.6763806343078613e-08, 6.984919309616089e-08, 1.2293457984924316e-07, 1.7601996660232544e-07, 2.2910535335540771e-07, 2.8219074010849e-07, 3.3527612686157227e-07, 3.8836151361465454e-07, 4.414469003677368e-07, 4.945322871208191e-07, 5.476176738739014e-07, 6.007030606269836e-07, 6.537884473800659e-07, 7.068738341331482e-07, 7.599592208862305e-07, 8.130446076393127e-07, 8.66129994392395e-07, 9.192153811454773e-07, 9.723007678985596e-07, 1.0253861546516418e-06, 1.0784715414047241e-06, 1.1315569281578064e-06, 1.1846423149108887e-06, 1.237727701663971e-06, 1.2908130884170532e-06, 1.3438984751701355e-06, 1.3969838619232178e-06, 1.4500692486763e-06, 1.5031546354293823e-06, 1.5562400221824646e-06, 1.6093254089355469e-06]}, "gradients/encoder.encoder.layers.15.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 0.0, 1.0, 2.0, 1.0, 8.0, 5.0, 4.0, 11.0, 19.0, 16.0, 28.0, 43.0, 93.0, 93.0, 169.0, 349.0, 371.0, 788.0, 1502.0, 1902.0, 4404.0, 9096.0, 12913.0, 36618.0, 102071.0, 266391.0, 441152.0, 102206.0, 36514.0, 12954.0, 8983.0, 4481.0, 1841.0, 1501.0, 828.0, 386.0, 318.0, 180.0, 85.0, 74.0, 48.0, 21.0, 28.0, 17.0, 16.0, 8.0, 4.0, 4.0, 3.0, 3.0, 2.0, 4.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0], "bins": [-7.033348083496094e-06, -6.813555955886841e-06, -6.593763828277588e-06, -6.373971700668335e-06, -6.154179573059082e-06, -5.934387445449829e-06, -5.714595317840576e-06, -5.494803190231323e-06, -5.27501106262207e-06, -5.055218935012817e-06, -4.8354268074035645e-06, -4.6156346797943115e-06, -4.395842552185059e-06, -4.176050424575806e-06, -3.956258296966553e-06, -3.7364661693573e-06, -3.516674041748047e-06, -3.296881914138794e-06, -3.077089786529541e-06, -2.857297658920288e-06, -2.637505531311035e-06, -2.4177134037017822e-06, -2.1979212760925293e-06, -1.9781291484832764e-06, -1.7583370208740234e-06, -1.5385448932647705e-06, -1.3187527656555176e-06, -1.0989606380462646e-06, -8.791685104370117e-07, -6.593763828277588e-07, -4.3958425521850586e-07, -2.1979212760925293e-07, 0.0, 2.1979212760925293e-07, 4.3958425521850586e-07, 6.593763828277588e-07, 8.791685104370117e-07, 1.0989606380462646e-06, 1.3187527656555176e-06, 1.5385448932647705e-06, 1.7583370208740234e-06, 1.9781291484832764e-06, 2.1979212760925293e-06, 2.4177134037017822e-06, 2.637505531311035e-06, 2.857297658920288e-06, 3.077089786529541e-06, 3.296881914138794e-06, 3.516674041748047e-06, 3.7364661693573e-06, 3.956258296966553e-06, 4.176050424575806e-06, 4.395842552185059e-06, 4.6156346797943115e-06, 4.8354268074035645e-06, 5.055218935012817e-06, 
5.27501106262207e-06, 5.494803190231323e-06, 5.714595317840576e-06, 5.934387445449829e-06, 6.154179573059082e-06, 6.373971700668335e-06, 6.593763828277588e-06, 6.813555955886841e-06, 7.033348083496094e-06]}, "gradients/encoder.encoder.layers.15.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 7.0, 3.0, 6.0, 9.0, 15.0, 9.0, 14.0, 20.0, 20.0, 39.0, 53.0, 53.0, 51.0, 81.0, 99.0, 62.0, 93.0, 93.0, 61.0, 34.0, 43.0, 32.0, 22.0, 21.0, 19.0, 11.0, 4.0, 12.0, 6.0, 6.0, 4.0, 1.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.933906555175781e-06, -3.771856427192688e-06, -3.6098062992095947e-06, -3.4477561712265015e-06, -3.285706043243408e-06, -3.123655915260315e-06, -2.9616057872772217e-06, -2.7995556592941284e-06, -2.637505531311035e-06, -2.475455403327942e-06, -2.3134052753448486e-06, -2.1513551473617554e-06, -1.989305019378662e-06, -1.8272548913955688e-06, -1.6652047634124756e-06, -1.5031546354293823e-06, -1.341104507446289e-06, -1.1790543794631958e-06, -1.0170042514801025e-06, -8.549541234970093e-07, -6.92903995513916e-07, -5.308538675308228e-07, -3.688037395477295e-07, -2.0675361156463623e-07, -4.470348358154297e-08, 1.1734664440155029e-07, 2.7939677238464355e-07, 4.414469003677368e-07, 6.034970283508301e-07, 7.655471563339233e-07, 9.275972843170166e-07, 1.0896474123001099e-06, 1.2516975402832031e-06, 1.4137476682662964e-06, 1.5757977962493896e-06, 1.737847924232483e-06, 1.8998980522155762e-06, 2.0619481801986694e-06, 2.2239983081817627e-06, 2.386048436164856e-06, 2.5480985641479492e-06, 2.7101486921310425e-06, 2.8721988201141357e-06, 3.034248948097229e-06, 3.1962990760803223e-06, 3.3583492040634155e-06, 3.520399332046509e-06, 3.682449460029602e-06, 3.844499588012695e-06, 4.0065497159957886e-06, 4.168599843978882e-06, 4.330649971961975e-06, 4.492700099945068e-06, 4.654750227928162e-06, 4.816800355911255e-06, 4.978850483894348e-06, 5.140900611877441e-06, 5.302950739860535e-06, 5.465000867843628e-06, 5.627050995826721e-06, 5.7891011238098145e-06, 5.951151251792908e-06, 6.113201379776001e-06, 6.275251507759094e-06, 6.4373016357421875e-06]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 4.0, 4.0, 6.0, 2.0, 11.0, 7.0, 14.0, 22.0, 36.0, 33.0, 60.0, 84.0, 167.0, 144.0, 101.0, 54.0, 40.0, 45.0, 25.0, 28.0, 21.0, 14.0, 8.0, 12.0, 14.0, 7.0, 5.0, 9.0, 6.0, 3.0, 3.0, 2.0, 3.0, 1.0, 4.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00017123515135608613, -0.0001643389550736174, -0.00015744274423923343, -0.0001505465479567647, -0.00014365035167429596, -0.00013675415539182723, -0.00012985794455744326, -0.00012296174827497452, -0.00011606555199250579, -0.00010916934843407944, -0.0001022731521516107, -9.537694859318435e-05, -8.848075231071562e-05, -8.158454875228927e-05, -7.468834519386292e-05, -6.779214891139418e-05, -6.089594535296783e-05, -5.3999745432520285e-05, -4.710354551207274e-05, -4.020734195364639e-05, -3.3311145671177655e-05, -2.6414942112751305e-05, -1.9518742192303762e-05, -1.2622542271856219e-05, -5.726342351408675e-06, 1.1698580237862188e-06, 8.066058398981113e-06, 1.4962259228923358e-05, 2.18584591493709e-05, 2.8754660888807848e-05, 3.565086080925539e-05, 4.2547060729702935e-05, 4.944326065015048e-05, 5.633946057059802e-05, 6.323566049104556e-05, 7.013186404947191e-05, 7.702806033194065e-05, 
8.3924263890367e-05, 9.082046744879335e-05, 9.771666373126209e-05, 0.00010461286001373082, 0.00011150906357215717, 0.00011840525985462591, 0.00012530146341305226, 0.000132197659695521, 0.00013909387052990496, 0.0001459900668123737, 0.00015288626309484243, 0.0001597824739292264, 0.00016667867021169513, 0.0001735748810460791, 0.00018047107732854784, 0.00018736727361101657, 0.0001942634698934853, 0.00020115968072786927, 0.000208055877010338, 0.00021495207329280674, 0.00022184826957527548, 0.00022874448040965945, 0.00023564067669212818, 0.00024253687297459692, 0.00024943306925706565, 0.0002563292800914496, 0.0002632254618220031, 0.0002701216726563871]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 5.0, 9.0, 4.0, 7.0, 8.0, 6.0, 7.0, 18.0, 17.0, 14.0, 21.0, 20.0, 33.0, 31.0, 40.0, 49.0, 32.0, 43.0, 45.0, 44.0, 48.0, 55.0, 47.0, 44.0, 48.0, 35.0, 34.0, 42.0, 31.0, 24.0, 26.0, 25.0, 20.0, 14.0, 12.0, 9.0, 8.0, 6.0, 5.0, 9.0, 4.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00016242265701293945, -0.0001575099304318428, -0.00015259720385074615, -0.0001476844772696495, -0.00014277175068855286, -0.0001378590241074562, -0.00013294629752635956, -0.0001280335709452629, -0.00012312084436416626, -0.00011820811778306961, -0.00011329539120197296, -0.00010838266462087631, -0.00010346993803977966, -9.855721145868301e-05, -9.364448487758636e-05, -8.873175829648972e-05, -8.381903171539307e-05, -7.890630513429642e-05, -7.399357855319977e-05, -6.908085197210312e-05, -6.416812539100647e-05, -5.925539880990982e-05, -5.434267222881317e-05, -4.942994564771652e-05, -4.451721906661987e-05, -3.9604492485523224e-05, -3.4691765904426575e-05, -2.9779039323329926e-05, -2.4866312742233276e-05, -1.9953586161136627e-05, -1.5040859580039978e-05, -1.0128132998943329e-05, -5.21540641784668e-06, -3.026798367500305e-07, 4.610046744346619e-06, 9.522773325443268e-06, 1.4435499906539917e-05, 1.9348226487636566e-05, 2.4260953068733215e-05, 2.9173679649829865e-05, 3.4086406230926514e-05, 3.899913281202316e-05, 4.391185939311981e-05, 4.882458597421646e-05, 5.373731255531311e-05, 5.865003913640976e-05, 6.356276571750641e-05, 6.847549229860306e-05, 7.338821887969971e-05, 7.830094546079636e-05, 8.3213672041893e-05, 8.812639862298965e-05, 9.30391252040863e-05, 9.795185178518295e-05, 0.0001028645783662796, 0.00010777730494737625, 0.0001126900315284729, 0.00011760275810956955, 0.0001225154846906662, 0.00012742821127176285, 0.0001323409378528595, 0.00013725366443395615, 0.0001421663910150528, 0.00014707911759614944, 0.0001519918441772461]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 4.0, 1.0, 5.0, 5.0, 10.0, 12.0, 26.0, 35.0, 43.0, 71.0, 147.0, 200.0, 281.0, 393.0, 652.0, 941.0, 1379.0, 2138.0, 3354.0, 5158.0, 8799.0, 15806.0, 30733.0, 67402.0, 275508.0, 3508148.0, 161205.0, 53486.0, 25160.0, 12973.0, 7119.0, 4205.0, 2670.0, 1878.0, 1198.0, 807.0, 646.0, 423.0, 326.0, 221.0, 195.0, 137.0, 98.0, 70.0, 60.0, 39.0, 33.0, 25.0, 20.0, 15.0, 9.0, 5.0, 4.0, 9.0, 7.0, 0.0, 3.0, 1.0, 1.0, 2.0], "bins": [-0.00011336803436279297, -0.00010950863361358643, -0.00010564923286437988, -0.00010178983211517334, -9.79304313659668e-05, -9.407103061676025e-05, -9.021162986755371e-05, -8.635222911834717e-05, -8.249282836914062e-05, -7.863342761993408e-05, -7.477402687072754e-05, -7.0914626121521e-05, -6.705522537231445e-05, -6.319582462310791e-05, 
-5.933642387390137e-05, -5.5477023124694824e-05, -5.161762237548828e-05, -4.775822162628174e-05, -4.3898820877075195e-05, -4.003942012786865e-05, -3.618001937866211e-05, -3.2320618629455566e-05, -2.8461217880249023e-05, -2.460181713104248e-05, -2.0742416381835938e-05, -1.6883015632629395e-05, -1.3023614883422852e-05, -9.164214134216309e-06, -5.304813385009766e-06, -1.4454126358032227e-06, 2.4139881134033203e-06, 6.273388862609863e-06, 1.0132789611816406e-05, 1.399219036102295e-05, 1.7851591110229492e-05, 2.1710991859436035e-05, 2.5570392608642578e-05, 2.942979335784912e-05, 3.3289194107055664e-05, 3.714859485626221e-05, 4.100799560546875e-05, 4.486739635467529e-05, 4.8726797103881836e-05, 5.258619785308838e-05, 5.644559860229492e-05, 6.0304999351501465e-05, 6.416440010070801e-05, 6.802380084991455e-05, 7.18832015991211e-05, 7.574260234832764e-05, 7.960200309753418e-05, 8.346140384674072e-05, 8.732080459594727e-05, 9.118020534515381e-05, 9.503960609436035e-05, 9.88990068435669e-05, 0.00010275840759277344, 0.00010661780834197998, 0.00011047720909118652, 0.00011433660984039307, 0.00011819601058959961, 0.00012205541133880615, 0.0001259148120880127, 0.00012977421283721924, 0.00013363361358642578]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 4.0, 2.0, 8.0, 8.0, 8.0, 3.0, 4.0, 14.0, 18.0, 26.0, 18.0, 34.0, 40.0, 55.0, 64.0, 67.0, 98.0, 73.0, 98.0, 64.0, 64.0, 59.0, 42.0, 32.0, 27.0, 21.0, 7.0, 14.0, 6.0, 6.0, 4.0, 4.0, 4.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0682811737060547e-05, -1.9929371774196625e-05, -1.9175931811332703e-05, -1.842249184846878e-05, -1.766905188560486e-05, -1.6915611922740936e-05, -1.6162171959877014e-05, -1.5408731997013092e-05, -1.465529203414917e-05, -1.3901852071285248e-05, -1.3148412108421326e-05, -1.2394972145557404e-05, -1.1641532182693481e-05, -1.088809221982956e-05, -1.0134652256965637e-05, -9.381212294101715e-06, -8.627772331237793e-06, -7.874332368373871e-06, -7.120892405509949e-06, -6.367452442646027e-06, -5.6140124797821045e-06, -4.860572516918182e-06, -4.10713255405426e-06, -3.353692591190338e-06, -2.600252628326416e-06, -1.846812665462494e-06, -1.0933727025985718e-06, -3.3993273973464966e-07, 4.1350722312927246e-07, 1.1669471859931946e-06, 1.9203871488571167e-06, 2.673827111721039e-06, 3.427267074584961e-06, 4.180707037448883e-06, 4.934147000312805e-06, 5.687586963176727e-06, 6.441026926040649e-06, 7.1944668889045715e-06, 7.947906851768494e-06, 8.701346814632416e-06, 9.454786777496338e-06, 1.020822674036026e-05, 1.0961666703224182e-05, 1.1715106666088104e-05, 1.2468546628952026e-05, 1.3221986591815948e-05, 1.397542655467987e-05, 1.4728866517543793e-05, 1.5482306480407715e-05, 1.6235746443271637e-05, 1.698918640613556e-05, 1.774262636899948e-05, 1.8496066331863403e-05, 1.9249506294727325e-05, 2.0002946257591248e-05, 2.075638622045517e-05, 2.1509826183319092e-05, 2.2263266146183014e-05, 2.3016706109046936e-05, 2.3770146071910858e-05, 2.452358603477478e-05, 2.5277025997638702e-05, 2.6030465960502625e-05, 2.6783905923366547e-05, 2.753734588623047e-05]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 7.0, 10.0, 10.0, 25.0, 29.0, 35.0, 58.0, 78.0, 115.0, 172.0, 274.0, 376.0, 586.0, 793.0, 1291.0, 2141.0, 3394.0, 5395.0, 8925.0, 15583.0, 27921.0, 53238.0, 110626.0, 296371.0, 
2909094.0, 479086.0, 136829.0, 63600.0, 32987.0, 17796.0, 10630.0, 6272.0, 3879.0, 2319.0, 1422.0, 999.0, 625.0, 402.0, 290.0, 182.0, 125.0, 74.0, 66.0, 62.0, 35.0, 15.0, 17.0, 7.0, 6.0, 4.0, 6.0, 2.0, 0.0, 0.0, 4.0, 2.0], "bins": [-7.742643356323242e-05, -7.505342364311218e-05, -7.268041372299194e-05, -7.03074038028717e-05, -6.793439388275146e-05, -6.556138396263123e-05, -6.318837404251099e-05, -6.081536412239075e-05, -5.844235420227051e-05, -5.606934428215027e-05, -5.369633436203003e-05, -5.132332444190979e-05, -4.895031452178955e-05, -4.657730460166931e-05, -4.420429468154907e-05, -4.183128476142883e-05, -3.9458274841308594e-05, -3.7085264921188354e-05, -3.4712255001068115e-05, -3.2339245080947876e-05, -2.9966235160827637e-05, -2.7593225240707397e-05, -2.5220215320587158e-05, -2.284720540046692e-05, -2.047419548034668e-05, -1.810118556022644e-05, -1.57281756401062e-05, -1.3355165719985962e-05, -1.0982155799865723e-05, -8.609145879745483e-06, -6.236135959625244e-06, -3.863126039505005e-06, -1.4901161193847656e-06, 8.828938007354736e-07, 3.255903720855713e-06, 5.628913640975952e-06, 8.001923561096191e-06, 1.037493348121643e-05, 1.274794340133667e-05, 1.512095332145691e-05, 1.749396324157715e-05, 1.9866973161697388e-05, 2.2239983081817627e-05, 2.4612993001937866e-05, 2.6986002922058105e-05, 2.9359012842178345e-05, 3.1732022762298584e-05, 3.410503268241882e-05, 3.647804260253906e-05, 3.88510525226593e-05, 4.122406244277954e-05, 4.359707236289978e-05, 4.597008228302002e-05, 4.834309220314026e-05, 5.07161021232605e-05, 5.308911204338074e-05, 5.5462121963500977e-05, 5.7835131883621216e-05, 6.0208141803741455e-05, 6.25811517238617e-05, 6.495416164398193e-05, 6.732717156410217e-05, 6.970018148422241e-05, 7.207319140434265e-05, 7.444620132446289e-05]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 2.0, 3.0, 3.0, 4.0, 7.0, 14.0, 13.0, 17.0, 25.0, 31.0, 23.0, 49.0, 49.0, 68.0, 69.0, 87.0, 162.0, 395.0, 1120.0, 964.0, 299.0, 153.0, 100.0, 66.0, 71.0, 55.0, 43.0, 34.0, 33.0, 23.0, 19.0, 17.0, 16.0, 10.0, 13.0, 2.0, 8.0, 5.0, 3.0, 6.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.875659942626953e-05, -4.694890230894089e-05, -4.5141205191612244e-05, -4.33335080742836e-05, -4.1525810956954956e-05, -3.971811383962631e-05, -3.791041672229767e-05, -3.6102719604969025e-05, -3.429502248764038e-05, -3.248732537031174e-05, -3.067962825298309e-05, -2.887193113565445e-05, -2.7064234018325806e-05, -2.5256536900997162e-05, -2.3448839783668518e-05, -2.1641142666339874e-05, -1.983344554901123e-05, -1.8025748431682587e-05, -1.6218051314353943e-05, -1.4410354197025299e-05, -1.2602657079696655e-05, -1.0794959962368011e-05, -8.987262845039368e-06, -7.179565727710724e-06, -5.37186861038208e-06, -3.5641714930534363e-06, -1.7564743757247925e-06, 5.122274160385132e-08, 1.8589198589324951e-06, 3.666616976261139e-06, 5.474314093589783e-06, 7.2820112109184265e-06, 9.08970832824707e-06, 1.0897405445575714e-05, 1.2705102562904358e-05, 1.4512799680233002e-05, 1.6320496797561646e-05, 1.812819391489029e-05, 1.9935891032218933e-05, 2.1743588149547577e-05, 2.355128526687622e-05, 2.5358982384204865e-05, 2.7166679501533508e-05, 2.8974376618862152e-05, 3.0782073736190796e-05, 3.258977085351944e-05, 3.4397467970848083e-05, 3.620516508817673e-05, 3.801286220550537e-05, 3.9820559322834015e-05, 4.162825644016266e-05, 4.34359535574913e-05, 4.5243650674819946e-05, 4.705134779214859e-05, 
4.8859044909477234e-05, 5.066674202680588e-05, 5.247443914413452e-05, 5.4282136261463165e-05, 5.608983337879181e-05, 5.789753049612045e-05, 5.97052276134491e-05, 6.151292473077774e-05, 6.332062184810638e-05, 6.512831896543503e-05, 6.693601608276367e-05]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 6.0, 10.0, 12.0, 23.0, 26.0, 40.0, 56.0, 75.0, 130.0, 120.0, 114.0, 81.0, 76.0, 57.0, 28.0, 32.0, 27.0, 25.0, 15.0, 6.0, 12.0, 8.0, 2.0, 3.0, 7.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0004095589101780206, -0.00039564017788507044, -0.0003817214455921203, -0.00036780271329917014, -0.00035388398100622, -0.00033996524871326983, -0.00032604654552415013, -0.0003121278132312, -0.0002982090809382498, -0.0002842903486452997, -0.0002703716163523495, -0.00025645288405939937, -0.0002425341517664492, -0.00022861541947349906, -0.00021469670173246413, -0.00020077796943951398, -0.0001868592225946486, -0.00017294049030169845, -0.0001590217580087483, -0.00014510302571579814, -0.00013118429342284799, -0.00011726556840585545, -0.00010334684338886291, -8.942811109591275e-05, -7.55093788029626e-05, -6.159064651001245e-05, -4.76719178550411e-05, -3.3753189200069755e-05, -1.9834456907119602e-05, -5.915724614169449e-06, 8.00300040282309e-06, 2.1921732695773244e-05, 3.5840494092553854e-05, 4.975922638550401e-05, 6.367795867845416e-05, 7.75966836954467e-05, 9.151541598839685e-05, 0.000105434148281347, 0.00011935287329833955, 0.0001332716055912897, 0.00014719033788423985, 0.00016110907017719, 0.00017502780247014016, 0.00018894652021117508, 0.00020286525250412524, 0.0002167839847970754, 0.00023070271709002554, 0.0002446214493829757, 0.00025854018167592585, 0.000272458913968876, 0.00028637764626182616, 0.0003002963785547763, 0.00031421511084772646, 0.0003281338431406766, 0.0003420525463297963, 0.00035597127862274647, 0.0003698900109156966, 0.0003838087432086468, 0.00039772747550159693, 0.0004116462077945471, 0.00042556494008749723, 0.0004394836723804474, 0.00045340240467339754, 0.0004673211369663477, 0.00048123986925929785]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 1.0, 0.0, 5.0, 1.0, 8.0, 4.0, 3.0, 6.0, 9.0, 8.0, 10.0, 11.0, 19.0, 9.0, 12.0, 31.0, 20.0, 19.0, 18.0, 18.0, 25.0, 36.0, 25.0, 27.0, 27.0, 30.0, 42.0, 37.0, 31.0, 44.0, 15.0, 44.0, 26.0, 34.0, 34.0, 27.0, 38.0, 36.0, 30.0, 26.0, 24.0, 18.0, 25.0, 13.0, 20.0, 12.0, 9.0, 13.0, 10.0, 7.0, 4.0, 1.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.00017547607421875, -0.00017000921070575714, -0.00016454234719276428, -0.00015907548367977142, -0.00015360862016677856, -0.0001481417566537857, -0.00014267489314079285, -0.0001372080296278, -0.00013174116611480713, -0.00012627430260181427, -0.00012080743908882141, -0.00011534057557582855, -0.0001098737120628357, -0.00010440684854984283, -9.893998503684998e-05, -9.347312152385712e-05, -8.800625801086426e-05, -8.25393944978714e-05, -7.707253098487854e-05, -7.160566747188568e-05, -6.613880395889282e-05, -6.067194044589996e-05, -5.5205076932907104e-05, -4.9738213419914246e-05, -4.427134990692139e-05, -3.880448639392853e-05, -3.333762288093567e-05, -2.787075936794281e-05, -2.240389585494995e-05, -1.6937032341957092e-05, -1.1470168828964233e-05, -6.0033053159713745e-06, -5.364418029785156e-07, 4.930421710014343e-06, 
1.0397285223007202e-05, 1.586414873600006e-05, 2.133101224899292e-05, 2.679787576198578e-05, 3.226473927497864e-05, 3.7731602787971497e-05, 4.3198466300964355e-05, 4.8665329813957214e-05, 5.413219332695007e-05, 5.959905683994293e-05, 6.506592035293579e-05, 7.053278386592865e-05, 7.599964737892151e-05, 8.146651089191437e-05, 8.693337440490723e-05, 9.240023791790009e-05, 9.786710143089294e-05, 0.0001033339649438858, 0.00010880082845687866, 0.00011426769196987152, 0.00011973455548286438, 0.00012520141899585724, 0.0001306682825088501, 0.00013613514602184296, 0.00014160200953483582, 0.00014706887304782867, 0.00015253573656082153, 0.0001580026000738144, 0.00016346946358680725, 0.0001689363270998001, 0.00017440319061279297]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 5.0, 8.0, 12.0, 14.0, 12.0, 30.0, 39.0, 69.0, 114.0, 179.0, 288.0, 410.0, 713.0, 1219.0, 2177.0, 4137.0, 8386.0, 18664.0, 49425.0, 180938.0, 586997.0, 125883.0, 38368.0, 15110.0, 6856.0, 3475.0, 1945.0, 1108.0, 695.0, 455.0, 288.0, 187.0, 122.0, 81.0, 65.0, 26.0, 22.0, 14.0, 8.0, 3.0, 7.0, 3.0, 4.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00018322467803955078, -0.00017773546278476715, -0.00017224624752998352, -0.0001667570322751999, -0.00016126781702041626, -0.00015577860176563263, -0.000150289386510849, -0.00014480017125606537, -0.00013931095600128174, -0.0001338217407464981, -0.00012833252549171448, -0.00012284331023693085, -0.00011735409498214722, -0.00011186487972736359, -0.00010637566447257996, -0.00010088644921779633, -9.53972339630127e-05, -8.990801870822906e-05, -8.441880345344543e-05, -7.89295881986618e-05, -7.344037294387817e-05, -6.795115768909454e-05, -6.246194243431091e-05, -5.697272717952728e-05, -5.148351192474365e-05, -4.599429666996002e-05, -4.050508141517639e-05, -3.501586616039276e-05, -2.952665090560913e-05, -2.40374356508255e-05, -1.854822039604187e-05, -1.305900514125824e-05, -7.569789886474609e-06, -2.080574631690979e-06, 3.4086406230926514e-06, 8.897855877876282e-06, 1.4387071132659912e-05, 1.9876286387443542e-05, 2.5365501642227173e-05, 3.08547168970108e-05, 3.6343932151794434e-05, 4.1833147406578064e-05, 4.7322362661361694e-05, 5.2811577916145325e-05, 5.8300793170928955e-05, 6.379000842571259e-05, 6.927922368049622e-05, 7.476843893527985e-05, 8.025765419006348e-05, 8.574686944484711e-05, 9.123608469963074e-05, 9.672529995441437e-05, 0.000102214515209198, 0.00010770373046398163, 0.00011319294571876526, 0.00011868216097354889, 0.00012417137622833252, 0.00012966059148311615, 0.00013514980673789978, 0.0001406390219926834, 0.00014612823724746704, 0.00015161745250225067, 0.0001571066677570343, 0.00016259588301181793, 0.00016808509826660156]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 1.0, 3.0, 0.0, 2.0, 0.0, 3.0, 6.0, 2.0, 5.0, 4.0, 2.0, 9.0, 13.0, 22.0, 13.0, 29.0, 28.0, 51.0, 67.0, 81.0, 82.0, 93.0, 104.0, 74.0, 69.0, 62.0, 47.0, 33.0, 24.0, 22.0, 15.0, 10.0, 10.0, 7.0, 6.0, 5.0, 2.0, 3.0, 0.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.039836883544922e-05, -2.9552727937698364e-05, -2.870708703994751e-05, -2.7861446142196655e-05, -2.70158052444458e-05, -2.6170164346694946e-05, -2.5324523448944092e-05, -2.4478882551193237e-05, -2.3633241653442383e-05, -2.278760075569153e-05, -2.1941959857940674e-05, -2.109631896018982e-05, 
-2.0250678062438965e-05, -1.940503716468811e-05, -1.8559396266937256e-05, -1.77137553691864e-05, -1.6868114471435547e-05, -1.6022473573684692e-05, -1.5176832675933838e-05, -1.4331191778182983e-05, -1.3485550880432129e-05, -1.2639909982681274e-05, -1.179426908493042e-05, -1.0948628187179565e-05, -1.0102987289428711e-05, -9.257346391677856e-06, -8.411705493927002e-06, -7.5660645961761475e-06, -6.720423698425293e-06, -5.8747828006744385e-06, -5.029141902923584e-06, -4.1835010051727295e-06, -3.337860107421875e-06, -2.4922192096710205e-06, -1.646578311920166e-06, -8.009374141693115e-07, 4.470348358154297e-08, 8.903443813323975e-07, 1.735985279083252e-06, 2.5816261768341064e-06, 3.427267074584961e-06, 4.2729079723358154e-06, 5.11854887008667e-06, 5.964189767837524e-06, 6.809830665588379e-06, 7.655471563339233e-06, 8.501112461090088e-06, 9.346753358840942e-06, 1.0192394256591797e-05, 1.1038035154342651e-05, 1.1883676052093506e-05, 1.272931694984436e-05, 1.3574957847595215e-05, 1.442059874534607e-05, 1.5266239643096924e-05, 1.611188054084778e-05, 1.6957521438598633e-05, 1.7803162336349487e-05, 1.8648803234100342e-05, 1.9494444131851196e-05, 2.034008502960205e-05, 2.1185725927352905e-05, 2.203136682510376e-05, 2.2877007722854614e-05, 2.372264862060547e-05]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 3.0, 6.0, 5.0, 8.0, 14.0, 18.0, 25.0, 36.0, 52.0, 85.0, 128.0, 206.0, 286.0, 427.0, 619.0, 901.0, 1327.0, 1939.0, 3082.0, 4881.0, 7376.0, 11622.0, 18726.0, 30807.0, 50437.0, 93620.0, 203919.0, 322037.0, 126794.0, 67819.0, 38492.0, 22238.0, 14472.0, 9246.0, 5701.0, 3923.0, 2497.0, 1570.0, 1016.0, 772.0, 446.0, 311.0, 205.0, 136.0, 102.0, 68.0, 47.0, 44.0, 24.0, 16.0, 16.0, 8.0, 4.0, 4.0, 4.0, 2.0, 1.0, 1.0], "bins": [-6.079673767089844e-05, -5.896668881177902e-05, -5.713663995265961e-05, -5.530659109354019e-05, -5.3476542234420776e-05, -5.164649337530136e-05, -4.9816444516181946e-05, -4.798639565706253e-05, -4.6156346797943115e-05, -4.43262979388237e-05, -4.2496249079704285e-05, -4.066620022058487e-05, -3.8836151361465454e-05, -3.700610250234604e-05, -3.5176053643226624e-05, -3.334600478410721e-05, -3.151595592498779e-05, -2.9685907065868378e-05, -2.7855858206748962e-05, -2.6025809347629547e-05, -2.4195760488510132e-05, -2.2365711629390717e-05, -2.05356627702713e-05, -1.8705613911151886e-05, -1.687556505203247e-05, -1.5045516192913055e-05, -1.321546733379364e-05, -1.1385418474674225e-05, -9.55536961555481e-06, -7.725320756435394e-06, -5.895271897315979e-06, -4.065223038196564e-06, -2.2351741790771484e-06, -4.0512531995773315e-07, 1.4249235391616821e-06, 3.2549723982810974e-06, 5.085021257400513e-06, 6.915070116519928e-06, 8.745118975639343e-06, 1.0575167834758759e-05, 1.2405216693878174e-05, 1.4235265552997589e-05, 1.6065314412117004e-05, 1.789536327123642e-05, 1.9725412130355835e-05, 2.155546098947525e-05, 2.3385509848594666e-05, 2.521555870771408e-05, 2.7045607566833496e-05, 2.887565642595291e-05, 3.070570528507233e-05, 3.253575414419174e-05, 3.436580300331116e-05, 3.619585186243057e-05, 3.802590072154999e-05, 3.98559495806694e-05, 4.168599843978882e-05, 4.3516047298908234e-05, 4.534609615802765e-05, 4.7176145017147064e-05, 4.900619387626648e-05, 5.0836242735385895e-05, 5.266629159450531e-05, 5.4496340453624725e-05, 5.632638931274414e-05]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 4.0, 3.0, 7.0, 12.0, 8.0, 10.0, 12.0, 13.0, 
14.0, 16.0, 18.0, 14.0, 29.0, 25.0, 27.0, 35.0, 40.0, 44.0, 41.0, 36.0, 31.0, 53.0, 44.0, 41.0, 39.0, 38.0, 38.0, 40.0, 43.0, 35.0, 30.0, 26.0, 22.0, 17.0, 20.0, 10.0, 15.0, 18.0, 16.0, 4.0, 10.0, 4.0, 4.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.57763671875e-05, -4.4289976358413696e-05, -4.280358552932739e-05, -4.131719470024109e-05, -3.9830803871154785e-05, -3.834441304206848e-05, -3.685802221298218e-05, -3.5371631383895874e-05, -3.388524055480957e-05, -3.2398849725723267e-05, -3.091245889663696e-05, -2.942606806755066e-05, -2.7939677238464355e-05, -2.6453286409378052e-05, -2.4966895580291748e-05, -2.3480504751205444e-05, -2.199411392211914e-05, -2.0507723093032837e-05, -1.9021332263946533e-05, -1.753494143486023e-05, -1.6048550605773926e-05, -1.4562159776687622e-05, -1.3075768947601318e-05, -1.1589378118515015e-05, -1.0102987289428711e-05, -8.616596460342407e-06, -7.1302056312561035e-06, -5.6438148021698e-06, -4.157423973083496e-06, -2.6710331439971924e-06, -1.1846423149108887e-06, 3.0174851417541504e-07, 1.7881393432617188e-06, 3.2745301723480225e-06, 4.760921001434326e-06, 6.24731183052063e-06, 7.733702659606934e-06, 9.220093488693237e-06, 1.0706484317779541e-05, 1.2192875146865845e-05, 1.3679265975952148e-05, 1.5165656805038452e-05, 1.6652047634124756e-05, 1.813843846321106e-05, 1.9624829292297363e-05, 2.1111220121383667e-05, 2.259761095046997e-05, 2.4084001779556274e-05, 2.5570392608642578e-05, 2.7056783437728882e-05, 2.8543174266815186e-05, 3.002956509590149e-05, 3.151595592498779e-05, 3.30023467540741e-05, 3.44887375831604e-05, 3.5975128412246704e-05, 3.746151924133301e-05, 3.894791007041931e-05, 4.0434300899505615e-05, 4.192069172859192e-05, 4.340708255767822e-05, 4.4893473386764526e-05, 4.637986421585083e-05, 4.7866255044937134e-05, 4.935264587402344e-05]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 11.0, 4.0, 5.0, 11.0, 23.0, 22.0, 28.0, 37.0, 66.0, 106.0, 200.0, 235.0, 457.0, 541.0, 1149.0, 1627.0, 3340.0, 5164.0, 12609.0, 23466.0, 79783.0, 237232.0, 493273.0, 112240.0, 44298.0, 14608.0, 8378.0, 3717.0, 2438.0, 1230.0, 827.0, 455.0, 384.0, 195.0, 150.0, 74.0, 70.0, 32.0, 26.0, 17.0, 16.0, 6.0, 7.0, 1.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1205673217773438e-05, -1.0877847671508789e-05, -1.055002212524414e-05, -1.0222196578979492e-05, -9.894371032714844e-06, -9.566545486450195e-06, -9.238719940185547e-06, -8.910894393920898e-06, -8.58306884765625e-06, -8.255243301391602e-06, -7.927417755126953e-06, -7.599592208862305e-06, -7.271766662597656e-06, -6.943941116333008e-06, -6.616115570068359e-06, -6.288290023803711e-06, -5.9604644775390625e-06, -5.632638931274414e-06, -5.304813385009766e-06, -4.976987838745117e-06, -4.649162292480469e-06, -4.32133674621582e-06, -3.993511199951172e-06, -3.6656856536865234e-06, -3.337860107421875e-06, -3.0100345611572266e-06, -2.682209014892578e-06, -2.3543834686279297e-06, -2.0265579223632812e-06, -1.6987323760986328e-06, -1.3709068298339844e-06, -1.043081283569336e-06, -7.152557373046875e-07, -3.8743019104003906e-07, -5.960464477539063e-08, 2.682209014892578e-07, 5.960464477539062e-07, 9.238719940185547e-07, 1.2516975402832031e-06, 1.5795230865478516e-06, 1.9073486328125e-06, 2.2351741790771484e-06, 2.562999725341797e-06, 2.8908252716064453e-06, 3.2186508178710938e-06, 3.546476364135742e-06, 3.874301910400391e-06, 4.202127456665039e-06, 4.5299530029296875e-06, 4.857778549194336e-06, 
5.185604095458984e-06, 5.513429641723633e-06, 5.841255187988281e-06, 6.16908073425293e-06, 6.496906280517578e-06, 6.8247318267822266e-06, 7.152557373046875e-06, 7.4803829193115234e-06, 7.808208465576172e-06, 8.13603401184082e-06, 8.463859558105469e-06, 8.791685104370117e-06, 9.119510650634766e-06, 9.447336196899414e-06, 9.775161743164062e-06]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 8.0, 10.0, 5.0, 6.0, 9.0, 10.0, 34.0, 16.0, 26.0, 28.0, 48.0, 30.0, 39.0, 35.0, 82.0, 41.0, 43.0, 41.0, 78.0, 42.0, 44.0, 46.0, 41.0, 64.0, 30.0, 25.0, 21.0, 26.0, 14.0, 13.0, 8.0, 12.0, 6.0, 4.0, 6.0, 4.0, 4.0, 2.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-2.086162567138672e-06, -2.012588083744049e-06, -1.9390136003494263e-06, -1.8654391169548035e-06, -1.7918646335601807e-06, -1.7182901501655579e-06, -1.644715666770935e-06, -1.5711411833763123e-06, -1.4975666999816895e-06, -1.4239922165870667e-06, -1.3504177331924438e-06, -1.276843249797821e-06, -1.2032687664031982e-06, -1.1296942830085754e-06, -1.0561197996139526e-06, -9.825453162193298e-07, -9.08970832824707e-07, -8.353963494300842e-07, -7.618218660354614e-07, -6.882473826408386e-07, -6.146728992462158e-07, -5.41098415851593e-07, -4.675239324569702e-07, -3.939494490623474e-07, -3.203749656677246e-07, -2.468004822731018e-07, -1.73225998878479e-07, -9.96515154838562e-08, -2.60770320892334e-08, 4.7497451305389404e-08, 1.210719347000122e-07, 1.94646418094635e-07, 2.682209014892578e-07, 3.417953848838806e-07, 4.153698682785034e-07, 4.889443516731262e-07, 5.62518835067749e-07, 6.360933184623718e-07, 7.096678018569946e-07, 7.832422852516174e-07, 8.568167686462402e-07, 9.30391252040863e-07, 1.0039657354354858e-06, 1.0775402188301086e-06, 1.1511147022247314e-06, 1.2246891856193542e-06, 1.298263669013977e-06, 1.3718381524085999e-06, 1.4454126358032227e-06, 1.5189871191978455e-06, 1.5925616025924683e-06, 1.666136085987091e-06, 1.7397105693817139e-06, 1.8132850527763367e-06, 1.8868595361709595e-06, 1.9604340195655823e-06, 2.034008502960205e-06, 2.107582986354828e-06, 2.1811574697494507e-06, 2.2547319531440735e-06, 2.3283064365386963e-06, 2.401880919933319e-06, 2.475455403327942e-06, 2.5490298867225647e-06, 2.6226043701171875e-06]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 0.0, 1.0, 3.0, 3.0, 7.0, 3.0, 4.0, 10.0, 18.0, 15.0, 10.0, 39.0, 41.0, 59.0, 83.0, 109.0, 147.0, 233.0, 423.0, 494.0, 661.0, 1365.0, 1710.0, 2645.0, 4132.0, 10118.0, 14292.0, 26308.0, 52966.0, 207966.0, 401267.0, 177987.0, 82737.0, 26179.0, 14133.0, 8156.0, 6163.0, 2693.0, 1718.0, 1324.0, 633.0, 484.0, 347.0, 291.0, 144.0, 113.0, 121.0, 51.0, 43.0, 24.0, 30.0, 17.0, 13.0, 13.0, 7.0, 7.0, 5.0, 3.0, 1.0, 1.0, 2.0], "bins": [-6.556510925292969e-06, -6.360933184623718e-06, -6.165355443954468e-06, -5.969777703285217e-06, -5.774199962615967e-06, -5.578622221946716e-06, -5.383044481277466e-06, -5.187466740608215e-06, -4.991888999938965e-06, -4.796311259269714e-06, -4.600733518600464e-06, -4.405155777931213e-06, -4.209578037261963e-06, -4.014000296592712e-06, -3.818422555923462e-06, -3.6228448152542114e-06, -3.427267074584961e-06, -3.2316893339157104e-06, -3.03611159324646e-06, -2.8405338525772095e-06, -2.644956111907959e-06, -2.4493783712387085e-06, -2.253800630569458e-06, -2.0582228899002075e-06, -1.862645149230957e-06, -1.6670674085617065e-06, -1.471489667892456e-06, 
-1.2759119272232056e-06, -1.080334186553955e-06, -8.847564458847046e-07, -6.891787052154541e-07, -4.936009645462036e-07, -2.980232238769531e-07, -1.0244548320770264e-07, 9.313225746154785e-08, 2.8870999813079834e-07, 4.842877388000488e-07, 6.798654794692993e-07, 8.754432201385498e-07, 1.0710209608078003e-06, 1.2665987014770508e-06, 1.4621764421463013e-06, 1.6577541828155518e-06, 1.8533319234848022e-06, 2.0489096641540527e-06, 2.2444874048233032e-06, 2.4400651454925537e-06, 2.635642886161804e-06, 2.8312206268310547e-06, 3.026798367500305e-06, 3.2223761081695557e-06, 3.417953848838806e-06, 3.6135315895080566e-06, 3.809109330177307e-06, 4.004687070846558e-06, 4.200264811515808e-06, 4.395842552185059e-06, 4.591420292854309e-06, 4.7869980335235596e-06, 4.98257577419281e-06, 5.1781535148620605e-06, 5.373731255531311e-06, 5.5693089962005615e-06, 5.764886736869812e-06, 5.9604644775390625e-06]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 7.0, 6.0, 7.0, 5.0, 8.0, 8.0, 16.0, 27.0, 22.0, 25.0, 25.0, 55.0, 64.0, 41.0, 85.0, 52.0, 90.0, 81.0, 44.0, 69.0, 41.0, 57.0, 40.0, 10.0, 31.0, 14.0, 18.0, 17.0, 7.0, 10.0, 5.0, 1.0, 8.0, 0.0, 5.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.887580871582031e-06, -4.732050001621246e-06, -4.5765191316604614e-06, -4.4209882616996765e-06, -4.265457391738892e-06, -4.109926521778107e-06, -3.954395651817322e-06, -3.798864781856537e-06, -3.643333911895752e-06, -3.487803041934967e-06, -3.332272171974182e-06, -3.1767413020133972e-06, -3.0212104320526123e-06, -2.8656795620918274e-06, -2.7101486921310425e-06, -2.5546178221702576e-06, -2.3990869522094727e-06, -2.2435560822486877e-06, -2.088025212287903e-06, -1.932494342327118e-06, -1.776963472366333e-06, -1.621432602405548e-06, -1.4659017324447632e-06, -1.3103708624839783e-06, -1.1548399925231934e-06, -9.993091225624084e-07, -8.437782526016235e-07, -6.882473826408386e-07, -5.327165126800537e-07, -3.771856427192688e-07, -2.2165477275848389e-07, -6.612390279769897e-08, 8.940696716308594e-08, 2.4493783712387085e-07, 4.0046870708465576e-07, 5.559995770454407e-07, 7.115304470062256e-07, 8.670613169670105e-07, 1.0225921869277954e-06, 1.1781230568885803e-06, 1.3336539268493652e-06, 1.4891847968101501e-06, 1.644715666770935e-06, 1.80024653673172e-06, 1.955777406692505e-06, 2.11130827665329e-06, 2.2668391466140747e-06, 2.4223700165748596e-06, 2.5779008865356445e-06, 2.7334317564964294e-06, 2.8889626264572144e-06, 3.0444934964179993e-06, 3.200024366378784e-06, 3.355555236339569e-06, 3.511086106300354e-06, 3.666616976261139e-06, 3.822147846221924e-06, 3.977678716182709e-06, 4.133209586143494e-06, 4.2887404561042786e-06, 4.4442713260650635e-06, 4.599802196025848e-06, 4.755333065986633e-06, 4.910863935947418e-06, 5.066394805908203e-06]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 3.0, 1.0, 4.0, 1.0, 5.0, 3.0, 6.0, 2.0, 5.0, 11.0, 11.0, 11.0, 18.0, 28.0, 27.0, 50.0, 55.0, 117.0, 166.0, 119.0, 66.0, 57.0, 47.0, 33.0, 25.0, 27.0, 21.0, 13.0, 16.0, 12.0, 9.0, 9.0, 6.0, 5.0, 5.0, 3.0, 4.0, 2.0, 4.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0002200883609475568, -0.00021268957061693072, -0.00020529079483821988, -0.0001978920045075938, -0.00019049322872888297, -0.0001830944383982569, -0.00017569566261954606, -0.00016829687228891999, 
-0.00016089808195829391, -0.00015349929162766784, -0.000146100515848957, -0.00013870172551833093, -0.0001313029497396201, -0.00012390415940899402, -0.00011650537635432556, -0.0001091065932996571, -0.00010170781752094626, -9.430903446627781e-05, -8.691025141160935e-05, -7.951146108098328e-05, -7.211268530227244e-05, -6.471389497164637e-05, -5.731511191697791e-05, -4.9916328862309456e-05, -4.2517545807641e-05, -3.511876275297254e-05, -2.7719977879314683e-05, -2.0321193005656824e-05, -1.2922409950988367e-05, -5.523626896319911e-06, 1.8751597963273525e-06, 9.273942850995809e-06, 1.6672740457579494e-05, 2.407152351224795e-05, 3.1470306566916406e-05, 3.886909325956367e-05, 4.6267876314232126e-05, 5.366665936890058e-05, 6.106544606154785e-05, 6.84642291162163e-05, 7.586301217088476e-05, 8.326179522555321e-05, 9.066057828022167e-05, 9.805936133489013e-05, 0.0001054581516655162, 0.00011285692744422704, 0.00012025571777485311, 0.00012765449355356395, 0.00013505328388419002, 0.0001424520742148161, 0.00014985084999352694, 0.000157249640324153, 0.00016464841610286385, 0.00017204720643348992, 0.00017944598221220076, 0.00018684477254282683, 0.0001942435628734529, 0.00020164235320407897, 0.00020904112898278981, 0.00021643991931341588, 0.00022383869509212673, 0.0002312374854227528, 0.00023863627575337887, 0.0002460350515320897, 0.00025343382731080055]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 2.0, 6.0, 4.0, 5.0, 4.0, 2.0, 9.0, 6.0, 13.0, 10.0, 22.0, 10.0, 19.0, 30.0, 16.0, 18.0, 26.0, 19.0, 19.0, 34.0, 32.0, 33.0, 42.0, 34.0, 37.0, 32.0, 38.0, 42.0, 38.0, 49.0, 27.0, 38.0, 45.0, 34.0, 32.0, 18.0, 25.0, 25.0, 21.0, 16.0, 16.0, 10.0, 3.0, 8.0, 6.0, 12.0, 7.0, 7.0, 6.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00016266107559204102, -0.00015805289149284363, -0.00015344470739364624, -0.00014883652329444885, -0.00014422833919525146, -0.00013962015509605408, -0.0001350119709968567, -0.0001304037868976593, -0.00012579560279846191, -0.00012118741869926453, -0.00011657923460006714, -0.00011197105050086975, -0.00010736286640167236, -0.00010275468230247498, -9.814649820327759e-05, -9.35383141040802e-05, -8.893013000488281e-05, -8.432194590568542e-05, -7.971376180648804e-05, -7.510557770729065e-05, -7.049739360809326e-05, -6.588920950889587e-05, -6.128102540969849e-05, -5.66728413105011e-05, -5.206465721130371e-05, -4.745647311210632e-05, -4.2848289012908936e-05, -3.824010491371155e-05, -3.363192081451416e-05, -2.9023736715316772e-05, -2.4415552616119385e-05, -1.9807368516921997e-05, -1.519918441772461e-05, -1.0591000318527222e-05, -5.982816219329834e-06, -1.3746321201324463e-06, 3.2335519790649414e-06, 7.841736078262329e-06, 1.2449920177459717e-05, 1.7058104276657104e-05, 2.1666288375854492e-05, 2.627447247505188e-05, 3.088265657424927e-05, 3.5490840673446655e-05, 4.009902477264404e-05, 4.470720887184143e-05, 4.931539297103882e-05, 5.3923577070236206e-05, 5.8531761169433594e-05, 6.313994526863098e-05, 6.774812936782837e-05, 7.235631346702576e-05, 7.696449756622314e-05, 8.157268166542053e-05, 8.618086576461792e-05, 9.078904986381531e-05, 9.53972339630127e-05, 0.00010000541806221008, 0.00010461360216140747, 0.00010922178626060486, 0.00011382997035980225, 0.00011843815445899963, 0.00012304633855819702, 0.0001276545226573944, 0.0001322627067565918]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 4.0, 2.0, 7.0, 7.0, 5.0, 19.0, 17.0, 28.0, 38.0, 71.0, 107.0, 168.0, 
309.0, 427.0, 730.0, 1225.0, 2180.0, 4159.0, 8562.0, 18831.0, 49252.0, 239463.0, 3726790.0, 82496.0, 29491.0, 13318.0, 6677.0, 3688.0, 2220.0, 1296.0, 814.0, 575.0, 339.0, 257.0, 190.0, 135.0, 92.0, 76.0, 43.0, 39.0, 32.0, 18.0, 11.0, 17.0, 13.0, 12.0, 9.0, 6.0, 7.0, 5.0, 3.0, 5.0, 4.0, 4.0, 1.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00013911724090576172, -0.0001331288367509842, -0.00012714043259620667, -0.00012115202844142914, -0.00011516362428665161, -0.00010917522013187408, -0.00010318681597709656, -9.719841182231903e-05, -9.12100076675415e-05, -8.522160351276398e-05, -7.923319935798645e-05, -7.324479520320892e-05, -6.72563910484314e-05, -6.126798689365387e-05, -5.527958273887634e-05, -4.9291178584098816e-05, -4.330277442932129e-05, -3.731437027454376e-05, -3.1325966119766235e-05, -2.533756196498871e-05, -1.934915781021118e-05, -1.3360753655433655e-05, -7.372349500656128e-06, -1.383945345878601e-06, 4.604458808898926e-06, 1.0592862963676453e-05, 1.658126711845398e-05, 2.2569671273231506e-05, 2.8558075428009033e-05, 3.454647958278656e-05, 4.053488373756409e-05, 4.6523287892341614e-05, 5.251169204711914e-05, 5.850009620189667e-05, 6.44885003566742e-05, 7.047690451145172e-05, 7.646530866622925e-05, 8.245371282100677e-05, 8.84421169757843e-05, 9.443052113056183e-05, 0.00010041892528533936, 0.00010640732944011688, 0.00011239573359489441, 0.00011838413774967194, 0.00012437254190444946, 0.000130360946059227, 0.00013634935021400452, 0.00014233775436878204, 0.00014832615852355957, 0.0001543145626783371, 0.00016030296683311462, 0.00016629137098789215, 0.00017227977514266968, 0.0001782681792974472, 0.00018425658345222473, 0.00019024498760700226, 0.00019623339176177979, 0.0002022217959165573, 0.00020821020007133484, 0.00021419860422611237, 0.0002201870083808899, 0.00022617541253566742, 0.00023216381669044495, 0.00023815222084522247, 0.000244140625]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 3.0, 7.0, 10.0, 17.0, 22.0, 25.0, 46.0, 64.0, 78.0, 85.0, 109.0, 124.0, 94.0, 74.0, 66.0, 48.0, 33.0, 23.0, 19.0, 16.0, 9.0, 8.0, 6.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4199485778808594e-05, -2.319924533367157e-05, -2.2199004888534546e-05, -2.1198764443397522e-05, -2.0198523998260498e-05, -1.9198283553123474e-05, -1.819804310798645e-05, -1.7197802662849426e-05, -1.6197562217712402e-05, -1.5197321772575378e-05, -1.4197081327438354e-05, -1.319684088230133e-05, -1.2196600437164307e-05, -1.1196359992027283e-05, -1.0196119546890259e-05, -9.195879101753235e-06, -8.195638656616211e-06, -7.195398211479187e-06, -6.195157766342163e-06, -5.194917321205139e-06, -4.194676876068115e-06, -3.1944364309310913e-06, -2.1941959857940674e-06, -1.1939555406570435e-06, -1.9371509552001953e-07, 8.065253496170044e-07, 1.8067657947540283e-06, 2.8070062398910522e-06, 3.807246685028076e-06, 4.8074871301651e-06, 5.807727575302124e-06, 6.807968020439148e-06, 7.808208465576172e-06, 8.808448910713196e-06, 9.80868935585022e-06, 1.0808929800987244e-05, 1.1809170246124268e-05, 1.2809410691261292e-05, 1.3809651136398315e-05, 1.480989158153534e-05, 1.5810132026672363e-05, 1.6810372471809387e-05, 1.781061291694641e-05, 1.8810853362083435e-05, 1.981109380722046e-05, 2.0811334252357483e-05, 2.1811574697494507e-05, 2.281181514263153e-05, 2.3812055587768555e-05, 2.481229603290558e-05, 
2.5812536478042603e-05, 2.6812776923179626e-05, 2.781301736831665e-05, 2.8813257813453674e-05, 2.9813498258590698e-05, 3.081373870372772e-05, 3.1813979148864746e-05, 3.281421959400177e-05, 3.3814460039138794e-05, 3.481470048427582e-05, 3.581494092941284e-05, 3.6815181374549866e-05, 3.781542181968689e-05, 3.8815662264823914e-05, 3.981590270996094e-05]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 5.0, 2.0, 4.0, 2.0, 2.0, 7.0, 13.0, 7.0, 12.0, 18.0, 26.0, 32.0, 44.0, 52.0, 81.0, 106.0, 175.0, 245.0, 375.0, 651.0, 1020.0, 1534.0, 2699.0, 4221.0, 7010.0, 12997.0, 23622.0, 48086.0, 116366.0, 764551.0, 2932613.0, 154417.0, 60191.0, 27688.0, 14647.0, 8288.0, 4727.0, 2826.0, 1732.0, 1077.0, 720.0, 448.0, 301.0, 203.0, 128.0, 85.0, 72.0, 44.0, 27.0, 29.0, 14.0, 18.0, 6.0, 10.0, 7.0, 8.0, 0.0, 4.0, 1.0, 1.0, 2.0, 1.0, 3.0], "bins": [-9.143352508544922e-05, -8.849520236253738e-05, -8.555687963962555e-05, -8.261855691671371e-05, -7.968023419380188e-05, -7.674191147089005e-05, -7.380358874797821e-05, -7.086526602506638e-05, -6.792694330215454e-05, -6.49886205792427e-05, -6.205029785633087e-05, -5.911197513341904e-05, -5.61736524105072e-05, -5.323532968759537e-05, -5.029700696468353e-05, -4.73586842417717e-05, -4.442036151885986e-05, -4.148203879594803e-05, -3.8543716073036194e-05, -3.560539335012436e-05, -3.2667070627212524e-05, -2.972874790430069e-05, -2.6790425181388855e-05, -2.385210245847702e-05, -2.0913779735565186e-05, -1.797545701265335e-05, -1.5037134289741516e-05, -1.2098811566829681e-05, -9.160488843917847e-06, -6.222166121006012e-06, -3.2838433980941772e-06, -3.4552067518234253e-07, 2.592802047729492e-06, 5.531124770641327e-06, 8.469447493553162e-06, 1.1407770216464996e-05, 1.4346092939376831e-05, 1.7284415662288666e-05, 2.02227383852005e-05, 2.3161061108112335e-05, 2.609938383102417e-05, 2.9037706553936005e-05, 3.197602927684784e-05, 3.4914351999759674e-05, 3.785267472267151e-05, 4.0790997445583344e-05, 4.372932016849518e-05, 4.666764289140701e-05, 4.960596561431885e-05, 5.254428833723068e-05, 5.548261106014252e-05, 5.842093378305435e-05, 6.135925650596619e-05, 6.429757922887802e-05, 6.723590195178986e-05, 7.017422467470169e-05, 7.311254739761353e-05, 7.605087012052536e-05, 7.89891928434372e-05, 8.192751556634903e-05, 8.486583828926086e-05, 8.78041610121727e-05, 9.074248373508453e-05, 9.368080645799637e-05, 9.66191291809082e-05]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 6.0, 7.0, 11.0, 8.0, 2.0, 10.0, 14.0, 18.0, 24.0, 19.0, 44.0, 56.0, 81.0, 122.0, 333.0, 1854.0, 755.0, 223.0, 105.0, 74.0, 68.0, 47.0, 37.0, 29.0, 28.0, 19.0, 21.0, 14.0, 11.0, 14.0, 5.0, 4.0, 6.0, 6.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.777048110961914e-05, -6.588082760572433e-05, -6.399117410182953e-05, -6.210152059793472e-05, -6.021186709403992e-05, -5.832221359014511e-05, -5.6432560086250305e-05, -5.45429065823555e-05, -5.265325307846069e-05, -5.076359957456589e-05, -4.887394607067108e-05, -4.6984292566776276e-05, -4.509463906288147e-05, -4.3204985558986664e-05, -4.131533205509186e-05, -3.942567855119705e-05, -3.7536025047302246e-05, -3.564637154340744e-05, -3.3756718039512634e-05, -3.186706453561783e-05, -2.9977411031723022e-05, -2.8087757527828217e-05, -2.619810402393341e-05, -2.4308450520038605e-05, -2.24187970161438e-05, -2.0529143512248993e-05, 
-1.8639490008354187e-05, -1.674983650445938e-05, -1.4860183000564575e-05, -1.297052949666977e-05, -1.1080875992774963e-05, -9.191222488880157e-06, -7.3015689849853516e-06, -5.411915481090546e-06, -3.5222619771957397e-06, -1.6326084733009338e-06, 2.5704503059387207e-07, 2.146698534488678e-06, 4.036352038383484e-06, 5.92600554227829e-06, 7.815659046173096e-06, 9.705312550067902e-06, 1.1594966053962708e-05, 1.3484619557857513e-05, 1.537427306175232e-05, 1.7263926565647125e-05, 1.915358006954193e-05, 2.1043233573436737e-05, 2.2932887077331543e-05, 2.482254058122635e-05, 2.6712194085121155e-05, 2.860184758901596e-05, 3.0491501092910767e-05, 3.238115459680557e-05, 3.427080810070038e-05, 3.6160461604595184e-05, 3.805011510848999e-05, 3.9939768612384796e-05, 4.18294221162796e-05, 4.371907562017441e-05, 4.5608729124069214e-05, 4.749838262796402e-05, 4.9388036131858826e-05, 5.127768963575363e-05, 5.316734313964844e-05]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 3.0, 4.0, 2.0, 10.0, 5.0, 7.0, 13.0, 10.0, 24.0, 32.0, 37.0, 48.0, 87.0, 85.0, 76.0, 88.0, 88.0, 79.0, 59.0, 47.0, 43.0, 33.0, 16.0, 22.0, 11.0, 9.0, 8.0, 13.0, 9.0, 5.0, 7.0, 9.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001974922197405249, -0.00018885031749960035, -0.00018020841525867581, -0.00017156651301775128, -0.00016292461077682674, -0.0001542827085359022, -0.00014564080629497766, -0.0001369988895021379, -0.0001283570018131286, -0.00011971509957220405, -0.00011107319733127952, -0.00010243129509035498, -9.378939284943044e-05, -8.51474906085059e-05, -7.650558109162375e-05, -6.786367885069922e-05, -5.9221769333817065e-05, -5.057986709289253e-05, -4.193796485196799e-05, -3.3296058973064646e-05, -2.465415673214011e-05, -1.6012254491215572e-05, -7.3703486123122275e-06, 1.2715536286123097e-06, 9.913455869536847e-06, 1.8555358110461384e-05, 2.7197262170375325e-05, 3.5839166230289266e-05, 4.44810684712138e-05, 5.312297071213834e-05, 6.176487659104168e-05, 7.040677883196622e-05, 7.904868107289076e-05, 8.76905833138153e-05, 9.633248555473983e-05, 0.00010497438779566437, 0.00011361629003658891, 0.00012225819227751344, 0.00013090009451843798, 0.00013954201131127775, 0.00014818389900028706, 0.0001568258012412116, 0.00016546770348213613, 0.00017410960572306067, 0.0001827515079639852, 0.00019139341020490974, 0.00020003531244583428, 0.00020867722923867404, 0.00021731913147959858, 0.00022596103372052312, 0.00023460293596144766, 0.0002432448382023722, 0.00025188675499521196, 0.00026052864268422127, 0.00026917055947706103, 0.00027781244716607034, 0.0002864543639589101, 0.0002950962807517499, 0.0003037381684407592, 0.00031238008523359895, 0.00032102197292260826, 0.000329663889715448, 0.00033830577740445733, 0.0003469476941972971, 0.0003555895818863064]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 6.0, 1.0, 6.0, 6.0, 4.0, 6.0, 9.0, 2.0, 10.0, 6.0, 12.0, 14.0, 17.0, 20.0, 15.0, 26.0, 20.0, 33.0, 26.0, 42.0, 31.0, 35.0, 32.0, 35.0, 34.0, 41.0, 39.0, 38.0, 34.0, 36.0, 41.0, 34.0, 26.0, 29.0, 29.0, 27.0, 21.0, 28.0, 18.0, 23.0, 15.0, 15.0, 11.0, 13.0, 8.0, 10.0, 7.0, 7.0, 4.0, 1.0, 2.0, 4.0, 2.0, 3.0, 4.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0001367330551147461, -0.0001319926232099533, -0.00012725219130516052, -0.00012251175940036774, -0.00011777132749557495, -0.00011303089559078217, 
-0.00010829046368598938, -0.0001035500317811966, -9.880959987640381e-05, -9.406916797161102e-05, -8.932873606681824e-05, -8.458830416202545e-05, -7.984787225723267e-05, -7.510744035243988e-05, -7.03670084476471e-05, -6.562657654285431e-05, -6.0886144638061523e-05, -5.614571273326874e-05, -5.140528082847595e-05, -4.6664848923683167e-05, -4.192441701889038e-05, -3.7183985114097595e-05, -3.244355320930481e-05, -2.7703121304512024e-05, -2.2962689399719238e-05, -1.8222257494926453e-05, -1.3481825590133667e-05, -8.741393685340881e-06, -4.000961780548096e-06, 7.394701242446899e-07, 5.479902029037476e-06, 1.0220333933830261e-05, 1.4960765838623047e-05, 1.9701197743415833e-05, 2.4441629648208618e-05, 2.9182061553001404e-05, 3.392249345779419e-05, 3.8662925362586975e-05, 4.340335726737976e-05, 4.8143789172172546e-05, 5.288422107696533e-05, 5.762465298175812e-05, 6.23650848865509e-05, 6.710551679134369e-05, 7.184594869613647e-05, 7.658638060092926e-05, 8.132681250572205e-05, 8.606724441051483e-05, 9.080767631530762e-05, 9.55481082201004e-05, 0.00010028854012489319, 0.00010502897202968597, 0.00010976940393447876, 0.00011450983583927155, 0.00011925026774406433, 0.00012399069964885712, 0.0001287311315536499, 0.0001334715634584427, 0.00013821199536323547, 0.00014295242726802826, 0.00014769285917282104, 0.00015243329107761383, 0.00015717372298240662, 0.0001619141548871994, 0.0001666545867919922]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 6.0, 11.0, 10.0, 8.0, 16.0, 20.0, 41.0, 46.0, 81.0, 137.0, 223.0, 327.0, 521.0, 994.0, 1701.0, 3089.0, 5981.0, 12363.0, 29575.0, 84191.0, 430490.0, 351403.0, 75858.0, 27151.0, 11868.0, 5511.0, 2985.0, 1619.0, 899.0, 516.0, 356.0, 199.0, 142.0, 72.0, 51.0, 32.0, 14.0, 16.0, 13.0, 7.0, 4.0, 8.0, 4.0, 0.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00015997886657714844, -0.00015445426106452942, -0.0001489296555519104, -0.00014340505003929138, -0.00013788044452667236, -0.00013235583901405334, -0.00012683123350143433, -0.00012130662798881531, -0.00011578202247619629, -0.00011025741696357727, -0.00010473281145095825, -9.920820593833923e-05, -9.368360042572021e-05, -8.81589949131012e-05, -8.263438940048218e-05, -7.710978388786316e-05, -7.158517837524414e-05, -6.606057286262512e-05, -6.0535967350006104e-05, -5.5011361837387085e-05, -4.9486756324768066e-05, -4.396215081214905e-05, -3.843754529953003e-05, -3.291293978691101e-05, -2.7388334274291992e-05, -2.1863728761672974e-05, -1.6339123249053955e-05, -1.0814517736434937e-05, -5.289912223815918e-06, 2.3469328880310059e-07, 5.759298801422119e-06, 1.1283904314041138e-05, 1.6808509826660156e-05, 2.2333115339279175e-05, 2.7857720851898193e-05, 3.338232636451721e-05, 3.890693187713623e-05, 4.443153738975525e-05, 4.995614290237427e-05, 5.5480748414993286e-05, 6.1005353927612305e-05, 6.652995944023132e-05, 7.205456495285034e-05, 7.757917046546936e-05, 8.310377597808838e-05, 8.86283814907074e-05, 9.415298700332642e-05, 9.967759251594543e-05, 0.00010520219802856445, 0.00011072680354118347, 0.00011625140905380249, 0.00012177601456642151, 0.00012730062007904053, 0.00013282522559165955, 0.00013834983110427856, 0.00014387443661689758, 0.0001493990421295166, 0.00015492364764213562, 0.00016044825315475464, 0.00016597285866737366, 0.00017149746417999268, 0.0001770220696926117, 0.0001825466752052307, 0.00018807128071784973, 0.00019359588623046875]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": 
"histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 4.0, 6.0, 7.0, 8.0, 7.0, 16.0, 13.0, 30.0, 35.0, 44.0, 53.0, 66.0, 91.0, 77.0, 83.0, 95.0, 77.0, 79.0, 45.0, 35.0, 31.0, 24.0, 23.0, 12.0, 6.0, 9.0, 5.0, 7.0, 7.0, 6.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.199411392211914e-05, -2.1134503185749054e-05, -2.0274892449378967e-05, -1.941528171300888e-05, -1.8555670976638794e-05, -1.7696060240268707e-05, -1.683644950389862e-05, -1.5976838767528534e-05, -1.5117228031158447e-05, -1.425761729478836e-05, -1.3398006558418274e-05, -1.2538395822048187e-05, -1.16787850856781e-05, -1.0819174349308014e-05, -9.959563612937927e-06, -9.09995287656784e-06, -8.240342140197754e-06, -7.380731403827667e-06, -6.5211206674575806e-06, -5.661509931087494e-06, -4.801899194717407e-06, -3.9422884583473206e-06, -3.082677721977234e-06, -2.2230669856071472e-06, -1.3634562492370605e-06, -5.038455128669739e-07, 3.557652235031128e-07, 1.2153759598731995e-06, 2.074986696243286e-06, 2.934597432613373e-06, 3.7942081689834595e-06, 4.653818905353546e-06, 5.513429641723633e-06, 6.3730403780937195e-06, 7.232651114463806e-06, 8.092261850833893e-06, 8.95187258720398e-06, 9.811483323574066e-06, 1.0671094059944153e-05, 1.153070479631424e-05, 1.2390315532684326e-05, 1.3249926269054413e-05, 1.41095370054245e-05, 1.4969147741794586e-05, 1.5828758478164673e-05, 1.668836921453476e-05, 1.7547979950904846e-05, 1.8407590687274933e-05, 1.926720142364502e-05, 2.0126812160015106e-05, 2.0986422896385193e-05, 2.184603363275528e-05, 2.2705644369125366e-05, 2.3565255105495453e-05, 2.442486584186554e-05, 2.5284476578235626e-05, 2.6144087314605713e-05, 2.70036980509758e-05, 2.7863308787345886e-05, 2.8722919523715973e-05, 2.958253026008606e-05, 3.0442140996456146e-05, 3.130175173282623e-05, 3.216136246919632e-05, 3.3020973205566406e-05]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 3.0, 1.0, 5.0, 4.0, 5.0, 6.0, 22.0, 30.0, 28.0, 55.0, 64.0, 105.0, 174.0, 265.0, 424.0, 623.0, 1039.0, 1532.0, 2608.0, 4209.0, 6990.0, 11531.0, 20267.0, 35973.0, 66498.0, 137978.0, 367953.0, 197541.0, 85976.0, 45481.0, 25039.0, 14263.0, 8449.0, 5053.0, 3155.0, 1891.0, 1175.0, 770.0, 454.0, 316.0, 194.0, 159.0, 84.0, 56.0, 41.0, 25.0, 18.0, 8.0, 9.0, 8.0, 6.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.014108657836914e-05, -5.8115459978580475e-05, -5.608983337879181e-05, -5.406420677900314e-05, -5.203858017921448e-05, -5.001295357942581e-05, -4.7987326979637146e-05, -4.596170037984848e-05, -4.3936073780059814e-05, -4.191044718027115e-05, -3.988482058048248e-05, -3.785919398069382e-05, -3.583356738090515e-05, -3.3807940781116486e-05, -3.178231418132782e-05, -2.9756687581539154e-05, -2.7731060981750488e-05, -2.5705434381961823e-05, -2.3679807782173157e-05, -2.165418118238449e-05, -1.9628554582595825e-05, -1.760292798280716e-05, -1.5577301383018494e-05, -1.3551674783229828e-05, -1.1526048183441162e-05, -9.500421583652496e-06, -7.4747949838638306e-06, -5.449168384075165e-06, -3.423541784286499e-06, -1.3979151844978333e-06, 6.277114152908325e-07, 2.6533380150794983e-06, 4.678964614868164e-06, 6.70459121465683e-06, 8.730217814445496e-06, 1.0755844414234161e-05, 1.2781471014022827e-05, 1.4807097613811493e-05, 1.683272421360016e-05, 1.8858350813388824e-05, 2.088397741317749e-05, 2.2909604012966156e-05, 2.4935230612754822e-05, 2.6960857212543488e-05, 2.8986483812332153e-05, 
3.101211041212082e-05, 3.3037737011909485e-05, 3.506336361169815e-05, 3.7088990211486816e-05, 3.911461681127548e-05, 4.114024341106415e-05, 4.3165870010852814e-05, 4.519149661064148e-05, 4.7217123210430145e-05, 4.924274981021881e-05, 5.126837641000748e-05, 5.329400300979614e-05, 5.531962960958481e-05, 5.7345256209373474e-05, 5.937088280916214e-05, 6.13965094089508e-05, 6.342213600873947e-05, 6.544776260852814e-05, 6.74733892083168e-05, 6.949901580810547e-05]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 4.0, 6.0, 5.0, 7.0, 14.0, 10.0, 9.0, 17.0, 15.0, 25.0, 26.0, 32.0, 39.0, 51.0, 51.0, 32.0, 41.0, 47.0, 39.0, 49.0, 48.0, 64.0, 50.0, 46.0, 36.0, 41.0, 34.0, 34.0, 29.0, 18.0, 17.0, 18.0, 10.0, 12.0, 9.0, 7.0, 4.0, 5.0, 4.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.935264587402344e-05, -4.767347127199173e-05, -4.599429666996002e-05, -4.4315122067928314e-05, -4.2635947465896606e-05, -4.09567728638649e-05, -3.927759826183319e-05, -3.759842365980148e-05, -3.5919249057769775e-05, -3.424007445573807e-05, -3.256089985370636e-05, -3.088172525167465e-05, -2.9202550649642944e-05, -2.7523376047611237e-05, -2.584420144557953e-05, -2.416502684354782e-05, -2.2485852241516113e-05, -2.0806677639484406e-05, -1.9127503037452698e-05, -1.744832843542099e-05, -1.5769153833389282e-05, -1.4089979231357574e-05, -1.2410804629325867e-05, -1.0731630027294159e-05, -9.052455425262451e-06, -7.373280823230743e-06, -5.694106221199036e-06, -4.014931619167328e-06, -2.33575701713562e-06, -6.565824151039124e-07, 1.0225921869277954e-06, 2.701766788959503e-06, 4.380941390991211e-06, 6.060115993022919e-06, 7.739290595054626e-06, 9.418465197086334e-06, 1.1097639799118042e-05, 1.277681440114975e-05, 1.4455989003181458e-05, 1.6135163605213165e-05, 1.7814338207244873e-05, 1.949351280927658e-05, 2.117268741130829e-05, 2.2851862013339996e-05, 2.4531036615371704e-05, 2.6210211217403412e-05, 2.788938581943512e-05, 2.9568560421466827e-05, 3.1247735023498535e-05, 3.292690962553024e-05, 3.460608422756195e-05, 3.628525882959366e-05, 3.7964433431625366e-05, 3.9643608033657074e-05, 4.132278263568878e-05, 4.300195723772049e-05, 4.46811318397522e-05, 4.6360306441783905e-05, 4.803948104381561e-05, 4.971865564584732e-05, 5.139783024787903e-05, 5.3077004849910736e-05, 5.4756179451942444e-05, 5.643535405397415e-05, 5.811452865600586e-05]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 5.0, 6.0, 8.0, 3.0, 24.0, 19.0, 19.0, 29.0, 42.0, 53.0, 72.0, 103.0, 128.0, 208.0, 326.0, 435.0, 657.0, 1110.0, 1813.0, 3276.0, 6190.0, 13152.0, 33599.0, 115006.0, 517143.0, 255914.0, 58849.0, 20536.0, 7811.0, 5077.0, 2718.0, 1453.0, 930.0, 586.0, 374.0, 251.0, 188.0, 120.0, 94.0, 61.0, 49.0, 25.0, 28.0, 21.0, 18.0, 12.0, 13.0, 6.0, 5.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.834766387939453e-06, -9.480863809585571e-06, -9.12696123123169e-06, -8.773058652877808e-06, -8.419156074523926e-06, -8.065253496170044e-06, -7.711350917816162e-06, -7.35744833946228e-06, -7.0035457611083984e-06, -6.649643182754517e-06, -6.295740604400635e-06, -5.941838026046753e-06, -5.587935447692871e-06, -5.234032869338989e-06, -4.880130290985107e-06, -4.526227712631226e-06, -4.172325134277344e-06, -3.818422555923462e-06, -3.46451997756958e-06, -3.1106173992156982e-06, -2.7567148208618164e-06, -2.4028122425079346e-06, -2.0489096641540527e-06, 
-1.695007085800171e-06, -1.341104507446289e-06, -9.872019290924072e-07, -6.332993507385254e-07, -2.7939677238464355e-07, 7.450580596923828e-08, 4.284083843231201e-07, 7.82310962677002e-07, 1.1362135410308838e-06, 1.4901161193847656e-06, 1.8440186977386475e-06, 2.1979212760925293e-06, 2.551823854446411e-06, 2.905726432800293e-06, 3.259629011154175e-06, 3.6135315895080566e-06, 3.9674341678619385e-06, 4.32133674621582e-06, 4.675239324569702e-06, 5.029141902923584e-06, 5.383044481277466e-06, 5.736947059631348e-06, 6.0908496379852295e-06, 6.444752216339111e-06, 6.798654794692993e-06, 7.152557373046875e-06, 7.506459951400757e-06, 7.860362529754639e-06, 8.21426510810852e-06, 8.568167686462402e-06, 8.922070264816284e-06, 9.275972843170166e-06, 9.629875421524048e-06, 9.98377799987793e-06, 1.0337680578231812e-05, 1.0691583156585693e-05, 1.1045485734939575e-05, 1.1399388313293457e-05, 1.1753290891647339e-05, 1.210719347000122e-05, 1.2461096048355103e-05, 1.2814998626708984e-05]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 4.0, 5.0, 7.0, 10.0, 8.0, 19.0, 48.0, 58.0, 97.0, 53.0, 138.0, 141.0, 128.0, 94.0, 70.0, 30.0, 37.0, 23.0, 17.0, 5.0, 2.0, 2.0, 4.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.5762786865234375e-06, -3.468245267868042e-06, -3.3602118492126465e-06, -3.252178430557251e-06, -3.1441450119018555e-06, -3.03611159324646e-06, -2.9280781745910645e-06, -2.820044755935669e-06, -2.7120113372802734e-06, -2.603977918624878e-06, -2.4959444999694824e-06, -2.387911081314087e-06, -2.2798776626586914e-06, -2.171844244003296e-06, -2.0638108253479004e-06, -1.955777406692505e-06, -1.8477439880371094e-06, -1.7397105693817139e-06, -1.6316771507263184e-06, -1.5236437320709229e-06, -1.4156103134155273e-06, -1.3075768947601318e-06, -1.1995434761047363e-06, -1.0915100574493408e-06, -9.834766387939453e-07, -8.754432201385498e-07, -7.674098014831543e-07, -6.593763828277588e-07, -5.513429641723633e-07, -4.4330954551696777e-07, -3.3527612686157227e-07, -2.2724270820617676e-07, -1.1920928955078125e-07, -1.1175870895385742e-08, 9.685754776000977e-08, 2.0489096641540527e-07, 3.129243850708008e-07, 4.209578037261963e-07, 5.289912223815918e-07, 6.370246410369873e-07, 7.450580596923828e-07, 8.530914783477783e-07, 9.611248970031738e-07, 1.0691583156585693e-06, 1.1771917343139648e-06, 1.2852251529693604e-06, 1.3932585716247559e-06, 1.5012919902801514e-06, 1.6093254089355469e-06, 1.7173588275909424e-06, 1.8253922462463379e-06, 1.9334256649017334e-06, 2.041459083557129e-06, 2.1494925022125244e-06, 2.25752592086792e-06, 2.3655593395233154e-06, 2.473592758178711e-06, 2.5816261768341064e-06, 2.689659595489502e-06, 2.7976930141448975e-06, 2.905726432800293e-06, 3.0137598514556885e-06, 3.121793270111084e-06, 3.2298266887664795e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 3.0, 9.0, 7.0, 16.0, 15.0, 32.0, 41.0, 43.0, 100.0, 97.0, 210.0, 209.0, 424.0, 470.0, 997.0, 1156.0, 1780.0, 3977.0, 5291.0, 13248.0, 20096.0, 62326.0, 125693.0, 366601.0, 305796.0, 67959.0, 38388.0, 13091.0, 9389.0, 3707.0, 2964.0, 1408.0, 946.0, 755.0, 349.0, 331.0, 184.0, 161.0, 82.0, 42.0, 64.0, 27.0, 18.0, 15.0, 16.0, 9.0, 6.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.496906280517578e-06, 
-6.292015314102173e-06, -6.087124347686768e-06, -5.882233381271362e-06, -5.677342414855957e-06, -5.472451448440552e-06, -5.2675604820251465e-06, -5.062669515609741e-06, -4.857778549194336e-06, -4.652887582778931e-06, -4.447996616363525e-06, -4.24310564994812e-06, -4.038214683532715e-06, -3.8333237171173096e-06, -3.6284327507019043e-06, -3.423541784286499e-06, -3.2186508178710938e-06, -3.0137598514556885e-06, -2.808868885040283e-06, -2.603977918624878e-06, -2.3990869522094727e-06, -2.1941959857940674e-06, -1.989305019378662e-06, -1.7844140529632568e-06, -1.5795230865478516e-06, -1.3746321201324463e-06, -1.169741153717041e-06, -9.648501873016357e-07, -7.599592208862305e-07, -5.550682544708252e-07, -3.501772880554199e-07, -1.4528632164001465e-07, 5.960464477539063e-08, 2.644956111907959e-07, 4.6938657760620117e-07, 6.742775440216064e-07, 8.791685104370117e-07, 1.084059476852417e-06, 1.2889504432678223e-06, 1.4938414096832275e-06, 1.6987323760986328e-06, 1.903623342514038e-06, 2.1085143089294434e-06, 2.3134052753448486e-06, 2.518296241760254e-06, 2.723187208175659e-06, 2.9280781745910645e-06, 3.1329691410064697e-06, 3.337860107421875e-06, 3.5427510738372803e-06, 3.7476420402526855e-06, 3.952533006668091e-06, 4.157423973083496e-06, 4.362314939498901e-06, 4.567205905914307e-06, 4.772096872329712e-06, 4.976987838745117e-06, 5.1818788051605225e-06, 5.386769771575928e-06, 5.591660737991333e-06, 5.796551704406738e-06, 6.0014426708221436e-06, 6.206333637237549e-06, 6.411224603652954e-06, 6.616115570068359e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 4.0, 7.0, 6.0, 6.0, 13.0, 17.0, 18.0, 24.0, 24.0, 29.0, 37.0, 43.0, 90.0, 91.0, 88.0, 47.0, 85.0, 65.0, 73.0, 57.0, 44.0, 47.0, 19.0, 15.0, 12.0, 10.0, 8.0, 4.0, 4.0, 1.0, 1.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.304813385009766e-06, -5.1353126764297485e-06, -4.9658119678497314e-06, -4.796311259269714e-06, -4.626810550689697e-06, -4.45730984210968e-06, -4.287809133529663e-06, -4.118308424949646e-06, -3.948807716369629e-06, -3.779307007789612e-06, -3.6098062992095947e-06, -3.4403055906295776e-06, -3.2708048820495605e-06, -3.1013041734695435e-06, -2.9318034648895264e-06, -2.7623027563095093e-06, -2.592802047729492e-06, -2.423301339149475e-06, -2.253800630569458e-06, -2.084299921989441e-06, -1.914799213409424e-06, -1.7452985048294067e-06, -1.5757977962493896e-06, -1.4062970876693726e-06, -1.2367963790893555e-06, -1.0672956705093384e-06, -8.977949619293213e-07, -7.282942533493042e-07, -5.587935447692871e-07, -3.8929283618927e-07, -2.1979212760925293e-07, -5.029141902923584e-08, 1.1920928955078125e-07, 2.8870999813079834e-07, 4.5821070671081543e-07, 6.277114152908325e-07, 7.972121238708496e-07, 9.667128324508667e-07, 1.1362135410308838e-06, 1.3057142496109009e-06, 1.475214958190918e-06, 1.644715666770935e-06, 1.8142163753509521e-06, 1.9837170839309692e-06, 2.1532177925109863e-06, 2.3227185010910034e-06, 2.4922192096710205e-06, 2.6617199182510376e-06, 2.8312206268310547e-06, 3.0007213354110718e-06, 3.170222043991089e-06, 3.339722752571106e-06, 3.509223461151123e-06, 3.67872416973114e-06, 3.848224878311157e-06, 4.017725586891174e-06, 4.187226295471191e-06, 4.3567270040512085e-06, 4.526227712631226e-06, 4.695728421211243e-06, 4.86522912979126e-06, 5.034729838371277e-06, 5.204230546951294e-06, 5.373731255531311e-06, 5.543231964111328e-06]}, 
"gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 7.0, 5.0, 10.0, 5.0, 16.0, 32.0, 27.0, 63.0, 86.0, 143.0, 169.0, 114.0, 68.0, 57.0, 43.0, 32.0, 20.0, 18.0, 20.0, 14.0, 7.0, 16.0, 4.0, 5.0, 5.0, 2.0, 6.0, 4.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00018661469221115112, -0.00017955238581635058, -0.00017249007942155004, -0.0001654277730267495, -0.00015836546663194895, -0.0001513031602371484, -0.00014424085384234786, -0.0001371785328956321, -0.00013011622650083154, -0.000123053920106031, -0.00011599161371123046, -0.00010892930731642991, -0.00010186699364567176, -9.480468725087121e-05, -8.774238085607067e-05, -8.068006718531251e-05, -7.361776806646958e-05, -6.655546167166904e-05, -5.9493151638889685e-05, -5.243084524408914e-05, -4.536853521130979e-05, -3.830622881650925e-05, -3.12439224217087e-05, -2.4181612388929352e-05, -1.711930599412881e-05, -1.0056997780338861e-05, -2.9946904760436155e-06, 4.06761682825163e-06, 1.1129925042041577e-05, 1.8192233255831525e-05, 2.525453965063207e-05, 3.231684968341142e-05, 3.937915607821196e-05, 4.644146247301251e-05, 5.350377250579186e-05, 6.05660789005924e-05, 6.762838893337175e-05, 7.46906953281723e-05, 8.175300172297284e-05, 8.8815315393731e-05, 9.587762178853154e-05, 0.00010293992818333209, 0.00011000223457813263, 0.00011706454097293317, 0.00012412684736773372, 0.00013118915376253426, 0.0001382514601573348, 0.00014531378110405058, 0.0001523760729469359, 0.00015943837934173644, 0.00016650068573653698, 0.00017356299213133752, 0.00018062529852613807, 0.0001876876049209386, 0.00019474991131573915, 0.00020181223226245493, 0.00020887453865725547, 0.00021593684505205601, 0.00022299915144685656, 0.0002300614578416571, 0.00023712376423645765, 0.0002441860851831734, 0.00025124839157797396, 0.0002583106979727745, 0.00026537300436757505]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 5.0, 4.0, 4.0, 3.0, 8.0, 7.0, 7.0, 8.0, 8.0, 10.0, 14.0, 17.0, 18.0, 15.0, 21.0, 27.0, 28.0, 24.0, 36.0, 31.0, 45.0, 33.0, 45.0, 48.0, 33.0, 41.0, 45.0, 32.0, 33.0, 33.0, 27.0, 28.0, 34.0, 20.0, 18.0, 14.0, 20.0, 26.0, 19.0, 21.0, 20.0, 20.0, 5.0, 12.0, 10.0, 7.0, 6.0, 4.0, 3.0, 3.0, 3.0, 5.0, 3.0, 3.0, 0.0, 2.0], "bins": [-0.00011587142944335938, -0.00011242832988500595, -0.00010898523032665253, -0.0001055421307682991, -0.00010209903120994568, -9.865593165159225e-05, -9.521283209323883e-05, -9.17697325348854e-05, -8.832663297653198e-05, -8.488353341817856e-05, -8.144043385982513e-05, -7.799733430147171e-05, -7.455423474311829e-05, -7.111113518476486e-05, -6.766803562641144e-05, -6.422493606805801e-05, -6.078183650970459e-05, -5.7338736951351166e-05, -5.389563739299774e-05, -5.045253783464432e-05, -4.7009438276290894e-05, -4.356633871793747e-05, -4.0123239159584045e-05, -3.668013960123062e-05, -3.32370400428772e-05, -2.9793940484523773e-05, -2.635084092617035e-05, -2.2907741367816925e-05, -1.94646418094635e-05, -1.6021542251110077e-05, -1.2578442692756653e-05, -9.135343134403229e-06, -5.692243576049805e-06, -2.2491440176963806e-06, 1.1939555406570435e-06, 4.6370550990104675e-06, 8.080154657363892e-06, 1.1523254215717316e-05, 1.496635377407074e-05, 1.8409453332424164e-05, 2.1852552890777588e-05, 2.5295652449131012e-05, 2.8738752007484436e-05, 3.218185156583786e-05, 3.5624951124191284e-05, 3.906805068254471e-05, 
4.251115024089813e-05, 4.5954249799251556e-05, 4.939734935760498e-05, 5.2840448915958405e-05, 5.628354847431183e-05, 5.972664803266525e-05, 6.316974759101868e-05, 6.66128471493721e-05, 7.005594670772552e-05, 7.349904626607895e-05, 7.694214582443237e-05, 8.03852453827858e-05, 8.382834494113922e-05, 8.727144449949265e-05, 9.071454405784607e-05, 9.41576436161995e-05, 9.760074317455292e-05, 0.00010104384273290634, 0.00010448694229125977]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 8.0, 18.0, 21.0, 30.0, 52.0, 74.0, 108.0, 200.0, 264.0, 363.0, 559.0, 883.0, 1303.0, 2041.0, 3378.0, 6071.0, 11066.0, 22119.0, 52630.0, 417414.0, 3541066.0, 70608.0, 29766.0, 14231.0, 7619.0, 4303.0, 2561.0, 1650.0, 1095.0, 756.0, 492.0, 378.0, 276.0, 213.0, 165.0, 119.0, 85.0, 57.0, 61.0, 34.0, 32.0, 27.0, 25.0, 17.0, 10.0, 10.0, 7.0, 9.0, 9.0, 3.0, 3.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.0001252889633178711, -0.00012030079960823059, -0.00011531263589859009, -0.00011032447218894958, -0.00010533630847930908, -0.00010034814476966858, -9.535998106002808e-05, -9.037181735038757e-05, -8.538365364074707e-05, -8.039548993110657e-05, -7.540732622146606e-05, -7.041916251182556e-05, -6.543099880218506e-05, -6.0442835092544556e-05, -5.545467138290405e-05, -5.046650767326355e-05, -4.547834396362305e-05, -4.0490180253982544e-05, -3.550201654434204e-05, -3.0513852834701538e-05, -2.5525689125061035e-05, -2.0537525415420532e-05, -1.554936170578003e-05, -1.0561197996139526e-05, -5.5730342864990234e-06, -5.848705768585205e-07, 4.403293132781982e-06, 9.391456842422485e-06, 1.4379620552062988e-05, 1.936778426170349e-05, 2.4355947971343994e-05, 2.9344111680984497e-05, 3.4332275390625e-05, 3.93204391002655e-05, 4.4308602809906006e-05, 4.929676651954651e-05, 5.428493022918701e-05, 5.9273093938827515e-05, 6.426125764846802e-05, 6.924942135810852e-05, 7.423758506774902e-05, 7.922574877738953e-05, 8.421391248703003e-05, 8.920207619667053e-05, 9.419023990631104e-05, 9.917840361595154e-05, 0.00010416656732559204, 0.00010915473103523254, 0.00011414289474487305, 0.00011913105845451355, 0.00012411922216415405, 0.00012910738587379456, 0.00013409554958343506, 0.00013908371329307556, 0.00014407187700271606, 0.00014906004071235657, 0.00015404820442199707, 0.00015903636813163757, 0.00016402453184127808, 0.00016901269555091858, 0.00017400085926055908, 0.00017898902297019958, 0.0001839771866798401, 0.0001889653503894806, 0.0001939535140991211]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 3.0, 2.0, 1.0, 1.0, 5.0, 5.0, 6.0, 8.0, 14.0, 15.0, 15.0, 22.0, 36.0, 51.0, 54.0, 72.0, 84.0, 83.0, 92.0, 83.0, 74.0, 62.0, 55.0, 46.0, 28.0, 20.0, 15.0, 14.0, 7.0, 10.0, 9.0, 5.0, 0.0, 3.0, 2.0, 0.0, 2.0, 6.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.0205974578857422e-05, -1.9368715584278107e-05, -1.853145658969879e-05, -1.7694197595119476e-05, -1.685693860054016e-05, -1.6019679605960846e-05, -1.518242061138153e-05, -1.4345161616802216e-05, -1.35079026222229e-05, -1.2670643627643585e-05, -1.183338463306427e-05, -1.0996125638484955e-05, -1.015886664390564e-05, -9.321607649326324e-06, -8.48434865474701e-06, -7.647089660167694e-06, -6.809830665588379e-06, -5.972571671009064e-06, -5.1353126764297485e-06, -4.298053681850433e-06, -3.460794687271118e-06, -2.623535692691803e-06, -1.7862766981124878e-06, 
-9.490177035331726e-07, -1.1175870895385742e-07, 7.255002856254578e-07, 1.562759280204773e-06, 2.400018274784088e-06, 3.2372772693634033e-06, 4.0745362639427185e-06, 4.911795258522034e-06, 5.749054253101349e-06, 6.586313247680664e-06, 7.423572242259979e-06, 8.260831236839294e-06, 9.09809023141861e-06, 9.935349225997925e-06, 1.077260822057724e-05, 1.1609867215156555e-05, 1.244712620973587e-05, 1.3284385204315186e-05, 1.41216441988945e-05, 1.4958903193473816e-05, 1.579616218805313e-05, 1.6633421182632446e-05, 1.747068017721176e-05, 1.8307939171791077e-05, 1.9145198166370392e-05, 1.9982457160949707e-05, 2.0819716155529022e-05, 2.1656975150108337e-05, 2.2494234144687653e-05, 2.3331493139266968e-05, 2.4168752133846283e-05, 2.5006011128425598e-05, 2.5843270123004913e-05, 2.668052911758423e-05, 2.7517788112163544e-05, 2.835504710674286e-05, 2.9192306101322174e-05, 3.002956509590149e-05, 3.0866824090480804e-05, 3.170408308506012e-05, 3.2541342079639435e-05, 3.337860107421875e-05]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 3.0, 4.0, 2.0, 3.0, 4.0, 3.0, 6.0, 12.0, 18.0, 20.0, 38.0, 47.0, 69.0, 84.0, 124.0, 196.0, 301.0, 450.0, 651.0, 1009.0, 1687.0, 2663.0, 4403.0, 7408.0, 13302.0, 25156.0, 50160.0, 131792.0, 3248600.0, 527286.0, 91355.0, 39465.0, 20236.0, 10948.0, 6397.0, 3789.0, 2338.0, 1448.0, 941.0, 619.0, 434.0, 277.0, 175.0, 108.0, 93.0, 57.0, 29.0, 26.0, 22.0, 11.0, 7.0, 4.0, 6.0, 1.0, 5.0, 1.0, 3.0, 2.0, 1.0], "bins": [-0.00010269880294799805, -9.966269135475159e-05, -9.662657976150513e-05, -9.359046816825867e-05, -9.055435657501221e-05, -8.751824498176575e-05, -8.448213338851929e-05, -8.144602179527283e-05, -7.840991020202637e-05, -7.537379860877991e-05, -7.233768701553345e-05, -6.930157542228699e-05, -6.626546382904053e-05, -6.322935223579407e-05, -6.019324064254761e-05, -5.715712904930115e-05, -5.412101745605469e-05, -5.108490586280823e-05, -4.804879426956177e-05, -4.501268267631531e-05, -4.197657108306885e-05, -3.894045948982239e-05, -3.590434789657593e-05, -3.286823630332947e-05, -2.9832124710083008e-05, -2.6796013116836548e-05, -2.3759901523590088e-05, -2.0723789930343628e-05, -1.7687678337097168e-05, -1.4651566743850708e-05, -1.1615455150604248e-05, -8.579343557357788e-06, -5.543231964111328e-06, -2.507120370864868e-06, 5.289912223815918e-07, 3.5651028156280518e-06, 6.601214408874512e-06, 9.637326002120972e-06, 1.2673437595367432e-05, 1.570954918861389e-05, 1.874566078186035e-05, 2.178177237510681e-05, 2.481788396835327e-05, 2.785399556159973e-05, 3.089010715484619e-05, 3.392621874809265e-05, 3.696233034133911e-05, 3.999844193458557e-05, 4.303455352783203e-05, 4.607066512107849e-05, 4.910677671432495e-05, 5.214288830757141e-05, 5.517899990081787e-05, 5.821511149406433e-05, 6.125122308731079e-05, 6.428733468055725e-05, 6.732344627380371e-05, 7.035955786705017e-05, 7.339566946029663e-05, 7.643178105354309e-05, 7.946789264678955e-05, 8.250400424003601e-05, 8.554011583328247e-05, 8.857622742652893e-05, 9.161233901977539e-05]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 6.0, 2.0, 4.0, 5.0, 6.0, 11.0, 10.0, 9.0, 20.0, 26.0, 27.0, 31.0, 45.0, 49.0, 79.0, 173.0, 467.0, 2136.0, 420.0, 165.0, 71.0, 46.0, 40.0, 28.0, 29.0, 26.0, 21.0, 20.0, 18.0, 9.0, 12.0, 8.0, 10.0, 11.0, 10.0, 5.0, 4.0, 6.0, 8.0, 3.0, 2.0, 5.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": 
[-3.552436828613281e-05, -3.394670784473419e-05, -3.236904740333557e-05, -3.079138696193695e-05, -2.921372652053833e-05, -2.763606607913971e-05, -2.605840563774109e-05, -2.4480745196342468e-05, -2.2903084754943848e-05, -2.1325424313545227e-05, -1.9747763872146606e-05, -1.8170103430747986e-05, -1.6592442989349365e-05, -1.5014782547950745e-05, -1.3437122106552124e-05, -1.1859461665153503e-05, -1.0281801223754883e-05, -8.704140782356262e-06, -7.126480340957642e-06, -5.548819899559021e-06, -3.9711594581604e-06, -2.3934990167617798e-06, -8.158385753631592e-07, 7.618218660354614e-07, 2.339482307434082e-06, 3.917142748832703e-06, 5.494803190231323e-06, 7.072463631629944e-06, 8.650124073028564e-06, 1.0227784514427185e-05, 1.1805444955825806e-05, 1.3383105397224426e-05, 1.4960765838623047e-05, 1.6538426280021667e-05, 1.8116086721420288e-05, 1.969374716281891e-05, 2.127140760421753e-05, 2.284906804561615e-05, 2.442672848701477e-05, 2.600438892841339e-05, 2.7582049369812012e-05, 2.9159709811210632e-05, 3.073737025260925e-05, 3.2315030694007874e-05, 3.3892691135406494e-05, 3.5470351576805115e-05, 3.7048012018203735e-05, 3.8625672459602356e-05, 4.0203332901000977e-05, 4.17809933423996e-05, 4.335865378379822e-05, 4.493631422519684e-05, 4.651397466659546e-05, 4.809163510799408e-05, 4.96692955493927e-05, 5.124695599079132e-05, 5.282461643218994e-05, 5.440227687358856e-05, 5.597993731498718e-05, 5.75575977563858e-05, 5.9135258197784424e-05, 6.0712918639183044e-05, 6.229057908058167e-05, 6.386823952198029e-05, 6.54458999633789e-05]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 2.0, 2.0, 3.0, 8.0, 13.0, 17.0, 20.0, 33.0, 36.0, 90.0, 111.0, 124.0, 117.0, 116.0, 65.0, 58.0, 44.0, 38.0, 16.0, 24.0, 13.0, 12.0, 8.0, 7.0, 3.0, 10.0, 6.0, 3.0, 4.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00029002869268879294, -0.00027820051764138043, -0.0002663723425939679, -0.0002545441675465554, -0.00024271597794722766, -0.00023088780289981514, -0.00021905962785240263, -0.00020723143825307488, -0.00019540326320566237, -0.00018357508815824986, -0.00017174691311083734, -0.00015991873806342483, -0.00014809054846409708, -0.00013626237341668457, -0.00012443419836927205, -0.00011260601604590192, -0.00010077784827444702, -8.894967322703451e-05, -7.712149090366438e-05, -6.529331585625187e-05, -5.3465137170860544e-05, -4.163695848546922e-05, -2.9808783438056707e-05, -1.798060111468658e-05, -6.152426067274064e-06, 5.675751708622556e-06, 1.7503929484519176e-05, 2.9332106350921094e-05, 4.1160285036312416e-05, 5.298846372170374e-05, 6.481663876911625e-05, 7.664482109248638e-05, 8.84729961398989e-05, 0.00010030117118731141, 0.00011212935351068154, 0.00012395752128213644, 0.00013578571088146418, 0.0001476138859288767, 0.0001594420609762892, 0.00017127025057561696, 0.00018309842562302947, 0.00019492660067044199, 0.0002067547757178545, 0.00021858295076526701, 0.00023041114036459476, 0.00024223931541200727, 0.00025406747590750456, 0.00026589568005874753, 0.0002777238260023296, 0.0002895520010497421, 0.0003013801760971546, 0.00031320835114456713, 0.00032503652619197965, 0.0003368647303432226, 0.0003486928762868047, 0.00036052108043804765, 0.00037234925548546016, 0.0003841774305328727, 0.0003960056055802852, 0.0004078337806276977, 0.0004196619556751102, 0.00043149013072252274, 0.0004433183348737657, 0.0004551465099211782, 0.00046697468496859074]}, 
"gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 8.0, 6.0, 8.0, 5.0, 17.0, 16.0, 11.0, 17.0, 16.0, 16.0, 23.0, 23.0, 23.0, 16.0, 25.0, 29.0, 36.0, 28.0, 37.0, 47.0, 28.0, 49.0, 41.0, 34.0, 32.0, 37.0, 38.0, 36.0, 38.0, 39.0, 30.0, 17.0, 22.0, 17.0, 24.0, 18.0, 21.0, 11.0, 8.0, 14.0, 12.0, 9.0, 4.0, 5.0, 4.0, 3.0, 2.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.00015664100646972656, -0.00015152152627706528, -0.000146402046084404, -0.0001412825658917427, -0.00013616308569908142, -0.00013104360550642014, -0.00012592412531375885, -0.00012080464512109756, -0.00011568516492843628, -0.000110565684735775, -0.00010544620454311371, -0.00010032672435045242, -9.520724415779114e-05, -9.008776396512985e-05, -8.496828377246857e-05, -7.984880357980728e-05, -7.4729323387146e-05, -6.960984319448471e-05, -6.449036300182343e-05, -5.937088280916214e-05, -5.4251402616500854e-05, -4.913192242383957e-05, -4.4012442231178284e-05, -3.8892962038517e-05, -3.377348184585571e-05, -2.8654001653194427e-05, -2.3534521460533142e-05, -1.8415041267871857e-05, -1.3295561075210571e-05, -8.176080882549286e-06, -3.0566006898880005e-06, 2.062879502773285e-06, 7.18235969543457e-06, 1.2301839888095856e-05, 1.742132008075714e-05, 2.2540800273418427e-05, 2.7660280466079712e-05, 3.2779760658741e-05, 3.789924085140228e-05, 4.301872104406357e-05, 4.8138201236724854e-05, 5.325768142938614e-05, 5.8377161622047424e-05, 6.349664181470871e-05, 6.861612200737e-05, 7.373560220003128e-05, 7.885508239269257e-05, 8.397456258535385e-05, 8.909404277801514e-05, 9.421352297067642e-05, 9.933300316333771e-05, 0.00010445248335599899, 0.00010957196354866028, 0.00011469144374132156, 0.00011981092393398285, 0.00012493040412664413, 0.00013004988431930542, 0.0001351693645119667, 0.000140288844704628, 0.00014540832489728928, 0.00015052780508995056, 0.00015564728528261185, 0.00016076676547527313, 0.00016588624566793442, 0.0001710057258605957]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 5.0, 2.0, 8.0, 2.0, 9.0, 17.0, 11.0, 27.0, 24.0, 50.0, 56.0, 75.0, 124.0, 172.0, 271.0, 372.0, 571.0, 911.0, 1347.0, 2218.0, 3578.0, 6388.0, 11323.0, 23074.0, 50987.0, 135414.0, 493261.0, 190064.0, 66283.0, 29414.0, 13781.0, 7389.0, 4265.0, 2546.0, 1480.0, 994.0, 649.0, 426.0, 313.0, 199.0, 135.0, 104.0, 57.0, 36.0, 36.0, 23.0, 21.0, 14.0, 7.0, 11.0, 8.0, 3.0, 3.0, 2.0, 4.0, 1.0, 1.0], "bins": [-0.00012683868408203125, -0.0001230686902999878, -0.00011929869651794434, -0.00011552870273590088, -0.00011175870895385742, -0.00010798871517181396, -0.00010421872138977051, -0.00010044872760772705, -9.66787338256836e-05, -9.290874004364014e-05, -8.913874626159668e-05, -8.536875247955322e-05, -8.159875869750977e-05, -7.782876491546631e-05, -7.405877113342285e-05, -7.02887773513794e-05, -6.651878356933594e-05, -6.274878978729248e-05, -5.8978796005249023e-05, -5.5208802223205566e-05, -5.143880844116211e-05, -4.766881465911865e-05, -4.3898820877075195e-05, -4.012882709503174e-05, -3.635883331298828e-05, -3.2588839530944824e-05, -2.8818845748901367e-05, -2.504885196685791e-05, -2.1278858184814453e-05, -1.7508864402770996e-05, -1.3738870620727539e-05, -9.968876838684082e-06, -6.198883056640625e-06, -2.428889274597168e-06, 1.341104507446289e-06, 5.111098289489746e-06, 8.881092071533203e-06, 1.265108585357666e-05, 1.6421079635620117e-05, 2.0191073417663574e-05, 2.396106719970703e-05, 
2.7731060981750488e-05, 3.1501054763793945e-05, 3.52710485458374e-05, 3.904104232788086e-05, 4.2811036109924316e-05, 4.6581029891967773e-05, 5.035102367401123e-05, 5.412101745605469e-05, 5.7891011238098145e-05, 6.16610050201416e-05, 6.543099880218506e-05, 6.920099258422852e-05, 7.297098636627197e-05, 7.674098014831543e-05, 8.051097393035889e-05, 8.428096771240234e-05, 8.80509614944458e-05, 9.182095527648926e-05, 9.559094905853271e-05, 9.936094284057617e-05, 0.00010313093662261963, 0.00010690093040466309, 0.00011067092418670654, 0.00011444091796875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 4.0, 4.0, 3.0, 3.0, 6.0, 9.0, 6.0, 13.0, 14.0, 19.0, 43.0, 37.0, 47.0, 61.0, 54.0, 87.0, 72.0, 79.0, 66.0, 72.0, 77.0, 46.0, 45.0, 36.0, 26.0, 22.0, 11.0, 9.0, 4.0, 6.0, 6.0, 7.0, 6.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.9861927032470703e-05, -2.9061920940876007e-05, -2.826191484928131e-05, -2.7461908757686615e-05, -2.666190266609192e-05, -2.5861896574497223e-05, -2.5061890482902527e-05, -2.426188439130783e-05, -2.3461878299713135e-05, -2.266187220811844e-05, -2.1861866116523743e-05, -2.1061860024929047e-05, -2.026185393333435e-05, -1.9461847841739655e-05, -1.866184175014496e-05, -1.7861835658550262e-05, -1.7061829566955566e-05, -1.626182347536087e-05, -1.5461817383766174e-05, -1.4661811292171478e-05, -1.3861805200576782e-05, -1.3061799108982086e-05, -1.226179301738739e-05, -1.1461786925792694e-05, -1.0661780834197998e-05, -9.861774742603302e-06, -9.061768651008606e-06, -8.26176255941391e-06, -7.461756467819214e-06, -6.661750376224518e-06, -5.861744284629822e-06, -5.061738193035126e-06, -4.26173210144043e-06, -3.4617260098457336e-06, -2.6617199182510376e-06, -1.8617138266563416e-06, -1.0617077350616455e-06, -2.6170164346694946e-07, 5.383044481277466e-07, 1.3383105397224426e-06, 2.1383166313171387e-06, 2.9383227229118347e-06, 3.7383288145065308e-06, 4.538334906101227e-06, 5.338340997695923e-06, 6.138347089290619e-06, 6.938353180885315e-06, 7.738359272480011e-06, 8.538365364074707e-06, 9.338371455669403e-06, 1.0138377547264099e-05, 1.0938383638858795e-05, 1.1738389730453491e-05, 1.2538395822048187e-05, 1.3338401913642883e-05, 1.413840800523758e-05, 1.4938414096832275e-05, 1.573842018842697e-05, 1.6538426280021667e-05, 1.7338432371616364e-05, 1.813843846321106e-05, 1.8938444554805756e-05, 1.973845064640045e-05, 2.0538456737995148e-05, 2.1338462829589844e-05]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 4.0, 6.0, 8.0, 11.0, 9.0, 11.0, 21.0, 30.0, 28.0, 57.0, 79.0, 108.0, 133.0, 217.0, 332.0, 384.0, 592.0, 858.0, 1230.0, 1866.0, 2645.0, 4023.0, 6101.0, 9371.0, 14582.0, 23568.0, 38917.0, 68023.0, 134005.0, 331854.0, 193162.0, 87508.0, 48397.0, 28946.0, 17897.0, 11297.0, 7346.0, 4728.0, 3173.0, 2177.0, 1484.0, 991.0, 691.0, 509.0, 339.0, 232.0, 187.0, 128.0, 85.0, 61.0, 52.0, 33.0, 16.0, 26.0, 14.0, 9.0, 3.0, 1.0, 5.0, 1.0, 2.0, 1.0], "bins": [-4.9114227294921875e-05, -4.7565437853336334e-05, -4.6016648411750793e-05, -4.446785897016525e-05, -4.291906952857971e-05, -4.137028008699417e-05, -3.982149064540863e-05, -3.827270120382309e-05, -3.672391176223755e-05, -3.517512232065201e-05, -3.362633287906647e-05, -3.2077543437480927e-05, -3.0528753995895386e-05, -2.8979964554309845e-05, -2.7431175112724304e-05, -2.5882385671138763e-05, -2.4333596229553223e-05, 
-2.2784806787967682e-05, -2.123601734638214e-05, -1.96872279047966e-05, -1.813843846321106e-05, -1.658964902162552e-05, -1.5040859580039978e-05, -1.3492070138454437e-05, -1.1943280696868896e-05, -1.0394491255283356e-05, -8.845701813697815e-06, -7.296912372112274e-06, -5.748122930526733e-06, -4.199333488941193e-06, -2.650544047355652e-06, -1.101754605770111e-06, 4.470348358154297e-07, 1.9958242774009705e-06, 3.5446137189865112e-06, 5.093403160572052e-06, 6.642192602157593e-06, 8.190982043743134e-06, 9.739771485328674e-06, 1.1288560926914215e-05, 1.2837350368499756e-05, 1.4386139810085297e-05, 1.5934929251670837e-05, 1.7483718693256378e-05, 1.903250813484192e-05, 2.058129757642746e-05, 2.2130087018013e-05, 2.367887645959854e-05, 2.5227665901184082e-05, 2.6776455342769623e-05, 2.8325244784355164e-05, 2.9874034225940704e-05, 3.1422823667526245e-05, 3.2971613109111786e-05, 3.452040255069733e-05, 3.606919199228287e-05, 3.761798143386841e-05, 3.916677087545395e-05, 4.071556031703949e-05, 4.226434975862503e-05, 4.381313920021057e-05, 4.536192864179611e-05, 4.691071808338165e-05, 4.8459507524967194e-05, 5.0008296966552734e-05]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 0.0, 4.0, 5.0, 4.0, 5.0, 7.0, 7.0, 8.0, 12.0, 8.0, 17.0, 16.0, 21.0, 23.0, 27.0, 35.0, 36.0, 40.0, 40.0, 49.0, 45.0, 39.0, 44.0, 45.0, 48.0, 57.0, 45.0, 39.0, 38.0, 30.0, 42.0, 28.0, 21.0, 23.0, 19.0, 17.0, 13.0, 11.0, 7.0, 8.0, 7.0, 3.0, 7.0, 2.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-4.738569259643555e-05, -4.5921653509140015e-05, -4.445761442184448e-05, -4.299357533454895e-05, -4.152953624725342e-05, -4.0065497159957886e-05, -3.8601458072662354e-05, -3.713741898536682e-05, -3.567337989807129e-05, -3.420934081077576e-05, -3.2745301723480225e-05, -3.128126263618469e-05, -2.981722354888916e-05, -2.8353184461593628e-05, -2.6889145374298096e-05, -2.5425106287002563e-05, -2.396106719970703e-05, -2.24970281124115e-05, -2.1032989025115967e-05, -1.9568949937820435e-05, -1.8104910850524902e-05, -1.664087176322937e-05, -1.5176832675933838e-05, -1.3712793588638306e-05, -1.2248754501342773e-05, -1.0784715414047241e-05, -9.320676326751709e-06, -7.856637239456177e-06, -6.3925981521606445e-06, -4.928559064865112e-06, -3.46451997756958e-06, -2.000480890274048e-06, -5.364418029785156e-07, 9.275972843170166e-07, 2.391636371612549e-06, 3.855675458908081e-06, 5.319714546203613e-06, 6.7837536334991455e-06, 8.247792720794678e-06, 9.71183180809021e-06, 1.1175870895385742e-05, 1.2639909982681274e-05, 1.4103949069976807e-05, 1.556798815727234e-05, 1.703202724456787e-05, 1.8496066331863403e-05, 1.9960105419158936e-05, 2.1424144506454468e-05, 2.288818359375e-05, 2.4352222681045532e-05, 2.5816261768341064e-05, 2.7280300855636597e-05, 2.874433994293213e-05, 3.020837903022766e-05, 3.167241811752319e-05, 3.3136457204818726e-05, 3.460049629211426e-05, 3.606453537940979e-05, 3.752857446670532e-05, 3.8992613554000854e-05, 4.045665264129639e-05, 4.192069172859192e-05, 4.338473081588745e-05, 4.4848769903182983e-05, 4.6312808990478516e-05]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 3.0, 6.0, 11.0, 20.0, 16.0, 19.0, 35.0, 39.0, 60.0, 91.0, 157.0, 233.0, 376.0, 610.0, 1049.0, 2177.0, 4750.0, 15876.0, 53094.0, 300496.0, 535367.0, 96707.0, 22937.0, 7756.0, 3297.0, 1374.0, 714.0, 440.0, 259.0, 191.0, 119.0, 81.0, 54.0, 37.0, 26.0, 21.0, 
20.0, 8.0, 15.0, 9.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5139579772949219e-05, -1.4654360711574554e-05, -1.416914165019989e-05, -1.3683922588825226e-05, -1.3198703527450562e-05, -1.2713484466075897e-05, -1.2228265404701233e-05, -1.1743046343326569e-05, -1.1257827281951904e-05, -1.077260822057724e-05, -1.0287389159202576e-05, -9.802170097827911e-06, -9.316951036453247e-06, -8.831731975078583e-06, -8.346512913703918e-06, -7.861293852329254e-06, -7.37607479095459e-06, -6.8908557295799255e-06, -6.405636668205261e-06, -5.920417606830597e-06, -5.435198545455933e-06, -4.949979484081268e-06, -4.464760422706604e-06, -3.97954136133194e-06, -3.4943222999572754e-06, -3.009103238582611e-06, -2.5238841772079468e-06, -2.0386651158332825e-06, -1.5534460544586182e-06, -1.0682269930839539e-06, -5.830079317092896e-07, -9.778887033462524e-08, 3.8743019104003906e-07, 8.726492524147034e-07, 1.3578683137893677e-06, 1.843087375164032e-06, 2.3283064365386963e-06, 2.8135254979133606e-06, 3.298744559288025e-06, 3.783963620662689e-06, 4.2691826820373535e-06, 4.754401743412018e-06, 5.239620804786682e-06, 5.7248398661613464e-06, 6.210058927536011e-06, 6.695277988910675e-06, 7.180497050285339e-06, 7.665716111660004e-06, 8.150935173034668e-06, 8.636154234409332e-06, 9.121373295783997e-06, 9.606592357158661e-06, 1.0091811418533325e-05, 1.057703047990799e-05, 1.1062249541282654e-05, 1.1547468602657318e-05, 1.2032687664031982e-05, 1.2517906725406647e-05, 1.3003125786781311e-05, 1.3488344848155975e-05, 1.397356390953064e-05, 1.4458782970905304e-05, 1.4944002032279968e-05, 1.5429221093654633e-05, 1.5914440155029297e-05]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 7.0, 0.0, 2.0, 5.0, 7.0, 10.0, 0.0, 12.0, 11.0, 21.0, 0.0, 21.0, 27.0, 20.0, 24.0, 0.0, 24.0, 32.0, 47.0, 0.0, 48.0, 51.0, 46.0, 41.0, 0.0, 59.0, 58.0, 57.0, 0.0, 46.0, 41.0, 32.0, 47.0, 0.0, 37.0, 32.0, 32.0, 26.0, 0.0, 23.0, 17.0, 16.0, 0.0, 10.0, 13.0, 8.0, 4.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.3113021850585938e-06, -1.2647360563278198e-06, -1.218169927597046e-06, -1.171603798866272e-06, -1.125037670135498e-06, -1.0784715414047241e-06, -1.0319054126739502e-06, -9.853392839431763e-07, -9.387731552124023e-07, -8.922070264816284e-07, -8.456408977508545e-07, -7.990747690200806e-07, -7.525086402893066e-07, -7.059425115585327e-07, -6.593763828277588e-07, -6.128102540969849e-07, -5.662441253662109e-07, -5.19677996635437e-07, -4.731118679046631e-07, -4.2654573917388916e-07, -3.7997961044311523e-07, -3.334134817123413e-07, -2.868473529815674e-07, -2.4028122425079346e-07, -1.9371509552001953e-07, -1.471489667892456e-07, -1.0058283805847168e-07, -5.4016709327697754e-08, -7.450580596923828e-09, 3.91155481338501e-08, 8.568167686462402e-08, 1.3224780559539795e-07, 1.7881393432617188e-07, 2.253800630569458e-07, 2.7194619178771973e-07, 3.1851232051849365e-07, 3.650784492492676e-07, 4.116445779800415e-07, 4.5821070671081543e-07, 5.047768354415894e-07, 5.513429641723633e-07, 5.979090929031372e-07, 6.444752216339111e-07, 6.910413503646851e-07, 7.37607479095459e-07, 7.841736078262329e-07, 8.307397365570068e-07, 8.773058652877808e-07, 9.238719940185547e-07, 9.704381227493286e-07, 1.0170042514801025e-06, 1.0635703802108765e-06, 1.1101365089416504e-06, 1.1567026376724243e-06, 1.2032687664031982e-06, 1.2498348951339722e-06, 1.296401023864746e-06, 1.34296715259552e-06, 1.389533281326294e-06, 1.4360994100570679e-06, 
1.4826655387878418e-06, 1.5292316675186157e-06, 1.5757977962493896e-06, 1.6223639249801636e-06, 1.6689300537109375e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 4.0, 3.0, 8.0, 8.0, 5.0, 12.0, 18.0, 24.0, 32.0, 50.0, 65.0, 86.0, 96.0, 160.0, 270.0, 334.0, 477.0, 688.0, 1009.0, 1505.0, 1391.0, 3079.0, 4818.0, 7796.0, 12955.0, 22309.0, 39369.0, 75308.0, 96254.0, 323483.0, 243946.0, 96779.0, 48615.0, 26587.0, 15419.0, 9337.0, 4030.0, 4058.0, 2628.0, 1774.0, 1164.0, 783.0, 540.0, 382.0, 184.0, 187.0, 162.0, 99.0, 88.0, 38.0, 49.0, 39.0, 14.0, 15.0, 10.0, 9.0, 7.0, 4.0, 2.0, 3.0, 4.0], "bins": [-5.602836608886719e-06, -5.431473255157471e-06, -5.260109901428223e-06, -5.088746547698975e-06, -4.9173831939697266e-06, -4.7460198402404785e-06, -4.5746564865112305e-06, -4.403293132781982e-06, -4.231929779052734e-06, -4.060566425323486e-06, -3.889203071594238e-06, -3.7178397178649902e-06, -3.546476364135742e-06, -3.375113010406494e-06, -3.203749656677246e-06, -3.032386302947998e-06, -2.86102294921875e-06, -2.689659595489502e-06, -2.518296241760254e-06, -2.346932888031006e-06, -2.175569534301758e-06, -2.0042061805725098e-06, -1.8328428268432617e-06, -1.6614794731140137e-06, -1.4901161193847656e-06, -1.3187527656555176e-06, -1.1473894119262695e-06, -9.760260581970215e-07, -8.046627044677734e-07, -6.332993507385254e-07, -4.6193599700927734e-07, -2.905726432800293e-07, -1.1920928955078125e-07, 5.21540641784668e-08, 2.2351741790771484e-07, 3.948807716369629e-07, 5.662441253662109e-07, 7.37607479095459e-07, 9.08970832824707e-07, 1.080334186553955e-06, 1.2516975402832031e-06, 1.4230608940124512e-06, 1.5944242477416992e-06, 1.7657876014709473e-06, 1.9371509552001953e-06, 2.1085143089294434e-06, 2.2798776626586914e-06, 2.4512410163879395e-06, 2.6226043701171875e-06, 2.7939677238464355e-06, 2.9653310775756836e-06, 3.1366944313049316e-06, 3.3080577850341797e-06, 3.4794211387634277e-06, 3.6507844924926758e-06, 3.822147846221924e-06, 3.993511199951172e-06, 4.16487455368042e-06, 4.336237907409668e-06, 4.507601261138916e-06, 4.678964614868164e-06, 4.850327968597412e-06, 5.02169132232666e-06, 5.193054676055908e-06, 5.364418029785156e-06]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 6.0, 7.0, 6.0, 12.0, 6.0, 14.0, 23.0, 16.0, 32.0, 18.0, 46.0, 37.0, 22.0, 53.0, 52.0, 65.0, 49.0, 65.0, 71.0, 39.0, 51.0, 30.0, 49.0, 23.0, 38.0, 27.0, 21.0, 25.0, 14.0, 19.0, 11.0, 10.0, 11.0, 6.0, 6.0, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-4.887580871582031e-06, -4.733912646770477e-06, -4.580244421958923e-06, -4.426576197147369e-06, -4.2729079723358154e-06, -4.1192397475242615e-06, -3.9655715227127075e-06, -3.8119032979011536e-06, -3.6582350730895996e-06, -3.5045668482780457e-06, -3.3508986234664917e-06, -3.1972303986549377e-06, -3.043562173843384e-06, -2.88989394903183e-06, -2.736225724220276e-06, -2.582557499408722e-06, -2.428889274597168e-06, -2.275221049785614e-06, -2.12155282497406e-06, -1.967884600162506e-06, -1.8142163753509521e-06, -1.6605481505393982e-06, -1.5068799257278442e-06, -1.3532117009162903e-06, -1.1995434761047363e-06, -1.0458752512931824e-06, -8.922070264816284e-07, -7.385388016700745e-07, -5.848705768585205e-07, -4.3120235204696655e-07, -2.775341272354126e-07, -1.2386590242385864e-07, 2.9802322387695312e-08, 1.8347054719924927e-07, 3.371387720108032e-07, 4.908069968223572e-07, 
6.444752216339111e-07, 7.981434464454651e-07, 9.51811671257019e-07, 1.105479896068573e-06, 1.259148120880127e-06, 1.412816345691681e-06, 1.5664845705032349e-06, 1.7201527953147888e-06, 1.8738210201263428e-06, 2.0274892449378967e-06, 2.1811574697494507e-06, 2.3348256945610046e-06, 2.4884939193725586e-06, 2.6421621441841125e-06, 2.7958303689956665e-06, 2.9494985938072205e-06, 3.1031668186187744e-06, 3.2568350434303284e-06, 3.4105032682418823e-06, 3.5641714930534363e-06, 3.7178397178649902e-06, 3.871507942676544e-06, 4.025176167488098e-06, 4.178844392299652e-06, 4.332512617111206e-06, 4.48618084192276e-06, 4.639849066734314e-06, 4.793517291545868e-06, 4.947185516357422e-06]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 5.0, 1.0, 3.0, 7.0, 9.0, 6.0, 12.0, 17.0, 26.0, 25.0, 41.0, 44.0, 64.0, 138.0, 140.0, 107.0, 76.0, 48.0, 46.0, 31.0, 29.0, 23.0, 26.0, 12.0, 14.0, 19.0, 4.0, 5.0, 6.0, 4.0, 4.0, 6.0, 3.0, 4.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00021041017316747457, -0.0002039047540165484, -0.000197399320313707, -0.00019089390116278082, -0.00018438848201185465, -0.00017788304830901325, -0.00017137762915808707, -0.0001648722100071609, -0.0001583667763043195, -0.00015186135715339333, -0.00014535592345055193, -0.00013885050429962575, -0.00013234508514869958, -0.00012583965144585818, -0.00011933423229493201, -0.00011282880586804822, -0.00010632338671712205, -9.981796029023826e-05, -9.331254113931209e-05, -8.68071147124283e-05, -8.030168828554451e-05, -7.379626913461834e-05, -6.729084270773456e-05, -6.078541628085077e-05, -5.427999349194579e-05, -4.777457070304081e-05, -4.126914427615702e-05, -3.476372148725204e-05, -2.825829687935766e-05, -2.1752872271463275e-05, -1.5247449482558295e-05, -8.742023055674508e-06, -2.2366002667695284e-06, 4.268823886377504e-06, 1.0774248039524537e-05, 1.7279671737924218e-05, 2.37850963458186e-05, 3.0290520953712985e-05, 3.6795943742617965e-05, 4.330137016950175e-05, 4.980679295840673e-05, 5.631221574731171e-05, 6.28176421741955e-05, 6.932306860107929e-05, 7.582848775200546e-05, 8.233391417888924e-05, 8.883934060577303e-05, 9.53447597566992e-05, 0.00010185018618358299, 0.00010835561261046678, 0.00011486103176139295, 0.00012136645818827674, 0.00012787188461516052, 0.0001343773037660867, 0.0001408827374689281, 0.00014738815661985427, 0.00015389357577078044, 0.00016039899492170662, 0.00016690442862454802, 0.0001734098477754742, 0.00017991526692640036, 0.00018642070062924176, 0.00019292611978016794, 0.00019943155348300934, 0.0002059369726339355]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 3.0, 6.0, 8.0, 6.0, 6.0, 10.0, 12.0, 9.0, 13.0, 17.0, 11.0, 26.0, 27.0, 23.0, 23.0, 36.0, 24.0, 38.0, 32.0, 39.0, 40.0, 49.0, 47.0, 34.0, 36.0, 30.0, 40.0, 39.0, 32.0, 33.0, 23.0, 29.0, 22.0, 25.0, 32.0, 13.0, 15.0, 16.0, 13.0, 11.0, 8.0, 13.0, 7.0, 6.0, 8.0, 1.0, 6.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.0001278519630432129, -0.0001238696277141571, -0.00011988729238510132, -0.00011590495705604553, -0.00011192262172698975, -0.00010794028639793396, -0.00010395795106887817, -9.997561573982239e-05, -9.59932804107666e-05, -9.201094508171082e-05, -8.802860975265503e-05, -8.404627442359924e-05, -8.006393909454346e-05, -7.608160376548767e-05, -7.209926843643188e-05, -6.81169331073761e-05, -6.413459777832031e-05, 
-6.0152262449264526e-05, -5.616992712020874e-05, -5.2187591791152954e-05, -4.820525646209717e-05, -4.422292113304138e-05, -4.0240585803985596e-05, -3.625825047492981e-05, -3.2275915145874023e-05, -2.8293579816818237e-05, -2.431124448776245e-05, -2.0328909158706665e-05, -1.634657382965088e-05, -1.2364238500595093e-05, -8.381903171539307e-06, -4.3995678424835205e-06, -4.172325134277344e-07, 3.5651028156280518e-06, 7.547438144683838e-06, 1.1529773473739624e-05, 1.551210880279541e-05, 1.9494444131851196e-05, 2.3476779460906982e-05, 2.745911478996277e-05, 3.1441450119018555e-05, 3.542378544807434e-05, 3.940612077713013e-05, 4.338845610618591e-05, 4.73707914352417e-05, 5.1353126764297485e-05, 5.533546209335327e-05, 5.931779742240906e-05, 6.330013275146484e-05, 6.728246808052063e-05, 7.126480340957642e-05, 7.52471387386322e-05, 7.922947406768799e-05, 8.321180939674377e-05, 8.719414472579956e-05, 9.117648005485535e-05, 9.515881538391113e-05, 9.914115071296692e-05, 0.0001031234860420227, 0.00010710582137107849, 0.00011108815670013428, 0.00011507049202919006, 0.00011905282735824585, 0.00012303516268730164, 0.00012701749801635742]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 7.0, 7.0, 6.0, 18.0, 19.0, 29.0, 49.0, 71.0, 112.0, 152.0, 208.0, 251.0, 284.0, 510.0, 671.0, 1018.0, 1696.0, 2776.0, 5004.0, 9226.0, 19292.0, 49280.0, 3139088.0, 879050.0, 43021.0, 18277.0, 9593.0, 5369.0, 3239.0, 2129.0, 1314.0, 883.0, 556.0, 356.0, 227.0, 162.0, 99.0, 75.0, 49.0, 29.0, 18.0, 22.0, 9.0, 5.0, 14.0, 9.0, 3.0, 3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00011330842971801758, -0.00010925624519586563, -0.00010520406067371368, -0.00010115187615156174, -9.709969162940979e-05, -9.304750710725784e-05, -8.89953225851059e-05, -8.494313806295395e-05, -8.0890953540802e-05, -7.683876901865005e-05, -7.278658449649811e-05, -6.873439997434616e-05, -6.468221545219421e-05, -6.063003093004227e-05, -5.657784640789032e-05, -5.252566188573837e-05, -4.8473477363586426e-05, -4.442129284143448e-05, -4.036910831928253e-05, -3.6316923797130585e-05, -3.226473927497864e-05, -2.821255475282669e-05, -2.4160370230674744e-05, -2.0108185708522797e-05, -1.605600118637085e-05, -1.2003816664218903e-05, -7.951632142066956e-06, -3.8994476199150085e-06, 1.5273690223693848e-07, 4.2049214243888855e-06, 8.257105946540833e-06, 1.230929046869278e-05, 1.6361474990844727e-05, 2.0413659512996674e-05, 2.446584403514862e-05, 2.8518028557300568e-05, 3.2570213079452515e-05, 3.662239760160446e-05, 4.067458212375641e-05, 4.4726766645908356e-05, 4.87789511680603e-05, 5.283113569021225e-05, 5.68833202123642e-05, 6.0935504734516144e-05, 6.498768925666809e-05, 6.903987377882004e-05, 7.309205830097198e-05, 7.714424282312393e-05, 8.119642734527588e-05, 8.524861186742783e-05, 8.930079638957977e-05, 9.335298091173172e-05, 9.740516543388367e-05, 0.00010145734995603561, 0.00010550953447818756, 0.00010956171900033951, 0.00011361390352249146, 0.0001176660880446434, 0.00012171827256679535, 0.0001257704570889473, 0.00012982264161109924, 0.0001338748261332512, 0.00013792701065540314, 0.00014197919517755508, 0.00014603137969970703]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 5.0, 2.0, 5.0, 7.0, 7.0, 15.0, 11.0, 21.0, 35.0, 28.0, 38.0, 62.0, 63.0, 78.0, 81.0, 81.0, 91.0, 68.0, 52.0, 52.0, 53.0, 21.0, 20.0, 15.0, 17.0, 17.0, 19.0, 2.0, 13.0, 
6.0, 6.0, 4.0, 4.0, 0.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.2411346435546875e-05, -2.162531018257141e-05, -2.0839273929595947e-05, -2.0053237676620483e-05, -1.926720142364502e-05, -1.8481165170669556e-05, -1.7695128917694092e-05, -1.6909092664718628e-05, -1.6123056411743164e-05, -1.53370201587677e-05, -1.4550983905792236e-05, -1.3764947652816772e-05, -1.2978911399841309e-05, -1.2192875146865845e-05, -1.1406838893890381e-05, -1.0620802640914917e-05, -9.834766387939453e-06, -9.04873013496399e-06, -8.262693881988525e-06, -7.4766576290130615e-06, -6.690621376037598e-06, -5.904585123062134e-06, -5.11854887008667e-06, -4.332512617111206e-06, -3.546476364135742e-06, -2.7604401111602783e-06, -1.9744038581848145e-06, -1.1883676052093506e-06, -4.023313522338867e-07, 3.8370490074157715e-07, 1.169741153717041e-06, 1.955777406692505e-06, 2.7418136596679688e-06, 3.5278499126434326e-06, 4.3138861656188965e-06, 5.09992241859436e-06, 5.885958671569824e-06, 6.671994924545288e-06, 7.458031177520752e-06, 8.244067430496216e-06, 9.03010368347168e-06, 9.816139936447144e-06, 1.0602176189422607e-05, 1.1388212442398071e-05, 1.2174248695373535e-05, 1.2960284948348999e-05, 1.3746321201324463e-05, 1.4532357454299927e-05, 1.531839370727539e-05, 1.6104429960250854e-05, 1.689046621322632e-05, 1.7676502466201782e-05, 1.8462538719177246e-05, 1.924857497215271e-05, 2.0034611225128174e-05, 2.0820647478103638e-05, 2.16066837310791e-05, 2.2392719984054565e-05, 2.317875623703003e-05, 2.3964792490005493e-05, 2.4750828742980957e-05, 2.553686499595642e-05, 2.6322901248931885e-05, 2.710893750190735e-05, 2.7894973754882812e-05]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 7.0, 11.0, 12.0, 14.0, 23.0, 35.0, 42.0, 61.0, 94.0, 120.0, 193.0, 240.0, 318.0, 508.0, 767.0, 1024.0, 1545.0, 2395.0, 3500.0, 5518.0, 8513.0, 14506.0, 24569.0, 49798.0, 129617.0, 3197485.0, 575735.0, 85015.0, 38647.0, 20270.0, 12160.0, 7132.0, 4748.0, 2985.0, 2106.0, 1366.0, 1024.0, 661.0, 471.0, 306.0, 232.0, 141.0, 118.0, 75.0, 63.0, 38.0, 25.0, 17.0, 13.0, 11.0, 8.0, 4.0, 0.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-5.984306335449219e-05, -5.7962723076343536e-05, -5.6082382798194885e-05, -5.4202042520046234e-05, -5.232170224189758e-05, -5.044136196374893e-05, -4.856102168560028e-05, -4.668068140745163e-05, -4.480034112930298e-05, -4.292000085115433e-05, -4.1039660573005676e-05, -3.9159320294857025e-05, -3.7278980016708374e-05, -3.539863973855972e-05, -3.351829946041107e-05, -3.163795918226242e-05, -2.975761890411377e-05, -2.787727862596512e-05, -2.5996938347816467e-05, -2.4116598069667816e-05, -2.2236257791519165e-05, -2.0355917513370514e-05, -1.8475577235221863e-05, -1.659523695707321e-05, -1.471489667892456e-05, -1.283455640077591e-05, -1.0954216122627258e-05, -9.073875844478607e-06, -7.193535566329956e-06, -5.313195288181305e-06, -3.432855010032654e-06, -1.5525147318840027e-06, 3.2782554626464844e-07, 2.2081658244132996e-06, 4.088506102561951e-06, 5.968846380710602e-06, 7.849186658859253e-06, 9.729526937007904e-06, 1.1609867215156555e-05, 1.3490207493305206e-05, 1.5370547771453857e-05, 1.725088804960251e-05, 1.913122832775116e-05, 2.101156860589981e-05, 2.2891908884048462e-05, 2.4772249162197113e-05, 2.6652589440345764e-05, 2.8532929718494415e-05, 3.0413269996643066e-05, 3.229361027479172e-05, 3.417395055294037e-05, 3.605429083108902e-05, 3.793463110923767e-05, 3.981497138738632e-05, 
4.169531166553497e-05, 4.3575651943683624e-05, 4.5455992221832275e-05, 4.7336332499980927e-05, 4.921667277812958e-05, 5.109701305627823e-05, 5.297735333442688e-05, 5.485769361257553e-05, 5.673803389072418e-05, 5.861837416887283e-05, 6.0498714447021484e-05]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 6.0, 1.0, 6.0, 2.0, 1.0, 7.0, 7.0, 10.0, 11.0, 11.0, 12.0, 18.0, 19.0, 16.0, 21.0, 28.0, 51.0, 49.0, 121.0, 272.0, 1736.0, 967.0, 284.0, 123.0, 71.0, 41.0, 30.0, 30.0, 12.0, 19.0, 14.0, 16.0, 10.0, 6.0, 7.0, 4.0, 7.0, 5.0, 7.0, 4.0, 7.0, 3.0, 2.0, 0.0, 3.0, 1.0, 4.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.618001937866211e-05, -3.483891487121582e-05, -3.349781036376953e-05, -3.215670585632324e-05, -3.081560134887695e-05, -2.9474496841430664e-05, -2.8133392333984375e-05, -2.6792287826538086e-05, -2.5451183319091797e-05, -2.4110078811645508e-05, -2.276897430419922e-05, -2.142786979675293e-05, -2.008676528930664e-05, -1.874566078186035e-05, -1.7404556274414062e-05, -1.6063451766967773e-05, -1.4722347259521484e-05, -1.3381242752075195e-05, -1.2040138244628906e-05, -1.0699033737182617e-05, -9.357929229736328e-06, -8.016824722290039e-06, -6.67572021484375e-06, -5.334615707397461e-06, -3.993511199951172e-06, -2.652406692504883e-06, -1.3113021850585938e-06, 2.9802322387695312e-08, 1.3709068298339844e-06, 2.7120113372802734e-06, 4.0531158447265625e-06, 5.3942203521728516e-06, 6.735324859619141e-06, 8.07642936706543e-06, 9.417533874511719e-06, 1.0758638381958008e-05, 1.2099742889404297e-05, 1.3440847396850586e-05, 1.4781951904296875e-05, 1.6123056411743164e-05, 1.7464160919189453e-05, 1.8805265426635742e-05, 2.014636993408203e-05, 2.148747444152832e-05, 2.282857894897461e-05, 2.41696834564209e-05, 2.5510787963867188e-05, 2.6851892471313477e-05, 2.8192996978759766e-05, 2.9534101486206055e-05, 3.0875205993652344e-05, 3.221631050109863e-05, 3.355741500854492e-05, 3.489851951599121e-05, 3.62396240234375e-05, 3.758072853088379e-05, 3.892183303833008e-05, 4.026293754577637e-05, 4.1604042053222656e-05, 4.2945146560668945e-05, 4.4286251068115234e-05, 4.5627355575561523e-05, 4.696846008300781e-05, 4.83095645904541e-05, 4.965066909790039e-05]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 6.0, 2.0, 1.0, 4.0, 6.0, 9.0, 28.0, 30.0, 43.0, 49.0, 50.0, 87.0, 119.0, 130.0, 127.0, 65.0, 61.0, 36.0, 32.0, 31.0, 18.0, 19.0, 9.0, 12.0, 10.0, 7.0, 4.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00016668399621266872, -0.0001588018931215629, -0.00015091979003045708, -0.00014303768693935126, -0.00013515558384824544, -0.00012727348075713962, -0.0001193913776660338, -0.00011150927457492799, -0.00010362717148382217, -9.574506839271635e-05, -8.786296530161053e-05, -7.998086221050471e-05, -7.209875911939889e-05, -6.421665602829307e-05, -5.6334552937187254e-05, -4.8452449846081436e-05, -4.057034675497562e-05, -3.26882436638698e-05, -2.480614057276398e-05, -1.692403748165816e-05, -9.041934390552342e-06, -1.1598312994465232e-06, 6.7222717916592956e-06, 1.4604374882765114e-05, 2.2486477973870933e-05, 3.0368581064976752e-05, 3.825068415608257e-05, 4.613278724718839e-05, 5.401489033829421e-05, 6.189699342940003e-05, 6.977909652050585e-05, 7.766119961161166e-05, 8.554331725463271e-05, 9.342542034573853e-05, 
0.00010130752343684435, 0.00010918962652795017, 0.00011707172961905599, 0.0001249538327101618, 0.00013283593580126762, 0.00014071803889237344, 0.00014860014198347926, 0.00015648224507458508, 0.0001643643481656909, 0.00017224645125679672, 0.00018012855434790254, 0.00018801065743900836, 0.00019589276053011417, 0.00020377486362122, 0.0002116569667123258, 0.00021953906980343163, 0.00022742117289453745, 0.00023530327598564327, 0.00024318537907674909, 0.0002510674821678549, 0.0002589495852589607, 0.00026683168835006654, 0.00027471379144117236, 0.0002825958945322782, 0.000290477997623384, 0.0002983601007144898, 0.00030624220380559564, 0.00031412430689670146, 0.0003220064099878073, 0.0003298885130789131, 0.0003377706161700189]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0, 5.0, 4.0, 6.0, 8.0, 16.0, 18.0, 17.0, 12.0, 15.0, 14.0, 28.0, 25.0, 22.0, 21.0, 28.0, 27.0, 40.0, 48.0, 36.0, 37.0, 42.0, 52.0, 57.0, 32.0, 46.0, 37.0, 44.0, 29.0, 32.0, 35.0, 32.0, 20.0, 18.0, 17.0, 14.0, 18.0, 10.0, 9.0, 4.0, 7.0, 4.0, 4.0, 4.0, 2.0, 6.0, 0.0, 1.0, 1.0, 4.0, 1.0, 0.0, 1.0], "bins": [-0.00011593103408813477, -0.00011245999485254288, -0.00010898895561695099, -0.0001055179163813591, -0.00010204687714576721, -9.857583791017532e-05, -9.510479867458344e-05, -9.163375943899155e-05, -8.816272020339966e-05, -8.469168096780777e-05, -8.122064173221588e-05, -7.774960249662399e-05, -7.42785632610321e-05, -7.080752402544022e-05, -6.733648478984833e-05, -6.386544555425644e-05, -6.039440631866455e-05, -5.692336708307266e-05, -5.3452327847480774e-05, -4.9981288611888885e-05, -4.6510249376297e-05, -4.303921014070511e-05, -3.956817090511322e-05, -3.609713166952133e-05, -3.262609243392944e-05, -2.9155053198337555e-05, -2.5684013962745667e-05, -2.2212974727153778e-05, -1.874193549156189e-05, -1.527089625597e-05, -1.1799857020378113e-05, -8.328817784786224e-06, -4.857778549194336e-06, -1.3867393136024475e-06, 2.084299921989441e-06, 5.555339157581329e-06, 9.026378393173218e-06, 1.2497417628765106e-05, 1.5968456864356995e-05, 1.9439496099948883e-05, 2.291053533554077e-05, 2.638157457113266e-05, 2.985261380672455e-05, 3.332365304231644e-05, 3.6794692277908325e-05, 4.0265731513500214e-05, 4.37367707490921e-05, 4.720780998468399e-05, 5.067884922027588e-05, 5.414988845586777e-05, 5.7620927691459656e-05, 6.109196692705154e-05, 6.456300616264343e-05, 6.803404539823532e-05, 7.150508463382721e-05, 7.49761238694191e-05, 7.844716310501099e-05, 8.191820234060287e-05, 8.538924157619476e-05, 8.886028081178665e-05, 9.233132004737854e-05, 9.580235928297043e-05, 9.927339851856232e-05, 0.0001027444377541542, 0.0001062154769897461]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 4.0, 6.0, 10.0, 5.0, 10.0, 18.0, 18.0, 51.0, 91.0, 92.0, 146.0, 215.0, 329.0, 501.0, 815.0, 1254.0, 2125.0, 3888.0, 7252.0, 15185.0, 34352.0, 94297.0, 478869.0, 281691.0, 72008.0, 27481.0, 12966.0, 6503.0, 3333.0, 1847.0, 1083.0, 746.0, 456.0, 280.0, 216.0, 138.0, 97.0, 64.0, 54.0, 23.0, 11.0, 3.0, 6.0, 9.0, 9.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00015974044799804688, -0.00015502609312534332, -0.00015031173825263977, -0.00014559738337993622, -0.00014088302850723267, -0.00013616867363452911, -0.00013145431876182556, -0.000126739963889122, -0.00012202560901641846, -0.0001173112541437149, -0.00011259689927101135, -0.0001078825443983078, 
-0.00010316818952560425, -9.84538346529007e-05, -9.373947978019714e-05, -8.902512490749359e-05, -8.431077003479004e-05, -7.959641516208649e-05, -7.488206028938293e-05, -7.016770541667938e-05, -6.545335054397583e-05, -6.073899567127228e-05, -5.6024640798568726e-05, -5.131028592586517e-05, -4.659593105316162e-05, -4.188157618045807e-05, -3.7167221307754517e-05, -3.2452866435050964e-05, -2.7738511562347412e-05, -2.302415668964386e-05, -1.8309801816940308e-05, -1.3595446944236755e-05, -8.881092071533203e-06, -4.166737198829651e-06, 5.476176738739014e-07, 5.261972546577454e-06, 9.976327419281006e-06, 1.4690682291984558e-05, 1.940503716468811e-05, 2.4119392037391663e-05, 2.8833746910095215e-05, 3.354810178279877e-05, 3.826245665550232e-05, 4.297681152820587e-05, 4.7691166400909424e-05, 5.2405521273612976e-05, 5.711987614631653e-05, 6.183423101902008e-05, 6.654858589172363e-05, 7.126294076442719e-05, 7.597729563713074e-05, 8.069165050983429e-05, 8.540600538253784e-05, 9.01203602552414e-05, 9.483471512794495e-05, 9.95490700006485e-05, 0.00010426342487335205, 0.0001089777797460556, 0.00011369213461875916, 0.00011840648949146271, 0.00012312084436416626, 0.0001278351992368698, 0.00013254955410957336, 0.00013726390898227692, 0.00014197826385498047]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [3.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 4.0, 2.0, 3.0, 3.0, 6.0, 11.0, 11.0, 24.0, 28.0, 36.0, 26.0, 34.0, 67.0, 69.0, 80.0, 84.0, 82.0, 80.0, 83.0, 60.0, 48.0, 31.0, 35.0, 20.0, 13.0, 13.0, 12.0, 6.0, 11.0, 10.0, 3.0, 5.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.187490463256836e-05, -2.1039508283138275e-05, -2.020411193370819e-05, -1.9368715584278107e-05, -1.8533319234848022e-05, -1.7697922885417938e-05, -1.6862526535987854e-05, -1.602713018655777e-05, -1.5191733837127686e-05, -1.4356337487697601e-05, -1.3520941138267517e-05, -1.2685544788837433e-05, -1.1850148439407349e-05, -1.1014752089977264e-05, -1.017935574054718e-05, -9.343959391117096e-06, -8.508563041687012e-06, -7.673166692256927e-06, -6.837770342826843e-06, -6.002373993396759e-06, -5.166977643966675e-06, -4.3315812945365906e-06, -3.4961849451065063e-06, -2.660788595676422e-06, -1.8253922462463379e-06, -9.899958968162537e-07, -1.5459954738616943e-07, 6.807968020439148e-07, 1.516193151473999e-06, 2.3515895009040833e-06, 3.1869858503341675e-06, 4.022382199764252e-06, 4.857778549194336e-06, 5.69317489862442e-06, 6.528571248054504e-06, 7.363967597484589e-06, 8.199363946914673e-06, 9.034760296344757e-06, 9.870156645774841e-06, 1.0705552995204926e-05, 1.154094934463501e-05, 1.2376345694065094e-05, 1.3211742043495178e-05, 1.4047138392925262e-05, 1.4882534742355347e-05, 1.571793109178543e-05, 1.6553327441215515e-05, 1.73887237906456e-05, 1.8224120140075684e-05, 1.9059516489505768e-05, 1.9894912838935852e-05, 2.0730309188365936e-05, 2.156570553779602e-05, 2.2401101887226105e-05, 2.323649823665619e-05, 2.4071894586086273e-05, 2.4907290935516357e-05, 2.574268728494644e-05, 2.6578083634376526e-05, 2.741347998380661e-05, 2.8248876333236694e-05, 2.908427268266678e-05, 2.9919669032096863e-05, 3.075506538152695e-05, 3.159046173095703e-05]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 1.0, 3.0, 6.0, 12.0, 13.0, 14.0, 26.0, 22.0, 54.0, 63.0, 90.0, 158.0, 230.0, 336.0, 510.0, 786.0, 1186.0, 1924.0, 2916.0, 4571.0, 7094.0, 11536.0, 18744.0, 31398.0, 
55253.0, 101124.0, 277250.0, 290094.0, 106304.0, 55255.0, 30550.0, 19070.0, 11630.0, 7301.0, 4682.0, 2907.0, 1862.0, 1237.0, 787.0, 522.0, 344.0, 215.0, 147.0, 118.0, 64.0, 43.0, 40.0, 29.0, 17.0, 8.0, 9.0, 5.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.507469177246094e-05, -5.329865962266922e-05, -5.15226274728775e-05, -4.9746595323085785e-05, -4.797056317329407e-05, -4.619453102350235e-05, -4.441849887371063e-05, -4.2642466723918915e-05, -4.08664345741272e-05, -3.909040242433548e-05, -3.731437027454376e-05, -3.5538338124752045e-05, -3.376230597496033e-05, -3.198627382516861e-05, -3.0210241675376892e-05, -2.8434209525585175e-05, -2.6658177375793457e-05, -2.488214522600174e-05, -2.3106113076210022e-05, -2.1330080926418304e-05, -1.9554048776626587e-05, -1.777801662683487e-05, -1.6001984477043152e-05, -1.4225952327251434e-05, -1.2449920177459717e-05, -1.0673888027668e-05, -8.897855877876282e-06, -7.121823728084564e-06, -5.345791578292847e-06, -3.569759428501129e-06, -1.7937272787094116e-06, -1.7695128917694092e-08, 1.7583370208740234e-06, 3.534369170665741e-06, 5.3104013204574585e-06, 7.086433470249176e-06, 8.862465620040894e-06, 1.0638497769832611e-05, 1.2414529919624329e-05, 1.4190562069416046e-05, 1.5966594219207764e-05, 1.774262636899948e-05, 1.95186585187912e-05, 2.1294690668582916e-05, 2.3070722818374634e-05, 2.484675496816635e-05, 2.662278711795807e-05, 2.8398819267749786e-05, 3.0174851417541504e-05, 3.195088356733322e-05, 3.372691571712494e-05, 3.5502947866916656e-05, 3.7278980016708374e-05, 3.905501216650009e-05, 4.083104431629181e-05, 4.2607076466083527e-05, 4.4383108615875244e-05, 4.615914076566696e-05, 4.793517291545868e-05, 4.97112050652504e-05, 5.1487237215042114e-05, 5.326326936483383e-05, 5.503930151462555e-05, 5.681533366441727e-05, 5.8591365814208984e-05]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 4.0, 5.0, 5.0, 4.0, 8.0, 6.0, 7.0, 7.0, 14.0, 16.0, 16.0, 19.0, 19.0, 23.0, 20.0, 31.0, 20.0, 42.0, 42.0, 36.0, 33.0, 45.0, 37.0, 45.0, 48.0, 43.0, 35.0, 39.0, 35.0, 38.0, 39.0, 29.0, 28.0, 20.0, 30.0, 20.0, 21.0, 8.0, 11.0, 16.0, 8.0, 7.0, 6.0, 4.0, 10.0, 6.0, 0.0, 3.0, 5.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-5.0008296966552734e-05, -4.8488378524780273e-05, -4.696846008300781e-05, -4.544854164123535e-05, -4.392862319946289e-05, -4.240870475769043e-05, -4.088878631591797e-05, -3.936886787414551e-05, -3.784894943237305e-05, -3.6329030990600586e-05, -3.4809112548828125e-05, -3.3289194107055664e-05, -3.17692756652832e-05, -3.0249357223510742e-05, -2.872943878173828e-05, -2.720952033996582e-05, -2.568960189819336e-05, -2.41696834564209e-05, -2.2649765014648438e-05, -2.1129846572875977e-05, -1.9609928131103516e-05, -1.8090009689331055e-05, -1.6570091247558594e-05, -1.5050172805786133e-05, -1.3530254364013672e-05, -1.2010335922241211e-05, -1.049041748046875e-05, -8.970499038696289e-06, -7.450580596923828e-06, -5.930662155151367e-06, -4.410743713378906e-06, -2.8908252716064453e-06, -1.3709068298339844e-06, 1.4901161193847656e-07, 1.6689300537109375e-06, 3.1888484954833984e-06, 4.708766937255859e-06, 6.22868537902832e-06, 7.748603820800781e-06, 9.268522262573242e-06, 1.0788440704345703e-05, 1.2308359146118164e-05, 1.3828277587890625e-05, 1.5348196029663086e-05, 1.6868114471435547e-05, 1.8388032913208008e-05, 1.990795135498047e-05, 2.142786979675293e-05, 2.294778823852539e-05, 2.446770668029785e-05, 2.5987625122070312e-05, 2.7507543563842773e-05, 2.9027462005615234e-05, 
3.0547380447387695e-05, 3.2067298889160156e-05, 3.358721733093262e-05, 3.510713577270508e-05, 3.662705421447754e-05, 3.814697265625e-05, 3.966689109802246e-05, 4.118680953979492e-05, 4.270672798156738e-05, 4.4226646423339844e-05, 4.5746564865112305e-05, 4.7266483306884766e-05]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 3.0, 3.0, 0.0, 6.0, 13.0, 10.0, 19.0, 33.0, 43.0, 57.0, 81.0, 158.0, 209.0, 273.0, 459.0, 709.0, 1114.0, 2744.0, 3824.0, 6883.0, 13196.0, 27251.0, 60538.0, 144295.0, 439291.0, 195139.0, 80091.0, 35384.0, 16711.0, 8607.0, 5627.0, 2168.0, 1285.0, 782.0, 513.0, 312.0, 291.0, 152.0, 79.0, 54.0, 45.0, 28.0, 24.0, 22.0, 12.0, 6.0, 3.0, 5.0, 6.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0], "bins": [-6.139278411865234e-06, -5.951151251792908e-06, -5.763024091720581e-06, -5.574896931648254e-06, -5.386769771575928e-06, -5.198642611503601e-06, -5.010515451431274e-06, -4.822388291358948e-06, -4.634261131286621e-06, -4.4461339712142944e-06, -4.258006811141968e-06, -4.069879651069641e-06, -3.8817524909973145e-06, -3.693625330924988e-06, -3.505498170852661e-06, -3.3173710107803345e-06, -3.129243850708008e-06, -2.941116690635681e-06, -2.7529895305633545e-06, -2.564862370491028e-06, -2.376735210418701e-06, -2.1886080503463745e-06, -2.000480890274048e-06, -1.8123537302017212e-06, -1.6242265701293945e-06, -1.4360994100570679e-06, -1.2479722499847412e-06, -1.0598450899124146e-06, -8.717179298400879e-07, -6.835907697677612e-07, -4.954636096954346e-07, -3.073364496231079e-07, -1.1920928955078125e-07, 6.891787052154541e-08, 2.5704503059387207e-07, 4.4517219066619873e-07, 6.332993507385254e-07, 8.21426510810852e-07, 1.0095536708831787e-06, 1.1976808309555054e-06, 1.385807991027832e-06, 1.5739351511001587e-06, 1.7620623111724854e-06, 1.950189471244812e-06, 2.1383166313171387e-06, 2.3264437913894653e-06, 2.514570951461792e-06, 2.7026981115341187e-06, 2.8908252716064453e-06, 3.078952431678772e-06, 3.2670795917510986e-06, 3.4552067518234253e-06, 3.643333911895752e-06, 3.831461071968079e-06, 4.019588232040405e-06, 4.207715392112732e-06, 4.395842552185059e-06, 4.583969712257385e-06, 4.772096872329712e-06, 4.9602240324020386e-06, 5.148351192474365e-06, 5.336478352546692e-06, 5.5246055126190186e-06, 5.712732672691345e-06, 5.900859832763672e-06]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 7.0, 4.0, 6.0, 10.0, 8.0, 12.0, 15.0, 15.0, 20.0, 30.0, 30.0, 35.0, 38.0, 50.0, 46.0, 29.0, 43.0, 50.0, 44.0, 0.0, 44.0, 39.0, 55.0, 47.0, 33.0, 26.0, 37.0, 35.0, 33.0, 29.0, 26.0, 27.0, 22.0, 14.0, 11.0, 8.0, 11.0, 6.0, 1.0, 3.0, 3.0, 1.0, 4.0, 1.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8477439880371094e-06, -1.7900019884109497e-06, -1.73225998878479e-06, -1.6745179891586304e-06, -1.6167759895324707e-06, -1.559033989906311e-06, -1.5012919902801514e-06, -1.4435499906539917e-06, -1.385807991027832e-06, -1.3280659914016724e-06, -1.2703239917755127e-06, -1.212581992149353e-06, -1.1548399925231934e-06, -1.0970979928970337e-06, -1.039355993270874e-06, -9.816139936447144e-07, -9.238719940185547e-07, -8.66129994392395e-07, -8.083879947662354e-07, -7.506459951400757e-07, -6.92903995513916e-07, -6.351619958877563e-07, -5.774199962615967e-07, -5.19677996635437e-07, -4.6193599700927734e-07, -4.041939973831177e-07, -3.46451997756958e-07, -2.8870999813079834e-07, -2.3096799850463867e-07, -1.73225998878479e-07, 
-1.1548399925231934e-07, -5.774199962615967e-08, 0.0, 5.774199962615967e-08, 1.1548399925231934e-07, 1.73225998878479e-07, 2.3096799850463867e-07, 2.8870999813079834e-07, 3.46451997756958e-07, 4.041939973831177e-07, 4.6193599700927734e-07, 5.19677996635437e-07, 5.774199962615967e-07, 6.351619958877563e-07, 6.92903995513916e-07, 7.506459951400757e-07, 8.083879947662354e-07, 8.66129994392395e-07, 9.238719940185547e-07, 9.816139936447144e-07, 1.039355993270874e-06, 1.0970979928970337e-06, 1.1548399925231934e-06, 1.212581992149353e-06, 1.2703239917755127e-06, 1.3280659914016724e-06, 1.385807991027832e-06, 1.4435499906539917e-06, 1.5012919902801514e-06, 1.559033989906311e-06, 1.6167759895324707e-06, 1.6745179891586304e-06, 1.73225998878479e-06, 1.7900019884109497e-06, 1.8477439880371094e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 12.0, 4.0, 11.0, 19.0, 28.0, 31.0, 56.0, 60.0, 105.0, 118.0, 172.0, 374.0, 425.0, 874.0, 916.0, 2221.0, 2397.0, 5996.0, 6733.0, 17897.0, 21471.0, 64689.0, 96627.0, 387657.0, 218872.0, 96068.0, 64650.0, 21526.0, 17850.0, 6757.0, 6043.0, 2418.0, 2246.0, 946.0, 927.0, 368.0, 354.0, 173.0, 176.0, 82.0, 74.0, 34.0, 35.0, 23.0, 11.0, 14.0, 4.0, 5.0, 2.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.410743713378906e-06, -4.263594746589661e-06, -4.116445779800415e-06, -3.9692968130111694e-06, -3.822147846221924e-06, -3.6749988794326782e-06, -3.5278499126434326e-06, -3.380700945854187e-06, -3.2335519790649414e-06, -3.086403012275696e-06, -2.93925404548645e-06, -2.7921050786972046e-06, -2.644956111907959e-06, -2.4978071451187134e-06, -2.3506581783294678e-06, -2.203509211540222e-06, -2.0563602447509766e-06, -1.909211277961731e-06, -1.7620623111724854e-06, -1.6149133443832397e-06, -1.4677643775939941e-06, -1.3206154108047485e-06, -1.173466444015503e-06, -1.0263174772262573e-06, -8.791685104370117e-07, -7.320195436477661e-07, -5.848705768585205e-07, -4.377216100692749e-07, -2.905726432800293e-07, -1.434236764907837e-07, 3.725290298461914e-09, 1.5087425708770752e-07, 2.980232238769531e-07, 4.4517219066619873e-07, 5.923211574554443e-07, 7.394701242446899e-07, 8.866190910339355e-07, 1.0337680578231812e-06, 1.1809170246124268e-06, 1.3280659914016724e-06, 1.475214958190918e-06, 1.6223639249801636e-06, 1.7695128917694092e-06, 1.9166618585586548e-06, 2.0638108253479004e-06, 2.210959792137146e-06, 2.3581087589263916e-06, 2.505257725715637e-06, 2.652406692504883e-06, 2.7995556592941284e-06, 2.946704626083374e-06, 3.0938535928726196e-06, 3.2410025596618652e-06, 3.388151526451111e-06, 3.5353004932403564e-06, 3.682449460029602e-06, 3.829598426818848e-06, 3.976747393608093e-06, 4.123896360397339e-06, 4.2710453271865845e-06, 4.41819429397583e-06, 4.565343260765076e-06, 4.712492227554321e-06, 4.859641194343567e-06, 5.0067901611328125e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 2.0, 0.0, 4.0, 8.0, 3.0, 8.0, 7.0, 12.0, 11.0, 19.0, 14.0, 22.0, 24.0, 23.0, 28.0, 34.0, 29.0, 17.0, 48.0, 40.0, 54.0, 55.0, 62.0, 50.0, 58.0, 51.0, 46.0, 43.0, 44.0, 34.0, 25.0, 18.0, 23.0, 5.0, 15.0, 13.0, 12.0, 8.0, 8.0, 7.0, 5.0, 5.0, 0.0, 2.0, 2.0, 4.0, 2.0, 1.0, 1.0, 2.0], "bins": [-4.172325134277344e-06, -4.056841135025024e-06, -3.941357135772705e-06, -3.825873136520386e-06, -3.7103891372680664e-06, -3.594905138015747e-06, -3.4794211387634277e-06, -3.3639371395111084e-06, 
-3.248453140258789e-06, -3.1329691410064697e-06, -3.0174851417541504e-06, -2.902001142501831e-06, -2.7865171432495117e-06, -2.6710331439971924e-06, -2.555549144744873e-06, -2.4400651454925537e-06, -2.3245811462402344e-06, -2.209097146987915e-06, -2.0936131477355957e-06, -1.9781291484832764e-06, -1.862645149230957e-06, -1.7471611499786377e-06, -1.6316771507263184e-06, -1.516193151473999e-06, -1.4007091522216797e-06, -1.2852251529693604e-06, -1.169741153717041e-06, -1.0542571544647217e-06, -9.387731552124023e-07, -8.23289155960083e-07, -7.078051567077637e-07, -5.923211574554443e-07, -4.76837158203125e-07, -3.6135315895080566e-07, -2.4586915969848633e-07, -1.30385160446167e-07, -1.4901161193847656e-08, 1.0058283805847168e-07, 2.1606683731079102e-07, 3.3155083656311035e-07, 4.470348358154297e-07, 5.62518835067749e-07, 6.780028343200684e-07, 7.934868335723877e-07, 9.08970832824707e-07, 1.0244548320770264e-06, 1.1399388313293457e-06, 1.255422830581665e-06, 1.3709068298339844e-06, 1.4863908290863037e-06, 1.601874828338623e-06, 1.7173588275909424e-06, 1.8328428268432617e-06, 1.948326826095581e-06, 2.0638108253479004e-06, 2.1792948246002197e-06, 2.294778823852539e-06, 2.4102628231048584e-06, 2.5257468223571777e-06, 2.641230821609497e-06, 2.7567148208618164e-06, 2.8721988201141357e-06, 2.987682819366455e-06, 3.1031668186187744e-06, 3.2186508178710938e-06]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 5.0, 4.0, 9.0, 10.0, 20.0, 22.0, 40.0, 37.0, 73.0, 103.0, 185.0, 133.0, 85.0, 53.0, 44.0, 26.0, 33.0, 18.0, 21.0, 19.0, 16.0, 9.0, 8.0, 7.0, 9.0, 1.0, 5.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00025282386923208833, -0.00024402370036114007, -0.00023522354604210705, -0.0002264233771711588, -0.00021762322285212576, -0.0002088230539811775, -0.00020002288511022925, -0.00019122273079119623, -0.00018242256192024797, -0.00017362239304929972, -0.0001648222387302667, -0.00015602206985931844, -0.00014722190098837018, -0.00013842174666933715, -0.0001296215777983889, -0.00012082141620339826, -0.00011202125460840762, -0.00010322109301341698, -9.442093141842633e-05, -8.562076254747808e-05, -7.682060095248744e-05, -6.80204393574968e-05, -5.922027412452735e-05, -5.04201088915579e-05, -4.161994729656726e-05, -3.281978570157662e-05, -2.4019620468607172e-05, -1.5219457054627128e-05, -6.4192936406470835e-06, 2.3808679543435574e-06, 1.1181033187313005e-05, 1.9981198420282453e-05, 2.8781330911442637e-05, 3.758149250643328e-05, 4.6381657739402726e-05, 5.5181822972372174e-05, 6.398198456736282e-05, 7.278214616235346e-05, 8.158231503330171e-05, 9.038247662829235e-05, 9.918263822328299e-05, 0.00010798279981827363, 0.00011678296141326427, 0.00012558313028421253, 0.00013438329915516078, 0.0001431834534741938, 0.00015198362234514207, 0.00016078379121609032, 0.00016958394553512335, 0.0001783841144060716, 0.00018718426872510463, 0.00019598443759605289, 0.0002047845919150859, 0.00021358476078603417, 0.00022238492965698242, 0.00023118508397601545, 0.0002399852528469637, 0.00024878542171791196, 0.000257585576036945, 0.000266385730355978, 0.0002751859137788415, 0.0002839860680978745, 0.00029278622241690755, 0.00030158640583977103, 0.00031038656015880406]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 4.0, 2.0, 5.0, 3.0, 
6.0, 7.0, 11.0, 10.0, 14.0, 9.0, 24.0, 18.0, 14.0, 27.0, 42.0, 36.0, 44.0, 50.0, 44.0, 48.0, 42.0, 45.0, 56.0, 54.0, 39.0, 45.0, 42.0, 41.0, 35.0, 29.0, 23.0, 20.0, 16.0, 21.0, 20.0, 20.0, 17.0, 8.0, 5.0, 1.0, 3.0, 4.0, 4.0, 4.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.00020319223403930664, -0.00019762292504310608, -0.00019205361604690552, -0.00018648430705070496, -0.0001809149980545044, -0.00017534568905830383, -0.00016977638006210327, -0.0001642070710659027, -0.00015863776206970215, -0.0001530684530735016, -0.00014749914407730103, -0.00014192983508110046, -0.0001363605260848999, -0.00013079121708869934, -0.00012522190809249878, -0.00011965259909629822, -0.00011408329010009766, -0.0001085139811038971, -0.00010294467210769653, -9.737536311149597e-05, -9.180605411529541e-05, -8.623674511909485e-05, -8.066743612289429e-05, -7.509812712669373e-05, -6.952881813049316e-05, -6.39595091342926e-05, -5.839020013809204e-05, -5.282089114189148e-05, -4.725158214569092e-05, -4.1682273149490356e-05, -3.6112964153289795e-05, -3.0543655157089233e-05, -2.4974346160888672e-05, -1.940503716468811e-05, -1.3835728168487549e-05, -8.266419172286987e-06, -2.6971101760864258e-06, 2.8721988201141357e-06, 8.441507816314697e-06, 1.4010816812515259e-05, 1.958012580871582e-05, 2.5149434804916382e-05, 3.071874380111694e-05, 3.6288052797317505e-05, 4.1857361793518066e-05, 4.742667078971863e-05, 5.299597978591919e-05, 5.856528878211975e-05, 6.413459777832031e-05, 6.970390677452087e-05, 7.527321577072144e-05, 8.0842524766922e-05, 8.641183376312256e-05, 9.198114275932312e-05, 9.755045175552368e-05, 0.00010311976075172424, 0.0001086890697479248, 0.00011425837874412537, 0.00011982768774032593, 0.0001253969967365265, 0.00013096630573272705, 0.0001365356147289276, 0.00014210492372512817, 0.00014767423272132874, 0.0001532435417175293]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 3.0, 12.0, 4.0, 22.0, 24.0, 32.0, 59.0, 90.0, 110.0, 179.0, 283.0, 454.0, 714.0, 1092.0, 1844.0, 3062.0, 4791.0, 8429.0, 14989.0, 30244.0, 80648.0, 3582988.0, 365214.0, 47358.0, 21377.0, 11752.0, 6692.0, 4049.0, 2609.0, 1614.0, 1040.0, 723.0, 489.0, 336.0, 259.0, 150.0, 121.0, 104.0, 89.0, 47.0, 51.0, 28.0, 25.0, 18.0, 22.0, 14.0, 12.0, 14.0, 6.0, 3.0, 1.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-8.89897346496582e-05, -8.578691631555557e-05, -8.258409798145294e-05, -7.938127964735031e-05, -7.617846131324768e-05, -7.297564297914505e-05, -6.977282464504242e-05, -6.657000631093979e-05, -6.336718797683716e-05, -6.016436964273453e-05, -5.69615513086319e-05, -5.3758732974529266e-05, -5.0555914640426636e-05, -4.7353096306324005e-05, -4.4150277972221375e-05, -4.0947459638118744e-05, -3.774464130401611e-05, -3.454182296991348e-05, -3.133900463581085e-05, -2.813618630170822e-05, -2.493336796760559e-05, -2.173054963350296e-05, -1.852773129940033e-05, -1.53249129652977e-05, -1.2122094631195068e-05, -8.919276297092438e-06, -5.716457962989807e-06, -2.5136396288871765e-06, 6.891787052154541e-07, 3.891997039318085e-06, 7.094815373420715e-06, 1.0297633707523346e-05, 1.3500452041625977e-05, 1.6703270375728607e-05, 1.9906088709831238e-05, 2.310890704393387e-05, 2.63117253780365e-05, 2.951454371213913e-05, 3.271736204624176e-05, 3.592018038034439e-05, 3.912299871444702e-05, 4.232581704854965e-05, 4.552863538265228e-05, 4.873145371675491e-05, 5.1934272050857544e-05, 5.5137090384960175e-05, 5.8339908719062805e-05, 6.154272705316544e-05, 6.474554538726807e-05, 
6.79483637213707e-05, 7.115118205547333e-05, 7.435400038957596e-05, 7.755681872367859e-05, 8.075963705778122e-05, 8.396245539188385e-05, 8.716527372598648e-05, 9.036809206008911e-05, 9.357091039419174e-05, 9.677372872829437e-05, 9.9976547062397e-05, 0.00010317936539649963, 0.00010638218373060226, 0.0001095850020647049, 0.00011278782039880753, 0.00011599063873291016]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 0.0, 4.0, 3.0, 3.0, 14.0, 10.0, 16.0, 20.0, 15.0, 23.0, 21.0, 40.0, 38.0, 45.0, 61.0, 72.0, 76.0, 79.0, 81.0, 59.0, 71.0, 37.0, 34.0, 37.0, 26.0, 29.0, 18.0, 10.0, 16.0, 9.0, 7.0, 12.0, 2.0, 7.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 4.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.0682811737060547e-05, -1.995917409658432e-05, -1.9235536456108093e-05, -1.8511898815631866e-05, -1.778826117515564e-05, -1.7064623534679413e-05, -1.6340985894203186e-05, -1.561734825372696e-05, -1.4893710613250732e-05, -1.4170072972774506e-05, -1.3446435332298279e-05, -1.2722797691822052e-05, -1.1999160051345825e-05, -1.1275522410869598e-05, -1.0551884770393372e-05, -9.828247129917145e-06, -9.104609489440918e-06, -8.380971848964691e-06, -7.657334208488464e-06, -6.9336965680122375e-06, -6.210058927536011e-06, -5.486421287059784e-06, -4.762783646583557e-06, -4.03914600610733e-06, -3.3155083656311035e-06, -2.5918707251548767e-06, -1.86823308467865e-06, -1.144595444202423e-06, -4.209578037261963e-07, 3.026798367500305e-07, 1.0263174772262573e-06, 1.7499551177024841e-06, 2.473592758178711e-06, 3.1972303986549377e-06, 3.9208680391311646e-06, 4.644505679607391e-06, 5.368143320083618e-06, 6.091780960559845e-06, 6.815418601036072e-06, 7.539056241512299e-06, 8.262693881988525e-06, 8.986331522464752e-06, 9.709969162940979e-06, 1.0433606803417206e-05, 1.1157244443893433e-05, 1.188088208436966e-05, 1.2604519724845886e-05, 1.3328157365322113e-05, 1.405179500579834e-05, 1.4775432646274567e-05, 1.5499070286750793e-05, 1.622270792722702e-05, 1.6946345567703247e-05, 1.7669983208179474e-05, 1.83936208486557e-05, 1.9117258489131927e-05, 1.9840896129608154e-05, 2.056453377008438e-05, 2.1288171410560608e-05, 2.2011809051036835e-05, 2.273544669151306e-05, 2.345908433198929e-05, 2.4182721972465515e-05, 2.4906359612941742e-05, 2.562999725341797e-05]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 3.0, 3.0, 1.0, 6.0, 3.0, 16.0, 13.0, 8.0, 18.0, 32.0, 49.0, 69.0, 108.0, 119.0, 236.0, 324.0, 553.0, 816.0, 1352.0, 2296.0, 3837.0, 6635.0, 12751.0, 25713.0, 56527.0, 179946.0, 3479947.0, 289696.0, 67960.0, 31085.0, 15011.0, 7998.0, 4402.0, 2556.0, 1477.0, 941.0, 588.0, 404.0, 252.0, 179.0, 87.0, 91.0, 54.0, 38.0, 25.0, 25.0, 13.0, 9.0, 8.0, 2.0, 6.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-7.987022399902344e-05, -7.749442011117935e-05, -7.511861622333527e-05, -7.274281233549118e-05, -7.03670084476471e-05, -6.799120455980301e-05, -6.561540067195892e-05, -6.323959678411484e-05, -6.086379289627075e-05, -5.8487989008426666e-05, -5.611218512058258e-05, -5.3736381232738495e-05, -5.136057734489441e-05, -4.8984773457050323e-05, -4.660896956920624e-05, -4.423316568136215e-05, -4.1857361793518066e-05, -3.948155790567398e-05, -3.7105754017829895e-05, -3.472995012998581e-05, -3.2354146242141724e-05, -2.9978342354297638e-05, -2.7602538466453552e-05, -2.5226734578609467e-05, -2.285093069076538e-05, 
-2.0475126802921295e-05, -1.809932291507721e-05, -1.5723519027233124e-05, -1.3347715139389038e-05, -1.0971911251544952e-05, -8.596107363700867e-06, -6.220303475856781e-06, -3.844499588012695e-06, -1.4686957001686096e-06, 9.071081876754761e-07, 3.2829120755195618e-06, 5.6587159633636475e-06, 8.034519851207733e-06, 1.0410323739051819e-05, 1.2786127626895905e-05, 1.516193151473999e-05, 1.7537735402584076e-05, 1.991353929042816e-05, 2.2289343178272247e-05, 2.4665147066116333e-05, 2.704095095396042e-05, 2.9416754841804504e-05, 3.179255872964859e-05, 3.4168362617492676e-05, 3.654416650533676e-05, 3.891997039318085e-05, 4.129577428102493e-05, 4.367157816886902e-05, 4.6047382056713104e-05, 4.842318594455719e-05, 5.0798989832401276e-05, 5.317479372024536e-05, 5.555059760808945e-05, 5.792640149593353e-05, 6.030220538377762e-05, 6.26780092716217e-05, 6.505381315946579e-05, 6.742961704730988e-05, 6.980542093515396e-05, 7.218122482299805e-05]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 5.0, 2.0, 0.0, 1.0, 5.0, 6.0, 5.0, 11.0, 17.0, 14.0, 11.0, 26.0, 18.0, 33.0, 42.0, 45.0, 79.0, 181.0, 523.0, 1995.0, 567.0, 163.0, 68.0, 49.0, 44.0, 36.0, 24.0, 20.0, 19.0, 12.0, 14.0, 7.0, 7.0, 8.0, 11.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.814697265625e-05, -3.6592595279216766e-05, -3.503821790218353e-05, -3.34838405251503e-05, -3.1929463148117065e-05, -3.0375085771083832e-05, -2.8820708394050598e-05, -2.7266331017017365e-05, -2.571195363998413e-05, -2.4157576262950897e-05, -2.2603198885917664e-05, -2.104882150888443e-05, -1.9494444131851196e-05, -1.7940066754817963e-05, -1.638568937778473e-05, -1.4831312000751495e-05, -1.3276934623718262e-05, -1.1722557246685028e-05, -1.0168179869651794e-05, -8.61380249261856e-06, -7.059425115585327e-06, -5.5050477385520935e-06, -3.95067036151886e-06, -2.3962929844856262e-06, -8.419156074523926e-07, 7.124617695808411e-07, 2.2668391466140747e-06, 3.821216523647308e-06, 5.375593900680542e-06, 6.929971277713776e-06, 8.48434865474701e-06, 1.0038726031780243e-05, 1.1593103408813477e-05, 1.314748078584671e-05, 1.4701858162879944e-05, 1.6256235539913177e-05, 1.781061291694641e-05, 1.9364990293979645e-05, 2.091936767101288e-05, 2.2473745048046112e-05, 2.4028122425079346e-05, 2.558249980211258e-05, 2.7136877179145813e-05, 2.8691254556179047e-05, 3.024563193321228e-05, 3.1800009310245514e-05, 3.335438668727875e-05, 3.490876406431198e-05, 3.6463141441345215e-05, 3.801751881837845e-05, 3.957189619541168e-05, 4.1126273572444916e-05, 4.268065094947815e-05, 4.423502832651138e-05, 4.578940570354462e-05, 4.734378308057785e-05, 4.8898160457611084e-05, 5.045253783464432e-05, 5.200691521167755e-05, 5.3561292588710785e-05, 5.511566996574402e-05, 5.667004734277725e-05, 5.8224424719810486e-05, 5.977880209684372e-05, 6.133317947387695e-05]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 4.0, 5.0, 4.0, 6.0, 5.0, 18.0, 20.0, 35.0, 28.0, 43.0, 65.0, 95.0, 114.0, 106.0, 100.0, 96.0, 57.0, 41.0, 26.0, 38.0, 26.0, 24.0, 8.0, 6.0, 8.0, 9.0, 5.0, 5.0, 4.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0002095225645462051, -0.00020213652169331908, -0.0001947504933923483, -0.00018736445053946227, -0.00017997840768657625, 
-0.00017259237938560545, -0.00016520633653271943, -0.0001578202936798334, -0.00015043426537886262, -0.0001430482225259766, -0.0001356621942250058, -0.00012827615137211978, -0.00012089011579519138, -0.00011350408021826297, -0.00010611803736537695, -9.873200178844854e-05, -9.134596621152014e-05, -8.395993063459173e-05, -7.657389505766332e-05, -6.91878522047773e-05, -6.18018166278489e-05, -5.4415781050920486e-05, -4.702974183601327e-05, -3.964370262110606e-05, -3.225766704417765e-05, -2.487162964825984e-05, -1.748559225234203e-05, -1.009955485642422e-05, -2.7135174605064094e-06, 4.6725181164219975e-06, 1.2058557331329212e-05, 1.9444596546236426e-05, 2.6830617571249604e-05, 3.421665314817801e-05, 4.1602692363085225e-05, 4.898873157799244e-05, 5.6374767154920846e-05, 6.376080273184925e-05, 7.114684558473527e-05, 7.853288116166368e-05, 8.591891673859209e-05, 9.33049523155205e-05, 0.0001006909878924489, 0.00010807703074533492, 0.00011546306632226333, 0.00012284910189919174, 0.00013023514475207776, 0.00013762118760496378, 0.00014500721590593457, 0.0001523932587588206, 0.00015977928705979139, 0.0001671653299126774, 0.0001745513582136482, 0.00018193740106653422, 0.00018932344391942024, 0.00019670947222039104, 0.00020409551507327706, 0.00021148155792616308, 0.00021886758622713387, 0.0002262536290800199, 0.0002336396719329059, 0.0002410257002338767, 0.0002484117285348475, 0.00025579778593964875, 0.00026318381424061954]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 1.0, 7.0, 8.0, 6.0, 6.0, 8.0, 7.0, 10.0, 21.0, 17.0, 20.0, 24.0, 25.0, 24.0, 26.0, 28.0, 27.0, 50.0, 37.0, 38.0, 42.0, 30.0, 38.0, 39.0, 29.0, 46.0, 42.0, 26.0, 33.0, 34.0, 29.0, 31.0, 30.0, 24.0, 17.0, 21.0, 16.0, 20.0, 9.0, 14.0, 12.0, 10.0, 2.0, 4.0, 5.0, 5.0, 2.0, 3.0, 3.0, 2.0, 3.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.369850158691406e-05, -9.047240018844604e-05, -8.724629878997803e-05, -8.402019739151001e-05, -8.079409599304199e-05, -7.756799459457397e-05, -7.434189319610596e-05, -7.111579179763794e-05, -6.788969039916992e-05, -6.46635890007019e-05, -6.143748760223389e-05, -5.821138620376587e-05, -5.498528480529785e-05, -5.1759183406829834e-05, -4.8533082008361816e-05, -4.53069806098938e-05, -4.208087921142578e-05, -3.8854777812957764e-05, -3.5628676414489746e-05, -3.240257501602173e-05, -2.917647361755371e-05, -2.5950372219085693e-05, -2.2724270820617676e-05, -1.9498169422149658e-05, -1.627206802368164e-05, -1.3045966625213623e-05, -9.819865226745605e-06, -6.593763828277588e-06, -3.3676624298095703e-06, -1.4156103134155273e-07, 3.084540367126465e-06, 6.310641765594482e-06, 9.5367431640625e-06, 1.2762844562530518e-05, 1.5988945960998535e-05, 1.9215047359466553e-05, 2.244114875793457e-05, 2.5667250156402588e-05, 2.8893351554870605e-05, 3.211945295333862e-05, 3.534555435180664e-05, 3.857165575027466e-05, 4.1797757148742676e-05, 4.502385854721069e-05, 4.824995994567871e-05, 5.147606134414673e-05, 5.4702162742614746e-05, 5.7928264141082764e-05, 6.115436553955078e-05, 6.43804669380188e-05, 6.760656833648682e-05, 7.083266973495483e-05, 7.405877113342285e-05, 7.728487253189087e-05, 8.051097393035889e-05, 8.37370753288269e-05, 8.696317672729492e-05, 9.018927812576294e-05, 9.341537952423096e-05, 9.664148092269897e-05, 9.986758232116699e-05, 0.00010309368371963501, 0.00010631978511810303, 0.00010954588651657104, 0.00011277198791503906]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 
1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 7.0, 0.0, 4.0, 14.0, 13.0, 19.0, 33.0, 34.0, 45.0, 57.0, 88.0, 138.0, 216.0, 274.0, 428.0, 585.0, 897.0, 1524.0, 2542.0, 4344.0, 8367.0, 17010.0, 39289.0, 109995.0, 470350.0, 260603.0, 73677.0, 28726.0, 13098.0, 6432.0, 3695.0, 2148.0, 1293.0, 839.0, 552.0, 353.0, 247.0, 165.0, 127.0, 75.0, 68.0, 37.0, 43.0, 30.0, 23.0, 14.0, 15.0, 8.0, 8.0, 5.0, 1.0, 5.0, 4.0, 1.0], "bins": [-0.00012826919555664062, -0.0001245858147740364, -0.00012090243399143219, -0.00011721905320882797, -0.00011353567242622375, -0.00010985229164361954, -0.00010616891086101532, -0.0001024855300784111, -9.880214929580688e-05, -9.511876851320267e-05, -9.143538773059845e-05, -8.775200694799423e-05, -8.406862616539001e-05, -8.03852453827858e-05, -7.670186460018158e-05, -7.301848381757736e-05, -6.933510303497314e-05, -6.565172225236893e-05, -6.196834146976471e-05, -5.828496068716049e-05, -5.4601579904556274e-05, -5.091819912195206e-05, -4.723481833934784e-05, -4.355143755674362e-05, -3.9868056774139404e-05, -3.618467599153519e-05, -3.250129520893097e-05, -2.8817914426326752e-05, -2.5134533643722534e-05, -2.1451152861118317e-05, -1.77677720785141e-05, -1.4084391295909882e-05, -1.0401010513305664e-05, -6.7176297307014465e-06, -3.034248948097229e-06, 6.491318345069885e-07, 4.332512617111206e-06, 8.015893399715424e-06, 1.1699274182319641e-05, 1.538265496492386e-05, 1.9066035747528076e-05, 2.2749416530132294e-05, 2.643279731273651e-05, 3.011617809534073e-05, 3.3799558877944946e-05, 3.7482939660549164e-05, 4.116632044315338e-05, 4.48497012257576e-05, 4.8533082008361816e-05, 5.2216462790966034e-05, 5.589984357357025e-05, 5.958322435617447e-05, 6.326660513877869e-05, 6.69499859213829e-05, 7.063336670398712e-05, 7.431674748659134e-05, 7.800012826919556e-05, 8.168350905179977e-05, 8.536688983440399e-05, 8.905027061700821e-05, 9.273365139961243e-05, 9.641703218221664e-05, 0.00010010041296482086, 0.00010378379374742508, 0.0001074671745300293]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [3.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 0.0, 2.0, 3.0, 3.0, 4.0, 7.0, 7.0, 10.0, 19.0, 17.0, 24.0, 39.0, 43.0, 53.0, 66.0, 69.0, 75.0, 74.0, 79.0, 74.0, 76.0, 68.0, 45.0, 26.0, 29.0, 16.0, 14.0, 14.0, 12.0, 10.0, 7.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.104043960571289e-05, -2.0223669707775116e-05, -1.940689980983734e-05, -1.8590129911899567e-05, -1.7773360013961792e-05, -1.6956590116024017e-05, -1.6139820218086243e-05, -1.5323050320148468e-05, -1.4506280422210693e-05, -1.3689510524272919e-05, -1.2872740626335144e-05, -1.205597072839737e-05, -1.1239200830459595e-05, -1.042243093252182e-05, -9.605661034584045e-06, -8.78889113664627e-06, -7.972121238708496e-06, -7.1553513407707214e-06, -6.338581442832947e-06, -5.521811544895172e-06, -4.7050416469573975e-06, -3.888271749019623e-06, -3.071501851081848e-06, -2.2547319531440735e-06, -1.4379620552062988e-06, -6.211921572685242e-07, 1.955777406692505e-07, 1.0123476386070251e-06, 1.8291175365447998e-06, 2.6458874344825745e-06, 3.462657332420349e-06, 4.279427230358124e-06, 5.0961971282958984e-06, 5.912967026233673e-06, 6.729736924171448e-06, 7.546506822109222e-06, 8.363276720046997e-06, 9.180046617984772e-06, 9.996816515922546e-06, 1.0813586413860321e-05, 1.1630356311798096e-05, 1.244712620973587e-05, 1.3263896107673645e-05, 1.408066600561142e-05, 1.4897435903549194e-05, 1.571420580148697e-05, 1.6530975699424744e-05, 
1.734774559736252e-05, 1.8164515495300293e-05, 1.8981285393238068e-05, 1.9798055291175842e-05, 2.0614825189113617e-05, 2.143159508705139e-05, 2.2248364984989166e-05, 2.306513488292694e-05, 2.3881904780864716e-05, 2.469867467880249e-05, 2.5515444576740265e-05, 2.633221447467804e-05, 2.7148984372615814e-05, 2.796575427055359e-05, 2.8782524168491364e-05, 2.9599294066429138e-05, 3.0416063964366913e-05, 3.123283386230469e-05]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 5.0, 4.0, 5.0, 8.0, 13.0, 17.0, 32.0, 37.0, 61.0, 84.0, 110.0, 155.0, 258.0, 355.0, 522.0, 771.0, 1194.0, 1634.0, 2657.0, 3908.0, 5942.0, 9001.0, 14067.0, 23217.0, 36005.0, 60090.0, 111208.0, 261188.0, 256769.0, 105426.0, 57364.0, 34521.0, 22369.0, 13570.0, 8812.0, 5760.0, 3762.0, 2540.0, 1626.0, 1134.0, 740.0, 525.0, 330.0, 248.0, 152.0, 121.0, 81.0, 57.0, 35.0, 25.0, 17.0, 13.0, 9.0, 6.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0], "bins": [-4.8041343688964844e-05, -4.653818905353546e-05, -4.503503441810608e-05, -4.35318797826767e-05, -4.2028725147247314e-05, -4.052557051181793e-05, -3.902241587638855e-05, -3.751926124095917e-05, -3.6016106605529785e-05, -3.45129519701004e-05, -3.300979733467102e-05, -3.150664269924164e-05, -3.0003488063812256e-05, -2.8500333428382874e-05, -2.699717879295349e-05, -2.549402415752411e-05, -2.3990869522094727e-05, -2.2487714886665344e-05, -2.0984560251235962e-05, -1.948140561580658e-05, -1.7978250980377197e-05, -1.6475096344947815e-05, -1.4971941709518433e-05, -1.346878707408905e-05, -1.1965632438659668e-05, -1.0462477803230286e-05, -8.959323167800903e-06, -7.456168532371521e-06, -5.953013896942139e-06, -4.449859261512756e-06, -2.946704626083374e-06, -1.4435499906539917e-06, 5.960464477539063e-08, 1.562759280204773e-06, 3.0659139156341553e-06, 4.569068551063538e-06, 6.07222318649292e-06, 7.575377821922302e-06, 9.078532457351685e-06, 1.0581687092781067e-05, 1.208484172821045e-05, 1.3587996363639832e-05, 1.5091150999069214e-05, 1.6594305634498596e-05, 1.809746026992798e-05, 1.960061490535736e-05, 2.1103769540786743e-05, 2.2606924176216125e-05, 2.4110078811645508e-05, 2.561323344707489e-05, 2.7116388082504272e-05, 2.8619542717933655e-05, 3.0122697353363037e-05, 3.162585198879242e-05, 3.31290066242218e-05, 3.4632161259651184e-05, 3.6135315895080566e-05, 3.763847053050995e-05, 3.914162516593933e-05, 4.064477980136871e-05, 4.2147934436798096e-05, 4.365108907222748e-05, 4.515424370765686e-05, 4.665739834308624e-05, 4.8160552978515625e-05]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 5.0, 1.0, 3.0, 7.0, 4.0, 9.0, 11.0, 11.0, 12.0, 19.0, 18.0, 21.0, 21.0, 28.0, 25.0, 43.0, 38.0, 33.0, 42.0, 42.0, 46.0, 38.0, 42.0, 45.0, 37.0, 47.0, 43.0, 48.0, 28.0, 21.0, 31.0, 31.0, 31.0, 19.0, 23.0, 17.0, 16.0, 16.0, 4.0, 9.0, 8.0, 6.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.875659942626953e-05, -4.721153527498245e-05, -4.5666471123695374e-05, -4.4121406972408295e-05, -4.2576342821121216e-05, -4.103127866983414e-05, -3.948621451854706e-05, -3.794115036725998e-05, -3.63960862159729e-05, -3.485102206468582e-05, -3.330595791339874e-05, -3.1760893762111664e-05, -3.0215829610824585e-05, -2.8670765459537506e-05, -2.7125701308250427e-05, -2.558063715696335e-05, -2.403557300567627e-05, -2.249050885438919e-05, -2.0945444703102112e-05, -1.9400380551815033e-05, -1.7855316400527954e-05, -1.6310252249240875e-05, -1.4765188097953796e-05, 
-1.3220123946666718e-05, -1.1675059795379639e-05, -1.012999564409256e-05, -8.584931492805481e-06, -7.039867341518402e-06, -5.494803190231323e-06, -3.949739038944244e-06, -2.4046748876571655e-06, -8.596107363700867e-07, 6.854534149169922e-07, 2.230517566204071e-06, 3.77558171749115e-06, 5.320645868778229e-06, 6.865710020065308e-06, 8.410774171352386e-06, 9.955838322639465e-06, 1.1500902473926544e-05, 1.3045966625213623e-05, 1.4591030776500702e-05, 1.613609492778778e-05, 1.768115907907486e-05, 1.922622323036194e-05, 2.0771287381649017e-05, 2.2316351532936096e-05, 2.3861415684223175e-05, 2.5406479835510254e-05, 2.6951543986797333e-05, 2.849660813808441e-05, 3.004167228937149e-05, 3.158673644065857e-05, 3.313180059194565e-05, 3.467686474323273e-05, 3.6221928894519806e-05, 3.7766993045806885e-05, 3.9312057197093964e-05, 4.085712134838104e-05, 4.240218549966812e-05, 4.39472496509552e-05, 4.549231380224228e-05, 4.703737795352936e-05, 4.858244210481644e-05, 5.0127506256103516e-05]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 3.0, 4.0, 5.0, 12.0, 17.0, 29.0, 41.0, 72.0, 108.0, 179.0, 306.0, 534.0, 1465.0, 2355.0, 4758.0, 10344.0, 24094.0, 64277.0, 196499.0, 438038.0, 197000.0, 74751.0, 17842.0, 7896.0, 3732.0, 1893.0, 1034.0, 513.0, 309.0, 174.0, 119.0, 55.0, 39.0, 19.0, 9.0, 11.0, 12.0, 6.0, 1.0, 7.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.708766937255859e-06, -4.523433744907379e-06, -4.338100552558899e-06, -4.152767360210419e-06, -3.9674341678619385e-06, -3.7821009755134583e-06, -3.596767783164978e-06, -3.411434590816498e-06, -3.2261013984680176e-06, -3.0407682061195374e-06, -2.855435013771057e-06, -2.670101821422577e-06, -2.4847686290740967e-06, -2.2994354367256165e-06, -2.1141022443771362e-06, -1.928769052028656e-06, -1.7434358596801758e-06, -1.5581026673316956e-06, -1.3727694749832153e-06, -1.1874362826347351e-06, -1.0021030902862549e-06, -8.167698979377747e-07, -6.314367055892944e-07, -4.461035132408142e-07, -2.60770320892334e-07, -7.543712854385376e-08, 1.0989606380462646e-07, 2.952292561531067e-07, 4.805624485015869e-07, 6.658956408500671e-07, 8.512288331985474e-07, 1.0365620255470276e-06, 1.2218952178955078e-06, 1.407228410243988e-06, 1.5925616025924683e-06, 1.7778947949409485e-06, 1.9632279872894287e-06, 2.148561179637909e-06, 2.333894371986389e-06, 2.5192275643348694e-06, 2.7045607566833496e-06, 2.88989394903183e-06, 3.07522714138031e-06, 3.2605603337287903e-06, 3.4458935260772705e-06, 3.6312267184257507e-06, 3.816559910774231e-06, 4.001893103122711e-06, 4.187226295471191e-06, 4.372559487819672e-06, 4.557892680168152e-06, 4.743225872516632e-06, 4.928559064865112e-06, 5.1138922572135925e-06, 5.299225449562073e-06, 5.484558641910553e-06, 5.669891834259033e-06, 5.8552250266075134e-06, 6.040558218955994e-06, 6.225891411304474e-06, 6.411224603652954e-06, 6.596557796001434e-06, 6.7818909883499146e-06, 6.967224180698395e-06, 7.152557373046875e-06]}, "gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 4.0, 0.0, 5.0, 6.0, 0.0, 8.0, 0.0, 6.0, 12.0, 0.0, 15.0, 22.0, 0.0, 19.0, 29.0, 0.0, 43.0, 44.0, 0.0, 38.0, 0.0, 49.0, 57.0, 0.0, 60.0, 70.0, 0.0, 52.0, 52.0, 0.0, 51.0, 0.0, 58.0, 44.0, 0.0, 49.0, 41.0, 0.0, 37.0, 30.0, 0.0, 37.0, 29.0, 0.0, 11.0, 0.0, 8.0, 8.0, 0.0, 10.0, 3.0, 0.0, 4.0, 5.0, 0.0, 2.0, 1.0], "bins": [-1.3113021850585938e-06, -1.2731179594993591e-06, 
-1.2349337339401245e-06, -1.1967495083808899e-06, -1.1585652828216553e-06, -1.1203810572624207e-06, -1.082196831703186e-06, -1.0440126061439514e-06, -1.0058283805847168e-06, -9.676441550254822e-07, -9.294599294662476e-07, -8.912757039070129e-07, -8.530914783477783e-07, -8.149072527885437e-07, -7.767230272293091e-07, -7.385388016700745e-07, -7.003545761108398e-07, -6.621703505516052e-07, -6.239861249923706e-07, -5.85801899433136e-07, -5.476176738739014e-07, -5.094334483146667e-07, -4.7124922275543213e-07, -4.330649971961975e-07, -3.948807716369629e-07, -3.5669654607772827e-07, -3.1851232051849365e-07, -2.8032809495925903e-07, -2.421438694000244e-07, -2.039596438407898e-07, -1.6577541828155518e-07, -1.2759119272232056e-07, -8.940696716308594e-08, -5.122274160385132e-08, -1.30385160446167e-08, 2.514570951461792e-08, 6.332993507385254e-08, 1.0151416063308716e-07, 1.3969838619232178e-07, 1.778826117515564e-07, 2.1606683731079102e-07, 2.5425106287002563e-07, 2.9243528842926025e-07, 3.3061951398849487e-07, 3.688037395477295e-07, 4.069879651069641e-07, 4.4517219066619873e-07, 4.833564162254333e-07, 5.21540641784668e-07, 5.597248673439026e-07, 5.979090929031372e-07, 6.360933184623718e-07, 6.742775440216064e-07, 7.124617695808411e-07, 7.506459951400757e-07, 7.888302206993103e-07, 8.270144462585449e-07, 8.651986718177795e-07, 9.033828973770142e-07, 9.415671229362488e-07, 9.797513484954834e-07, 1.017935574054718e-06, 1.0561197996139526e-06, 1.0943040251731873e-06, 1.1324882507324219e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 1.0, 1.0, 6.0, 4.0, 4.0, 7.0, 11.0, 9.0, 27.0, 18.0, 18.0, 25.0, 47.0, 64.0, 88.0, 199.0, 222.0, 282.0, 499.0, 803.0, 1239.0, 3743.0, 5073.0, 9328.0, 18415.0, 38697.0, 89778.0, 247540.0, 464018.0, 89602.0, 38444.0, 18620.0, 9426.0, 4956.0, 3817.0, 1244.0, 762.0, 460.0, 337.0, 209.0, 198.0, 80.0, 57.0, 41.0, 38.0, 31.0, 16.0, 16.0, 15.0, 7.0, 7.0, 5.0, 4.0, 8.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.112720489501953e-06, -3.984197974205017e-06, -3.855675458908081e-06, -3.727152943611145e-06, -3.598630428314209e-06, -3.470107913017273e-06, -3.341585397720337e-06, -3.213062882423401e-06, -3.084540367126465e-06, -2.956017851829529e-06, -2.8274953365325928e-06, -2.6989728212356567e-06, -2.5704503059387207e-06, -2.4419277906417847e-06, -2.3134052753448486e-06, -2.1848827600479126e-06, -2.0563602447509766e-06, -1.9278377294540405e-06, -1.7993152141571045e-06, -1.6707926988601685e-06, -1.5422701835632324e-06, -1.4137476682662964e-06, -1.2852251529693604e-06, -1.1567026376724243e-06, -1.0281801223754883e-06, -8.996576070785522e-07, -7.711350917816162e-07, -6.426125764846802e-07, -5.140900611877441e-07, -3.855675458908081e-07, -2.5704503059387207e-07, -1.2852251529693604e-07, 0.0, 1.2852251529693604e-07, 2.5704503059387207e-07, 3.855675458908081e-07, 5.140900611877441e-07, 6.426125764846802e-07, 7.711350917816162e-07, 8.996576070785522e-07, 1.0281801223754883e-06, 1.1567026376724243e-06, 1.2852251529693604e-06, 1.4137476682662964e-06, 1.5422701835632324e-06, 1.6707926988601685e-06, 1.7993152141571045e-06, 1.9278377294540405e-06, 2.0563602447509766e-06, 2.1848827600479126e-06, 2.3134052753448486e-06, 2.4419277906417847e-06, 2.5704503059387207e-06, 2.6989728212356567e-06, 2.8274953365325928e-06, 2.956017851829529e-06, 3.084540367126465e-06, 3.213062882423401e-06, 3.341585397720337e-06, 3.470107913017273e-06, 3.598630428314209e-06, 3.727152943611145e-06, 3.855675458908081e-06, 3.984197974205017e-06, 
4.112720489501953e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 4.0, 7.0, 12.0, 12.0, 9.0, 21.0, 29.0, 41.0, 40.0, 84.0, 72.0, 86.0, 86.0, 67.0, 72.0, 61.0, 50.0, 48.0, 48.0, 29.0, 32.0, 26.0, 17.0, 16.0, 8.0, 4.0, 6.0, 7.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.410743713378906e-06, -4.289671778678894e-06, -4.168599843978882e-06, -4.04752790927887e-06, -3.926455974578857e-06, -3.8053840398788452e-06, -3.684312105178833e-06, -3.563240170478821e-06, -3.4421682357788086e-06, -3.3210963010787964e-06, -3.200024366378784e-06, -3.078952431678772e-06, -2.9578804969787598e-06, -2.8368085622787476e-06, -2.7157366275787354e-06, -2.594664692878723e-06, -2.473592758178711e-06, -2.3525208234786987e-06, -2.2314488887786865e-06, -2.1103769540786743e-06, -1.989305019378662e-06, -1.86823308467865e-06, -1.7471611499786377e-06, -1.6260892152786255e-06, -1.5050172805786133e-06, -1.383945345878601e-06, -1.2628734111785889e-06, -1.1418014764785767e-06, -1.0207295417785645e-06, -8.996576070785522e-07, -7.7858567237854e-07, -6.575137376785278e-07, -5.364418029785156e-07, -4.153698682785034e-07, -2.942979335784912e-07, -1.73225998878479e-07, -5.21540641784668e-08, 6.891787052154541e-08, 1.8998980522155762e-07, 3.110617399215698e-07, 4.3213367462158203e-07, 5.532056093215942e-07, 6.742775440216064e-07, 7.953494787216187e-07, 9.164214134216309e-07, 1.037493348121643e-06, 1.1585652828216553e-06, 1.2796372175216675e-06, 1.4007091522216797e-06, 1.521781086921692e-06, 1.642853021621704e-06, 1.7639249563217163e-06, 1.8849968910217285e-06, 2.0060688257217407e-06, 2.127140760421753e-06, 2.248212695121765e-06, 2.3692846298217773e-06, 2.4903565645217896e-06, 2.6114284992218018e-06, 2.732500433921814e-06, 2.853572368621826e-06, 2.9746443033218384e-06, 3.0957162380218506e-06, 3.216788172721863e-06, 3.337860107421875e-06]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 4.0, 5.0, 2.0, 7.0, 9.0, 7.0, 16.0, 25.0, 28.0, 38.0, 55.0, 107.0, 194.0, 168.0, 98.0, 63.0, 47.0, 37.0, 21.0, 14.0, 10.0, 19.0, 13.0, 4.0, 4.0, 4.0, 5.0, 1.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00024095481785479933, -0.0002323074295418337, -0.00022366002667695284, -0.0002150126383639872, -0.00020636523549910635, -0.00019771784718614072, -0.00018907044432125986, -0.00018042305600829422, -0.0001717756676953286, -0.00016312827938236296, -0.0001544808765174821, -0.00014583348820451647, -0.0001371860853396356, -0.00012853869702666998, -0.00011989130143774673, -0.00011124390584882349, -0.00010259650298394263, -9.394910739501938e-05, -8.530171180609614e-05, -7.66543234931305e-05, -6.800692062824965e-05, -5.935952867730521e-05, -5.071213672636077e-05, -4.206474113743752e-05, -3.341734554851428e-05, -2.476994995959103e-05, -1.612255618965719e-05, -7.475162419723347e-06, 1.1722331691998988e-06, 9.819628758123145e-06, 1.8467020709067583e-05, 2.711441629799083e-05, 3.5761797334998846e-05, 4.440919292392209e-05, 5.305658851284534e-05, 6.170397682581097e-05, 7.035137969069183e-05, 7.899876800365746e-05, 8.76461635925807e-05, 9.629355918150395e-05, 0.0001049409547704272, 0.00011358835035935044, 0.00012223573867231607, 0.00013088314153719693, 0.00013953052985016257, 
0.00014817793271504343, 0.00015682532102800906, 0.0001654727093409747, 0.00017412011220585555, 0.00018276750051882118, 0.00019141490338370204, 0.00020006229169666767, 0.00020870969456154853, 0.00021735708287451416, 0.00022600448573939502, 0.00023465187405236065, 0.00024329926236532629, 0.0002519466506782919, 0.00026059403899125755, 0.00026924145640805364, 0.00027788884472101927, 0.0002865362330339849, 0.00029518362134695053, 0.00030383100965991616, 0.00031247842707671225]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 6.0, 5.0, 2.0, 4.0, 3.0, 5.0, 12.0, 14.0, 13.0, 23.0, 22.0, 22.0, 28.0, 24.0, 11.0, 23.0, 32.0, 35.0, 34.0, 38.0, 33.0, 40.0, 48.0, 58.0, 36.0, 27.0, 28.0, 43.0, 42.0, 30.0, 30.0, 29.0, 21.0, 32.0, 28.0, 16.0, 25.0, 12.0, 11.0, 14.0, 7.0, 14.0, 7.0, 7.0, 2.0, 3.0, 4.0, 3.0, 3.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.00011807680130004883, -0.00011412706226110458, -0.00011017732322216034, -0.0001062275841832161, -0.00010227784514427185, -9.83281061053276e-05, -9.437836706638336e-05, -9.042862802743912e-05, -8.647888898849487e-05, -8.252914994955063e-05, -7.857941091060638e-05, -7.462967187166214e-05, -7.06799328327179e-05, -6.673019379377365e-05, -6.27804547548294e-05, -5.883071571588516e-05, -5.488097667694092e-05, -5.0931237637996674e-05, -4.698149859905243e-05, -4.3031759560108185e-05, -3.908202052116394e-05, -3.5132281482219696e-05, -3.118254244327545e-05, -2.7232803404331207e-05, -2.3283064365386963e-05, -1.933332532644272e-05, -1.5383586287498474e-05, -1.143384724855423e-05, -7.484108209609985e-06, -3.534369170665741e-06, 4.153698682785034e-07, 4.365108907222748e-06, 8.314847946166992e-06, 1.2264586985111237e-05, 1.621432602405548e-05, 2.0164065062999725e-05, 2.411380410194397e-05, 2.8063543140888214e-05, 3.201328217983246e-05, 3.59630212187767e-05, 3.991276025772095e-05, 4.386249929666519e-05, 4.7812238335609436e-05, 5.176197737455368e-05, 5.5711716413497925e-05, 5.966145545244217e-05, 6.361119449138641e-05, 6.756093353033066e-05, 7.15106725692749e-05, 7.546041160821915e-05, 7.941015064716339e-05, 8.335988968610764e-05, 8.730962872505188e-05, 9.125936776399612e-05, 9.520910680294037e-05, 9.915884584188461e-05, 0.00010310858488082886, 0.0001070583239197731, 0.00011100806295871735, 0.00011495780199766159, 0.00011890754103660583, 0.00012285728007555008, 0.00012680701911449432, 0.00013075675815343857, 0.0001347064971923828]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 6.0, 6.0, 6.0, 10.0, 15.0, 23.0, 23.0, 40.0, 56.0, 48.0, 115.0, 155.0, 213.0, 307.0, 424.0, 641.0, 1040.0, 1554.0, 2535.0, 4185.0, 7172.0, 14576.0, 33713.0, 203001.0, 3784902.0, 84808.0, 25658.0, 12282.0, 6460.0, 3744.0, 2213.0, 1528.0, 944.0, 558.0, 404.0, 271.0, 193.0, 137.0, 89.0, 63.0, 43.0, 32.0, 31.0, 17.0, 13.0, 10.0, 2.0, 8.0, 3.0, 3.0, 4.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00011372566223144531, -0.00011009629815816879, -0.00010646693408489227, -0.00010283757001161575, -9.920820593833923e-05, -9.557884186506271e-05, -9.19494777917862e-05, -8.832011371850967e-05, -8.469074964523315e-05, -8.106138557195663e-05, -7.743202149868011e-05, -7.38026574254036e-05, -7.017329335212708e-05, -6.654392927885056e-05, -6.291456520557404e-05, -5.9285201132297516e-05, -5.5655837059020996e-05, -5.2026472985744476e-05, -4.8397108912467957e-05, -4.476774483919144e-05, -4.113838076591492e-05, -3.75090166926384e-05, 
-3.387965261936188e-05, -3.0250288546085358e-05, -2.6620924472808838e-05, -2.2991560399532318e-05, -1.93621963262558e-05, -1.573283225297928e-05, -1.2103468179702759e-05, -8.474104106426239e-06, -4.844740033149719e-06, -1.2153759598731995e-06, 2.4139881134033203e-06, 6.04335218667984e-06, 9.67271625995636e-06, 1.330208033323288e-05, 1.69314444065094e-05, 2.056080847978592e-05, 2.419017255306244e-05, 2.781953662633896e-05, 3.144890069961548e-05, 3.5078264772892e-05, 3.870762884616852e-05, 4.233699291944504e-05, 4.596635699272156e-05, 4.959572106599808e-05, 5.32250851392746e-05, 5.685444921255112e-05, 6.048381328582764e-05, 6.411317735910416e-05, 6.774254143238068e-05, 7.13719055056572e-05, 7.500126957893372e-05, 7.863063365221024e-05, 8.225999772548676e-05, 8.588936179876328e-05, 8.95187258720398e-05, 9.314808994531631e-05, 9.677745401859283e-05, 0.00010040681809186935, 0.00010403618216514587, 0.0001076655462384224, 0.00011129491031169891, 0.00011492427438497543, 0.00011855363845825195]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 4.0, 2.0, 3.0, 1.0, 8.0, 8.0, 8.0, 10.0, 13.0, 9.0, 12.0, 9.0, 25.0, 25.0, 34.0, 48.0, 56.0, 59.0, 56.0, 55.0, 69.0, 69.0, 85.0, 63.0, 40.0, 37.0, 40.0, 32.0, 25.0, 18.0, 13.0, 16.0, 13.0, 9.0, 8.0, 10.0, 5.0, 0.0, 3.0, 6.0, 1.0, 4.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.205371856689453e-05, -2.1355226635932922e-05, -2.0656734704971313e-05, -1.9958242774009705e-05, -1.9259750843048096e-05, -1.8561258912086487e-05, -1.7862766981124878e-05, -1.716427505016327e-05, -1.646578311920166e-05, -1.576729118824005e-05, -1.5068799257278442e-05, -1.4370307326316833e-05, -1.3671815395355225e-05, -1.2973323464393616e-05, -1.2274831533432007e-05, -1.1576339602470398e-05, -1.0877847671508789e-05, -1.017935574054718e-05, -9.480863809585571e-06, -8.782371878623962e-06, -8.083879947662354e-06, -7.385388016700745e-06, -6.686896085739136e-06, -5.988404154777527e-06, -5.289912223815918e-06, -4.591420292854309e-06, -3.8929283618927e-06, -3.1944364309310913e-06, -2.4959444999694824e-06, -1.7974525690078735e-06, -1.0989606380462646e-06, -4.0046870708465576e-07, 2.980232238769531e-07, 9.96515154838562e-07, 1.695007085800171e-06, 2.3934990167617798e-06, 3.0919909477233887e-06, 3.7904828786849976e-06, 4.4889748096466064e-06, 5.187466740608215e-06, 5.885958671569824e-06, 6.584450602531433e-06, 7.282942533493042e-06, 7.981434464454651e-06, 8.67992639541626e-06, 9.378418326377869e-06, 1.0076910257339478e-05, 1.0775402188301086e-05, 1.1473894119262695e-05, 1.2172386050224304e-05, 1.2870877981185913e-05, 1.3569369912147522e-05, 1.4267861843109131e-05, 1.496635377407074e-05, 1.566484570503235e-05, 1.6363337635993958e-05, 1.7061829566955566e-05, 1.7760321497917175e-05, 1.8458813428878784e-05, 1.9157305359840393e-05, 1.9855797290802002e-05, 2.055428922176361e-05, 2.125278115272522e-05, 2.195127308368683e-05, 2.2649765014648438e-05]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [5.0, 2.0, 0.0, 1.0, 4.0, 3.0, 6.0, 7.0, 9.0, 10.0, 21.0, 25.0, 27.0, 47.0, 62.0, 113.0, 128.0, 194.0, 295.0, 379.0, 644.0, 1081.0, 1631.0, 2781.0, 4638.0, 8608.0, 15890.0, 31260.0, 71030.0, 281056.0, 3421311.0, 225027.0, 64300.0, 29257.0, 14703.0, 8009.0, 4527.0, 2828.0, 1560.0, 961.0, 583.0, 433.0, 290.0, 154.0, 112.0, 85.0, 50.0, 50.0, 33.0, 13.0, 17.0, 17.0, 7.0, 5.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 
2.0], "bins": [-6.723403930664062e-05, -6.502587348222733e-05, -6.281770765781403e-05, -6.0609541833400726e-05, -5.840137600898743e-05, -5.619321018457413e-05, -5.398504436016083e-05, -5.177687853574753e-05, -4.956871271133423e-05, -4.736054688692093e-05, -4.515238106250763e-05, -4.294421523809433e-05, -4.073604941368103e-05, -3.852788358926773e-05, -3.631971776485443e-05, -3.411155194044113e-05, -3.190338611602783e-05, -2.9695220291614532e-05, -2.7487054467201233e-05, -2.5278888642787933e-05, -2.3070722818374634e-05, -2.0862556993961334e-05, -1.8654391169548035e-05, -1.6446225345134735e-05, -1.4238059520721436e-05, -1.2029893696308136e-05, -9.821727871894836e-06, -7.613562047481537e-06, -5.405396223068237e-06, -3.1972303986549377e-06, -9.890645742416382e-07, 1.2191012501716614e-06, 3.427267074584961e-06, 5.6354328989982605e-06, 7.84359872341156e-06, 1.005176454782486e-05, 1.225993037223816e-05, 1.4468096196651459e-05, 1.6676262021064758e-05, 1.8884427845478058e-05, 2.1092593669891357e-05, 2.3300759494304657e-05, 2.5508925318717957e-05, 2.7717091143131256e-05, 2.9925256967544556e-05, 3.2133422791957855e-05, 3.4341588616371155e-05, 3.6549754440784454e-05, 3.8757920265197754e-05, 4.0966086089611053e-05, 4.317425191402435e-05, 4.538241773843765e-05, 4.759058356285095e-05, 4.979874938726425e-05, 5.200691521167755e-05, 5.421508103609085e-05, 5.642324686050415e-05, 5.863141268491745e-05, 6.083957850933075e-05, 6.304774433374405e-05, 6.525591015815735e-05, 6.746407598257065e-05, 6.967224180698395e-05, 7.188040763139725e-05, 7.408857345581055e-05]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 2.0, 1.0, 9.0, 5.0, 6.0, 12.0, 14.0, 16.0, 16.0, 23.0, 29.0, 41.0, 53.0, 93.0, 142.0, 431.0, 1902.0, 708.0, 196.0, 116.0, 50.0, 47.0, 33.0, 26.0, 13.0, 19.0, 17.0, 21.0, 11.0, 8.0, 5.0, 6.0, 3.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.186962127685547e-05, -6.0181133449077606e-05, -5.8492645621299744e-05, -5.680415779352188e-05, -5.511566996574402e-05, -5.3427182137966156e-05, -5.1738694310188293e-05, -5.005020648241043e-05, -4.836171865463257e-05, -4.6673230826854706e-05, -4.498474299907684e-05, -4.329625517129898e-05, -4.160776734352112e-05, -3.9919279515743256e-05, -3.823079168796539e-05, -3.654230386018753e-05, -3.485381603240967e-05, -3.3165328204631805e-05, -3.147684037685394e-05, -2.978835254907608e-05, -2.8099864721298218e-05, -2.6411376893520355e-05, -2.4722889065742493e-05, -2.303440123796463e-05, -2.1345913410186768e-05, -1.9657425582408905e-05, -1.7968937754631042e-05, -1.628044992685318e-05, -1.4591962099075317e-05, -1.2903474271297455e-05, -1.1214986443519592e-05, -9.52649861574173e-06, -7.838010787963867e-06, -6.149522960186005e-06, -4.461035132408142e-06, -2.7725473046302795e-06, -1.084059476852417e-06, 6.044283509254456e-07, 2.292916178703308e-06, 3.981404006481171e-06, 5.669891834259033e-06, 7.358379662036896e-06, 9.046867489814758e-06, 1.0735355317592621e-05, 1.2423843145370483e-05, 1.4112330973148346e-05, 1.580081880092621e-05, 1.748930662870407e-05, 1.9177794456481934e-05, 2.0866282284259796e-05, 2.255477011203766e-05, 2.424325793981552e-05, 2.5931745767593384e-05, 2.7620233595371246e-05, 2.930872142314911e-05, 3.099720925092697e-05, 3.2685697078704834e-05, 3.4374184906482697e-05, 3.606267273426056e-05, 3.775116056203842e-05, 3.9439648389816284e-05, 4.112813621759415e-05, 4.281662404537201e-05, 
4.450511187314987e-05, 4.6193599700927734e-05]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 3.0, 6.0, 12.0, 4.0, 13.0, 14.0, 31.0, 33.0, 44.0, 60.0, 88.0, 92.0, 112.0, 99.0, 79.0, 71.0, 61.0, 49.0, 38.0, 26.0, 23.0, 10.0, 7.0, 6.0, 6.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002900969993788749, -0.0002817021741066128, -0.0002733073488343507, -0.0002649125235620886, -0.00025651772739365697, -0.00024812290212139487, -0.00023972807684913278, -0.00023133325157687068, -0.00022293842630460858, -0.0002145436010323465, -0.0002061487757600844, -0.00019775396503973752, -0.00018935913976747543, -0.00018096431449521333, -0.00017256950377486646, -0.00016417467850260437, -0.00015577985323034227, -0.00014738502795808017, -0.00013899020268581808, -0.0001305953919654712, -0.0001222005666932091, -0.00011380574142094702, -0.00010541092342464253, -9.701610542833805e-05, -8.862128015607595e-05, -8.022645488381386e-05, -7.183163688750938e-05, -6.34368188912049e-05, -5.50419936189428e-05, -4.664717198465951e-05, -3.825235035037622e-05, -2.985752871609293e-05, -2.146270708180964e-05, -1.306788544752635e-05, -4.673063813243061e-06, 3.721757821040228e-06, 1.2116579455323517e-05, 2.0511401089606807e-05, 2.8906222723890096e-05, 3.7301044358173385e-05, 4.5695865992456675e-05, 5.4090687626739964e-05, 6.248550926102325e-05, 7.088032725732774e-05, 7.927515252958983e-05, 8.766997780185193e-05, 9.606479579815641e-05, 0.00010445961379446089, 0.00011285443906672299, 0.00012124926433898509, 0.00012964408961124718, 0.00013803890033159405, 0.00014643372560385615, 0.00015482855087611824, 0.0001632233615964651, 0.0001716181868687272, 0.0001800130121409893, 0.0001884078374132514, 0.0001968026626855135, 0.00020519747340586036, 0.00021359229867812246, 0.00022198712395038456, 0.00023038193467073143, 0.00023877675994299352, 0.0002471715852152556]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 5.0, 8.0, 10.0, 5.0, 7.0, 9.0, 10.0, 12.0, 17.0, 16.0, 16.0, 22.0, 19.0, 29.0, 25.0, 29.0, 26.0, 32.0, 24.0, 38.0, 33.0, 38.0, 27.0, 28.0, 31.0, 34.0, 37.0, 44.0, 41.0, 34.0, 37.0, 35.0, 29.0, 27.0, 24.0, 18.0, 12.0, 22.0, 15.0, 13.0, 12.0, 6.0, 7.0, 7.0, 5.0, 2.0, 8.0, 5.0, 3.0, 5.0, 2.0, 5.0, 2.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.00010275840759277344, -9.945593774318695e-05, -9.615346789360046e-05, -9.285099804401398e-05, -8.954852819442749e-05, -8.6246058344841e-05, -8.294358849525452e-05, -7.964111864566803e-05, -7.633864879608154e-05, -7.303617894649506e-05, -6.973370909690857e-05, -6.643123924732208e-05, -6.31287693977356e-05, -5.982629954814911e-05, -5.652382969856262e-05, -5.3221359848976135e-05, -4.991888999938965e-05, -4.661642014980316e-05, -4.3313950300216675e-05, -4.001148045063019e-05, -3.67090106010437e-05, -3.3406540751457214e-05, -3.0104070901870728e-05, -2.680160105228424e-05, -2.3499131202697754e-05, -2.0196661353111267e-05, -1.689419150352478e-05, -1.3591721653938293e-05, -1.0289251804351807e-05, -6.98678195476532e-06, -3.684312105178833e-06, -3.818422555923462e-07, 2.9206275939941406e-06, 6.2230974435806274e-06, 9.525567293167114e-06, 1.2828037142753601e-05, 1.6130506992340088e-05, 1.9432976841926575e-05, 2.273544669151306e-05, 2.603791654109955e-05, 2.9340386390686035e-05, 3.264285624027252e-05, 3.594532608985901e-05, 
3.9247795939445496e-05, 4.255026578903198e-05, 4.585273563861847e-05, 4.9155205488204956e-05, 5.245767533779144e-05, 5.576014518737793e-05, 5.9062615036964417e-05, 6.23650848865509e-05, 6.566755473613739e-05, 6.897002458572388e-05, 7.227249443531036e-05, 7.557496428489685e-05, 7.887743413448334e-05, 8.217990398406982e-05, 8.548237383365631e-05, 8.87848436832428e-05, 9.208731353282928e-05, 9.538978338241577e-05, 9.869225323200226e-05, 0.00010199472308158875, 0.00010529719293117523, 0.00010859966278076172]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 5.0, 8.0, 3.0, 11.0, 15.0, 24.0, 30.0, 35.0, 54.0, 89.0, 120.0, 211.0, 239.0, 413.0, 591.0, 946.0, 1463.0, 2494.0, 4243.0, 7869.0, 15764.0, 35407.0, 93554.0, 397897.0, 332005.0, 88243.0, 33617.0, 14973.0, 7540.0, 4132.0, 2425.0, 1469.0, 907.0, 572.0, 380.0, 260.0, 172.0, 103.0, 80.0, 56.0, 44.0, 34.0, 21.0, 14.0, 10.0, 10.0, 4.0, 6.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001270771026611328, -0.00012297369539737701, -0.00011887028813362122, -0.00011476688086986542, -0.00011066347360610962, -0.00010656006634235382, -0.00010245665907859802, -9.835325181484222e-05, -9.424984455108643e-05, -9.014643728733063e-05, -8.604303002357483e-05, -8.193962275981903e-05, -7.783621549606323e-05, -7.373280823230743e-05, -6.962940096855164e-05, -6.552599370479584e-05, -6.142258644104004e-05, -5.731917917728424e-05, -5.321577191352844e-05, -4.9112364649772644e-05, -4.5008957386016846e-05, -4.090555012226105e-05, -3.680214285850525e-05, -3.269873559474945e-05, -2.8595328330993652e-05, -2.4491921067237854e-05, -2.0388513803482056e-05, -1.6285106539726257e-05, -1.2181699275970459e-05, -8.07829201221466e-06, -3.974884748458862e-06, 1.2852251529693604e-07, 4.231929779052734e-06, 8.335337042808533e-06, 1.2438744306564331e-05, 1.654215157032013e-05, 2.0645558834075928e-05, 2.4748966097831726e-05, 2.8852373361587524e-05, 3.295578062534332e-05, 3.705918788909912e-05, 4.116259515285492e-05, 4.526600241661072e-05, 4.9369409680366516e-05, 5.3472816944122314e-05, 5.757622420787811e-05, 6.167963147163391e-05, 6.578303873538971e-05, 6.988644599914551e-05, 7.39898532629013e-05, 7.80932605266571e-05, 8.21966677904129e-05, 8.63000750541687e-05, 9.04034823179245e-05, 9.45068895816803e-05, 9.86102968454361e-05, 0.0001027137041091919, 0.00010681711137294769, 0.00011092051863670349, 0.00011502392590045929, 0.00011912733316421509, 0.00012323074042797089, 0.00012733414769172668, 0.00013143755495548248, 0.00013554096221923828]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 3.0, 4.0, 3.0, 7.0, 10.0, 3.0, 8.0, 9.0, 10.0, 27.0, 23.0, 35.0, 42.0, 66.0, 82.0, 73.0, 82.0, 89.0, 87.0, 77.0, 58.0, 46.0, 42.0, 35.0, 20.0, 23.0, 11.0, 3.0, 7.0, 8.0, 5.0, 2.0, 1.0, 1.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.9325485229492188e-05, -2.8444454073905945e-05, -2.7563422918319702e-05, -2.668239176273346e-05, -2.5801360607147217e-05, -2.4920329451560974e-05, -2.403929829597473e-05, -2.315826714038849e-05, -2.2277235984802246e-05, -2.1396204829216003e-05, -2.051517367362976e-05, -1.9634142518043518e-05, -1.8753111362457275e-05, -1.7872080206871033e-05, -1.699104905128479e-05, -1.6110017895698547e-05, -1.5228986740112305e-05, -1.4347955584526062e-05, -1.346692442893982e-05, -1.2585893273353577e-05, -1.1704862117767334e-05, 
[wandb run data: per-parameter gradient histograms, each serialized as {"_type": "histogram", "values": [per-bin counts], "bins": [bin edges]}, covering encoder.encoder.layers.9 through encoder.encoder.layers.7 — attention q_proj/k_proj/v_proj/out_proj, layer_norm, final_layer_norm, and feed_forward intermediate_dense/output_dense weights and biases. Raw bin edges and counts omitted.]
-5.029141902923584e-07, 1.298263669013977e-06, 3.0994415283203125e-06, 4.900619387626648e-06, 6.701797246932983e-06, 8.502975106239319e-06, 1.0304152965545654e-05, 1.210533082485199e-05, 1.3906508684158325e-05, 1.570768654346466e-05, 1.7508864402770996e-05, 1.931004226207733e-05, 2.1111220121383667e-05, 2.2912397980690002e-05, 2.4713575839996338e-05, 2.6514753699302673e-05, 2.831593155860901e-05, 3.0117109417915344e-05, 3.191828727722168e-05, 3.3719465136528015e-05, 3.552064299583435e-05, 3.7321820855140686e-05, 3.912299871444702e-05, 4.092417657375336e-05, 4.272535443305969e-05, 4.452653229236603e-05, 4.632771015167236e-05, 4.81288880109787e-05, 4.9930065870285034e-05, 5.173124372959137e-05, 5.3532421588897705e-05, 5.533359944820404e-05, 5.7134777307510376e-05, 5.893595516681671e-05, 6.073713302612305e-05]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 7.0, 0.0, 2.0, 6.0, 5.0, 6.0, 12.0, 6.0, 7.0, 7.0, 13.0, 18.0, 12.0, 13.0, 13.0, 23.0, 23.0, 30.0, 34.0, 27.0, 28.0, 40.0, 41.0, 29.0, 42.0, 43.0, 40.0, 38.0, 29.0, 30.0, 41.0, 40.0, 31.0, 40.0, 38.0, 26.0, 18.0, 18.0, 19.0, 19.0, 21.0, 15.0, 6.0, 8.0, 9.0, 7.0, 9.0, 8.0, 6.0, 6.0, 2.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.45246696472168e-05, -4.3054111301898956e-05, -4.1583552956581116e-05, -4.0112994611263275e-05, -3.8642436265945435e-05, -3.7171877920627594e-05, -3.5701319575309753e-05, -3.423076122999191e-05, -3.276020288467407e-05, -3.128964453935623e-05, -2.981908619403839e-05, -2.834852784872055e-05, -2.687796950340271e-05, -2.540741115808487e-05, -2.393685281276703e-05, -2.2466294467449188e-05, -2.0995736122131348e-05, -1.9525177776813507e-05, -1.8054619431495667e-05, -1.6584061086177826e-05, -1.5113502740859985e-05, -1.3642944395542145e-05, -1.2172386050224304e-05, -1.0701827704906464e-05, -9.231269359588623e-06, -7.760711014270782e-06, -6.290152668952942e-06, -4.819594323635101e-06, -3.3490359783172607e-06, -1.8784776329994202e-06, -4.079192876815796e-07, 1.062639057636261e-06, 2.5331974029541016e-06, 4.003755748271942e-06, 5.474314093589783e-06, 6.944872438907623e-06, 8.415430784225464e-06, 9.885989129543304e-06, 1.1356547474861145e-05, 1.2827105820178986e-05, 1.4297664165496826e-05, 1.5768222510814667e-05, 1.7238780856132507e-05, 1.8709339201450348e-05, 2.017989754676819e-05, 2.165045589208603e-05, 2.312101423740387e-05, 2.459157258272171e-05, 2.606213092803955e-05, 2.753268927335739e-05, 2.9003247618675232e-05, 3.0473805963993073e-05, 3.194436430931091e-05, 3.3414922654628754e-05, 3.4885480999946594e-05, 3.6356039345264435e-05, 3.7826597690582275e-05, 3.9297156035900116e-05, 4.0767714381217957e-05, 4.22382727265358e-05, 4.370883107185364e-05, 4.517938941717148e-05, 4.664994776248932e-05, 4.812050610780716e-05, 4.9591064453125e-05]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 15.0, 8.0, 18.0, 31.0, 33.0, 67.0, 51.0, 125.0, 233.0, 214.0, 508.0, 983.0, 1107.0, 2910.0, 5756.0, 7324.0, 22867.0, 61604.0, 103952.0, 399561.0, 233198.0, 132500.0, 43174.0, 12755.0, 9725.0, 4664.0, 1721.0, 1458.0, 849.0, 322.0, 317.0, 158.0, 133.0, 69.0, 51.0, 43.0, 24.0, 6.0, 8.0, 4.0, 0.0, 4.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.231929779052734e-06, -4.0745362639427185e-06, -3.917142748832703e-06, -3.7597492337226868e-06, -3.602355718612671e-06, -3.444962203502655e-06, -3.287568688392639e-06, -3.1301751732826233e-06, 
-2.9727816581726074e-06, -2.8153881430625916e-06, -2.6579946279525757e-06, -2.50060111284256e-06, -2.343207597732544e-06, -2.185814082622528e-06, -2.028420567512512e-06, -1.8710270524024963e-06, -1.7136335372924805e-06, -1.5562400221824646e-06, -1.3988465070724487e-06, -1.2414529919624329e-06, -1.084059476852417e-06, -9.266659617424011e-07, -7.692724466323853e-07, -6.118789315223694e-07, -4.544854164123535e-07, -2.9709190130233765e-07, -1.3969838619232178e-07, 1.7695128917694092e-08, 1.7508864402770996e-07, 3.3248215913772583e-07, 4.898756742477417e-07, 6.472691893577576e-07, 8.046627044677734e-07, 9.620562195777893e-07, 1.1194497346878052e-06, 1.276843249797821e-06, 1.434236764907837e-06, 1.5916302800178528e-06, 1.7490237951278687e-06, 1.9064173102378845e-06, 2.0638108253479004e-06, 2.2212043404579163e-06, 2.378597855567932e-06, 2.535991370677948e-06, 2.693384885787964e-06, 2.8507784008979797e-06, 3.0081719160079956e-06, 3.1655654311180115e-06, 3.3229589462280273e-06, 3.4803524613380432e-06, 3.637745976448059e-06, 3.795139491558075e-06, 3.952533006668091e-06, 4.109926521778107e-06, 4.2673200368881226e-06, 4.4247135519981384e-06, 4.582107067108154e-06, 4.73950058221817e-06, 4.896894097328186e-06, 5.054287612438202e-06, 5.211681127548218e-06, 5.369074642658234e-06, 5.5264681577682495e-06, 5.683861672878265e-06, 5.841255187988281e-06]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 3.0, 0.0, 4.0, 6.0, 7.0, 7.0, 5.0, 13.0, 24.0, 24.0, 31.0, 46.0, 41.0, 39.0, 0.0, 75.0, 74.0, 84.0, 71.0, 62.0, 63.0, 47.0, 47.0, 57.0, 38.0, 28.0, 27.0, 0.0, 25.0, 22.0, 6.0, 8.0, 8.0, 2.0, 4.0, 3.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6093254089355469e-06, -1.5543773770332336e-06, -1.4994293451309204e-06, -1.4444813132286072e-06, -1.389533281326294e-06, -1.3345852494239807e-06, -1.2796372175216675e-06, -1.2246891856193542e-06, -1.169741153717041e-06, -1.1147931218147278e-06, -1.0598450899124146e-06, -1.0048970580101013e-06, -9.499490261077881e-07, -8.950009942054749e-07, -8.400529623031616e-07, -7.851049304008484e-07, -7.301568984985352e-07, -6.752088665962219e-07, -6.202608346939087e-07, -5.653128027915955e-07, -5.103647708892822e-07, -4.55416738986969e-07, -4.0046870708465576e-07, -3.4552067518234253e-07, -2.905726432800293e-07, -2.3562461137771606e-07, -1.8067657947540283e-07, -1.257285475730896e-07, -7.078051567077637e-08, -1.5832483768463135e-08, 3.91155481338501e-08, 9.406358003616333e-08, 1.4901161193847656e-07, 2.039596438407898e-07, 2.5890767574310303e-07, 3.1385570764541626e-07, 3.688037395477295e-07, 4.237517714500427e-07, 4.78699803352356e-07, 5.336478352546692e-07, 5.885958671569824e-07, 6.435438990592957e-07, 6.984919309616089e-07, 7.534399628639221e-07, 8.083879947662354e-07, 8.633360266685486e-07, 9.182840585708618e-07, 9.73232090473175e-07, 1.0281801223754883e-06, 1.0831281542778015e-06, 1.1380761861801147e-06, 1.193024218082428e-06, 1.2479722499847412e-06, 1.3029202818870544e-06, 1.3578683137893677e-06, 1.412816345691681e-06, 1.4677643775939941e-06, 1.5227124094963074e-06, 1.5776604413986206e-06, 1.6326084733009338e-06, 1.687556505203247e-06, 1.7425045371055603e-06, 1.7974525690078735e-06, 1.8524006009101868e-06, 1.9073486328125e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 6.0, 5.0, 4.0, 11.0, 17.0, 32.0, 44.0, 53.0, 145.0, 
155.0, 286.0, 427.0, 744.0, 1244.0, 2223.0, 3957.0, 7167.0, 25250.0, 40406.0, 93116.0, 246724.0, 358618.0, 149613.0, 60261.0, 27743.0, 13721.0, 9497.0, 2948.0, 1606.0, 966.0, 614.0, 351.0, 233.0, 141.0, 83.0, 80.0, 23.0, 15.0, 10.0, 8.0, 6.0, 1.0, 3.0, 2.0, 6.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.933906555175781e-06, -3.8081780076026917e-06, -3.682449460029602e-06, -3.5567209124565125e-06, -3.430992364883423e-06, -3.3052638173103333e-06, -3.1795352697372437e-06, -3.053806722164154e-06, -2.9280781745910645e-06, -2.802349627017975e-06, -2.6766210794448853e-06, -2.5508925318717957e-06, -2.425163984298706e-06, -2.2994354367256165e-06, -2.173706889152527e-06, -2.0479783415794373e-06, -1.9222497940063477e-06, -1.796521246433258e-06, -1.6707926988601685e-06, -1.5450641512870789e-06, -1.4193356037139893e-06, -1.2936070561408997e-06, -1.16787850856781e-06, -1.0421499609947205e-06, -9.164214134216309e-07, -7.906928658485413e-07, -6.649643182754517e-07, -5.392357707023621e-07, -4.1350722312927246e-07, -2.8777867555618286e-07, -1.6205012798309326e-07, -3.632158041000366e-08, 8.940696716308594e-08, 2.1513551473617554e-07, 3.4086406230926514e-07, 4.6659260988235474e-07, 5.923211574554443e-07, 7.180497050285339e-07, 8.437782526016235e-07, 9.695068001747131e-07, 1.0952353477478027e-06, 1.2209638953208923e-06, 1.346692442893982e-06, 1.4724209904670715e-06, 1.5981495380401611e-06, 1.7238780856132507e-06, 1.8496066331863403e-06, 1.97533518075943e-06, 2.1010637283325195e-06, 2.226792275905609e-06, 2.3525208234786987e-06, 2.4782493710517883e-06, 2.603977918624878e-06, 2.7297064661979675e-06, 2.855435013771057e-06, 2.9811635613441467e-06, 3.1068921089172363e-06, 3.232620656490326e-06, 3.3583492040634155e-06, 3.484077751636505e-06, 3.6098062992095947e-06, 3.7355348467826843e-06, 3.861263394355774e-06, 3.9869919419288635e-06, 4.112720489501953e-06]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 8.0, 11.0, 6.0, 15.0, 16.0, 10.0, 31.0, 35.0, 11.0, 42.0, 52.0, 22.0, 64.0, 68.0, 35.0, 84.0, 29.0, 78.0, 46.0, 36.0, 58.0, 46.0, 14.0, 45.0, 30.0, 7.0, 24.0, 23.0, 8.0, 12.0, 7.0, 4.0, 8.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.0994415283203125e-06, -3.0007213354110718e-06, -2.902001142501831e-06, -2.8032809495925903e-06, -2.7045607566833496e-06, -2.605840563774109e-06, -2.507120370864868e-06, -2.4084001779556274e-06, -2.3096799850463867e-06, -2.210959792137146e-06, -2.1122395992279053e-06, -2.0135194063186646e-06, -1.914799213409424e-06, -1.816079020500183e-06, -1.7173588275909424e-06, -1.6186386346817017e-06, -1.519918441772461e-06, -1.4211982488632202e-06, -1.3224780559539795e-06, -1.2237578630447388e-06, -1.125037670135498e-06, -1.0263174772262573e-06, -9.275972843170166e-07, -8.288770914077759e-07, -7.301568984985352e-07, -6.314367055892944e-07, -5.327165126800537e-07, -4.33996319770813e-07, -3.3527612686157227e-07, -2.3655593395233154e-07, -1.3783574104309082e-07, -3.91155481338501e-08, 5.960464477539063e-08, 1.5832483768463135e-07, 2.5704503059387207e-07, 3.557652235031128e-07, 4.544854164123535e-07, 5.532056093215942e-07, 6.51925802230835e-07, 7.506459951400757e-07, 8.493661880493164e-07, 9.480863809585571e-07, 1.0468065738677979e-06, 1.1455267667770386e-06, 1.2442469596862793e-06, 1.34296715259552e-06, 1.4416873455047607e-06, 1.5404075384140015e-06, 1.6391277313232422e-06, 1.737847924232483e-06, 
1.8365681171417236e-06, 1.9352883100509644e-06, 2.034008502960205e-06, 2.132728695869446e-06, 2.2314488887786865e-06, 2.3301690816879272e-06, 2.428889274597168e-06, 2.5276094675064087e-06, 2.6263296604156494e-06, 2.72504985332489e-06, 2.823770046234131e-06, 2.9224902391433716e-06, 3.0212104320526123e-06, 3.119930624961853e-06, 3.2186508178710938e-06]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 4.0, 8.0, 11.0, 16.0, 25.0, 32.0, 57.0, 93.0, 169.0, 201.0, 123.0, 61.0, 59.0, 37.0, 22.0, 24.0, 12.0, 10.0, 11.0, 11.0, 7.0, 2.0, 4.0, 4.0, 2.0, 0.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000257001694990322, -0.00024784260313026607, -0.00023868349671829492, -0.00022952439030632377, -0.00022036529844626784, -0.0002112061920342967, -0.00020204708562232554, -0.00019288799376226962, -0.00018372888735029846, -0.0001745697809383273, -0.0001654106890782714, -0.00015625158266630024, -0.00014709247625432909, -0.00013793338439427316, -0.000128774277982302, -0.00011961517884628847, -0.00011045607971027493, -0.0001012969805742614, -9.213788143824786e-05, -8.297877502627671e-05, -7.381967589026317e-05, -6.466057675424963e-05, -5.550147398025729e-05, -4.634237120626494e-05, -3.7183272070251405e-05, -2.8024171115248464e-05, -1.8865070160245523e-05, -9.705969205242582e-06, -5.468682502396405e-07, 8.612230885773897e-06, 1.7771333659766242e-05, 2.6930436433758587e-05, 3.608956467360258e-05, 4.524866380961612e-05, 5.4407766583608463e-05, 6.356686935760081e-05, 7.272596849361435e-05, 8.188506762962788e-05, 9.104417404159904e-05, 0.00010020327317761257, 0.00010936237231362611, 0.00011852147144963965, 0.00012768057058565319, 0.00013683967699762434, 0.0001459987834095955, 0.0001551578752696514, 0.00016431698168162256, 0.00017347608809359372, 0.00018263517995364964, 0.0001917942863656208, 0.00020095337822567672, 0.00021011248463764787, 0.0002192715764977038, 0.00022843068290967494, 0.0002375897893216461, 0.000246748881181702, 0.0002559080021455884, 0.0002650670940056443, 0.0002742262149695307, 0.0002833853068295866, 0.00029254439868964255, 0.00030170351965352893, 0.00031086261151358485, 0.0003200217033736408, 0.0003291807952336967]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 5.0, 1.0, 7.0, 8.0, 8.0, 7.0, 10.0, 13.0, 17.0, 17.0, 13.0, 20.0, 23.0, 25.0, 27.0, 35.0, 45.0, 43.0, 40.0, 36.0, 52.0, 58.0, 38.0, 43.0, 43.0, 30.0, 40.0, 38.0, 30.0, 36.0, 32.0, 25.0, 25.0, 22.0, 23.0, 14.0, 15.0, 14.0, 10.0, 4.0, 2.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.00016099214553833008, -0.00015639979392290115, -0.00015180744230747223, -0.0001472150906920433, -0.00014262273907661438, -0.00013803038746118546, -0.00013343803584575653, -0.0001288456842303276, -0.00012425333261489868, -0.00011966098099946976, -0.00011506862938404083, -0.00011047627776861191, -0.00010588392615318298, -0.00010129157453775406, -9.669922292232513e-05, -9.210687130689621e-05, -8.751451969146729e-05, -8.292216807603836e-05, -7.832981646060944e-05, -7.373746484518051e-05, -6.914511322975159e-05, -6.455276161432266e-05, -5.996040999889374e-05, -5.536805838346481e-05, -5.077570676803589e-05, -4.6183355152606964e-05, -4.159100353717804e-05, -3.6998651921749115e-05, -3.240630030632019e-05, -2.7813948690891266e-05, 
-2.322159707546234e-05, -1.8629245460033417e-05, -1.4036893844604492e-05, -9.444542229175568e-06, -4.852190613746643e-06, -2.598389983177185e-07, 4.332512617111206e-06, 8.92486423254013e-06, 1.3517215847969055e-05, 1.810956746339798e-05, 2.2701919078826904e-05, 2.729427069425583e-05, 3.1886622309684753e-05, 3.647897392511368e-05, 4.10713255405426e-05, 4.566367715597153e-05, 5.025602877140045e-05, 5.4848380386829376e-05, 5.94407320022583e-05, 6.403308361768723e-05, 6.862543523311615e-05, 7.321778684854507e-05, 7.7810138463974e-05, 8.240249007940292e-05, 8.699484169483185e-05, 9.158719331026077e-05, 9.61795449256897e-05, 0.00010077189654111862, 0.00010536424815654755, 0.00010995659977197647, 0.0001145489513874054, 0.00011914130300283432, 0.00012373365461826324, 0.00012832600623369217, 0.0001329183578491211]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 5.0, 6.0, 7.0, 17.0, 23.0, 26.0, 48.0, 48.0, 48.0, 63.0, 106.0, 155.0, 198.0, 296.0, 402.0, 520.0, 703.0, 947.0, 1391.0, 1976.0, 2887.0, 4287.0, 6788.0, 10662.0, 20181.0, 46192.0, 274018.0, 3455156.0, 283470.0, 39781.0, 17219.0, 9631.0, 5544.0, 3372.0, 2255.0, 1489.0, 1087.0, 777.0, 559.0, 445.0, 355.0, 257.0, 195.0, 165.0, 145.0, 92.0, 81.0, 52.0, 35.0, 35.0, 30.0, 17.0, 15.0, 7.0, 12.0, 7.0, 1.0, 4.0, 6.0], "bins": [-6.943941116333008e-05, -6.730202585458755e-05, -6.516464054584503e-05, -6.302725523710251e-05, -6.0889869928359985e-05, -5.875248461961746e-05, -5.661509931087494e-05, -5.4477714002132416e-05, -5.234032869338989e-05, -5.020294338464737e-05, -4.8065558075904846e-05, -4.592817276716232e-05, -4.37907874584198e-05, -4.1653402149677277e-05, -3.9516016840934753e-05, -3.737863153219223e-05, -3.524124622344971e-05, -3.3103860914707184e-05, -3.096647560596466e-05, -2.8829090297222137e-05, -2.6691704988479614e-05, -2.455431967973709e-05, -2.2416934370994568e-05, -2.0279549062252045e-05, -1.814216375350952e-05, -1.6004778444766998e-05, -1.3867393136024475e-05, -1.1730007827281952e-05, -9.592622518539429e-06, -7.4552372097969055e-06, -5.317851901054382e-06, -3.180466592311859e-06, -1.043081283569336e-06, 1.0943040251731873e-06, 3.2316893339157104e-06, 5.369074642658234e-06, 7.506459951400757e-06, 9.64384526014328e-06, 1.1781230568885803e-05, 1.3918615877628326e-05, 1.605600118637085e-05, 1.8193386495113373e-05, 2.0330771803855896e-05, 2.246815711259842e-05, 2.4605542421340942e-05, 2.6742927730083466e-05, 2.888031303882599e-05, 3.101769834756851e-05, 3.3155083656311035e-05, 3.529246896505356e-05, 3.742985427379608e-05, 3.9567239582538605e-05, 4.170462489128113e-05, 4.384201020002365e-05, 4.5979395508766174e-05, 4.81167808175087e-05, 5.025416612625122e-05, 5.2391551434993744e-05, 5.452893674373627e-05, 5.666632205247879e-05, 5.8803707361221313e-05, 6.094109266996384e-05, 6.307847797870636e-05, 6.521586328744888e-05, 6.73532485961914e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 10.0, 5.0, 3.0, 8.0, 11.0, 14.0, 18.0, 15.0, 19.0, 33.0, 26.0, 32.0, 42.0, 46.0, 54.0, 57.0, 61.0, 72.0, 69.0, 60.0, 55.0, 45.0, 49.0, 37.0, 29.0, 31.0, 14.0, 16.0, 8.0, 10.0, 9.0, 6.0, 5.0, 4.0, 1.0, 6.0, 5.0, 5.0, 0.0, 3.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.187490463256836e-05, -2.1167099475860596e-05, -2.0459294319152832e-05, -1.975148916244507e-05, -1.9043684005737305e-05, -1.833587884902954e-05, -1.7628073692321777e-05, 
-1.6920268535614014e-05, -1.621246337890625e-05, -1.5504658222198486e-05, -1.4796853065490723e-05, -1.4089047908782959e-05, -1.3381242752075195e-05, -1.2673437595367432e-05, -1.1965632438659668e-05, -1.1257827281951904e-05, -1.055002212524414e-05, -9.842216968536377e-06, -9.134411811828613e-06, -8.42660665512085e-06, -7.718801498413086e-06, -7.010996341705322e-06, -6.303191184997559e-06, -5.595386028289795e-06, -4.887580871582031e-06, -4.179775714874268e-06, -3.471970558166504e-06, -2.7641654014587402e-06, -2.0563602447509766e-06, -1.3485550880432129e-06, -6.407499313354492e-07, 6.705522537231445e-08, 7.748603820800781e-07, 1.4826655387878418e-06, 2.1904706954956055e-06, 2.898275852203369e-06, 3.606081008911133e-06, 4.3138861656188965e-06, 5.02169132232666e-06, 5.729496479034424e-06, 6.4373016357421875e-06, 7.145106792449951e-06, 7.852911949157715e-06, 8.560717105865479e-06, 9.268522262573242e-06, 9.976327419281006e-06, 1.068413257598877e-05, 1.1391937732696533e-05, 1.2099742889404297e-05, 1.280754804611206e-05, 1.3515353202819824e-05, 1.4223158359527588e-05, 1.4930963516235352e-05, 1.5638768672943115e-05, 1.634657382965088e-05, 1.7054378986358643e-05, 1.7762184143066406e-05, 1.846998929977417e-05, 1.9177794456481934e-05, 1.9885599613189697e-05, 2.059340476989746e-05, 2.1301209926605225e-05, 2.2009015083312988e-05, 2.2716820240020752e-05, 2.3424625396728516e-05]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 3.0, 3.0, 5.0, 12.0, 17.0, 18.0, 33.0, 45.0, 63.0, 108.0, 177.0, 238.0, 348.0, 567.0, 888.0, 1378.0, 2144.0, 3831.0, 6017.0, 10947.0, 21494.0, 43109.0, 104768.0, 695419.0, 2968386.0, 207431.0, 62522.0, 28590.0, 15219.0, 8030.0, 4693.0, 2961.0, 1740.0, 1134.0, 706.0, 445.0, 243.0, 190.0, 120.0, 71.0, 51.0, 49.0, 19.0, 22.0, 10.0, 10.0, 4.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-5.620718002319336e-05, -5.451589822769165e-05, -5.282461643218994e-05, -5.113333463668823e-05, -4.9442052841186523e-05, -4.7750771045684814e-05, -4.6059489250183105e-05, -4.4368207454681396e-05, -4.267692565917969e-05, -4.098564386367798e-05, -3.929436206817627e-05, -3.760308027267456e-05, -3.591179847717285e-05, -3.422051668167114e-05, -3.2529234886169434e-05, -3.0837953090667725e-05, -2.9146671295166016e-05, -2.7455389499664307e-05, -2.5764107704162598e-05, -2.407282590866089e-05, -2.238154411315918e-05, -2.069026231765747e-05, -1.8998980522155762e-05, -1.7307698726654053e-05, -1.5616416931152344e-05, -1.3925135135650635e-05, -1.2233853340148926e-05, -1.0542571544647217e-05, -8.851289749145508e-06, -7.160007953643799e-06, -5.46872615814209e-06, -3.777444362640381e-06, -2.086162567138672e-06, -3.948807716369629e-07, 1.296401023864746e-06, 2.987682819366455e-06, 4.678964614868164e-06, 6.370246410369873e-06, 8.061528205871582e-06, 9.752810001373291e-06, 1.1444091796875e-05, 1.3135373592376709e-05, 1.4826655387878418e-05, 1.6517937183380127e-05, 1.8209218978881836e-05, 1.9900500774383545e-05, 2.1591782569885254e-05, 2.3283064365386963e-05, 2.4974346160888672e-05, 2.666562795639038e-05, 2.835690975189209e-05, 3.00481915473938e-05, 3.173947334289551e-05, 3.343075513839722e-05, 3.5122036933898926e-05, 3.6813318729400635e-05, 3.8504600524902344e-05, 4.019588232040405e-05, 4.188716411590576e-05, 4.357844591140747e-05, 4.526972770690918e-05, 4.696100950241089e-05, 4.86522912979126e-05, 5.034357309341431e-05, 5.2034854888916016e-05]}, 
"gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 7.0, 0.0, 2.0, 5.0, 16.0, 6.0, 13.0, 18.0, 25.0, 31.0, 41.0, 52.0, 60.0, 87.0, 188.0, 320.0, 866.0, 1190.0, 498.0, 233.0, 104.0, 56.0, 40.0, 40.0, 21.0, 26.0, 25.0, 25.0, 14.0, 6.0, 12.0, 8.0, 6.0, 6.0, 6.0, 4.0, 3.0, 2.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.5359134674072266e-05, -4.399195313453674e-05, -4.262477159500122e-05, -4.12575900554657e-05, -3.9890408515930176e-05, -3.852322697639465e-05, -3.715604543685913e-05, -3.578886389732361e-05, -3.4421682357788086e-05, -3.3054500818252563e-05, -3.168731927871704e-05, -3.032013773918152e-05, -2.8952956199645996e-05, -2.7585774660110474e-05, -2.621859312057495e-05, -2.485141158103943e-05, -2.3484230041503906e-05, -2.2117048501968384e-05, -2.074986696243286e-05, -1.938268542289734e-05, -1.8015503883361816e-05, -1.6648322343826294e-05, -1.528114080429077e-05, -1.3913959264755249e-05, -1.2546777725219727e-05, -1.1179596185684204e-05, -9.812414646148682e-06, -8.44523310661316e-06, -7.078051567077637e-06, -5.710870027542114e-06, -4.343688488006592e-06, -2.9765069484710693e-06, -1.6093254089355469e-06, -2.421438694000244e-07, 1.125037670135498e-06, 2.4922192096710205e-06, 3.859400749206543e-06, 5.2265822887420654e-06, 6.593763828277588e-06, 7.96094536781311e-06, 9.328126907348633e-06, 1.0695308446884155e-05, 1.2062489986419678e-05, 1.34296715259552e-05, 1.4796853065490723e-05, 1.6164034605026245e-05, 1.7531216144561768e-05, 1.889839768409729e-05, 2.0265579223632812e-05, 2.1632760763168335e-05, 2.2999942302703857e-05, 2.436712384223938e-05, 2.5734305381774902e-05, 2.7101486921310425e-05, 2.8468668460845947e-05, 2.983585000038147e-05, 3.120303153991699e-05, 3.2570213079452515e-05, 3.393739461898804e-05, 3.530457615852356e-05, 3.667175769805908e-05, 3.8038939237594604e-05, 3.940612077713013e-05, 4.077330231666565e-05, 4.214048385620117e-05]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0, 8.0, 10.0, 15.0, 14.0, 19.0, 32.0, 29.0, 50.0, 59.0, 78.0, 82.0, 101.0, 100.0, 82.0, 70.0, 60.0, 36.0, 52.0, 23.0, 21.0, 15.0, 12.0, 6.0, 11.0, 5.0, 7.0, 4.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00024238525656983256, -0.00023562720161862671, -0.00022886913211550564, -0.00022211107716429979, -0.00021535302221309394, -0.00020859495270997286, -0.000201836897758767, -0.00019507884280756116, -0.00018832077330444008, -0.00018156271835323423, -0.00017480464885011315, -0.0001680465938989073, -0.00016128853894770145, -0.00015453046944458038, -0.00014777241449337453, -0.00014101434499025345, -0.0001342562900390476, -0.00012749823508784175, -0.00012074017286067829, -0.00011398211063351482, -0.00010722404840635136, -0.0001004659861791879, -9.370793122798204e-05, -8.694986900081858e-05, -8.019182132557034e-05, -7.343375909840688e-05, -6.667570414720103e-05, -5.991764192003757e-05, -5.3159579692874104e-05, -4.640152110368945e-05, -3.964346251450479e-05, -3.2885400287341326e-05, -2.6127338060177863e-05, -1.9369277652003802e-05, -1.2611218153324444e-05, -5.853158654645085e-06, 9.04901753528975e-07, 7.662962161703035e-06, 1.4421020750887692e-05, 2.1179082978051156e-05, 2.7937141567235813e-05, 3.469520015642047e-05, 4.145326238358393e-05, 4.821132097276859e-05, 
5.4969379561953247e-05, 6.172744178911671e-05, 6.848550401628017e-05, 7.524355896748602e-05, 8.200162119464949e-05, 8.875968342181295e-05, 9.55177383730188e-05, 0.00010227580060018227, 0.00010903386282734573, 0.00011579191777855158, 0.00012254997272975743, 0.0001293080422328785, 0.00013606609718408436, 0.0001428241521352902, 0.00014958222163841128, 0.00015634027658961713, 0.00016309833154082298, 0.00016985640104394406, 0.0001766144559951499, 0.000183372525498271, 0.00019013058044947684]}, "gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 4.0, 1.0, 3.0, 4.0, 6.0, 6.0, 7.0, 9.0, 7.0, 11.0, 9.0, 9.0, 13.0, 19.0, 19.0, 24.0, 13.0, 21.0, 28.0, 43.0, 36.0, 35.0, 41.0, 44.0, 30.0, 27.0, 28.0, 35.0, 34.0, 32.0, 35.0, 30.0, 35.0, 28.0, 44.0, 20.0, 28.0, 20.0, 20.0, 22.0, 20.0, 28.0, 17.0, 13.0, 11.0, 5.0, 6.0, 8.0, 3.0, 6.0, 3.0, 7.0, 2.0, 0.0, 1.0, 1.0, 2.0, 3.0], "bins": [-0.0001049041748046875, -0.00010178796947002411, -9.867176413536072e-05, -9.555555880069733e-05, -9.243935346603394e-05, -8.932314813137054e-05, -8.620694279670715e-05, -8.309073746204376e-05, -7.997453212738037e-05, -7.685832679271698e-05, -7.374212145805359e-05, -7.06259161233902e-05, -6.75097107887268e-05, -6.439350545406342e-05, -6.127730011940002e-05, -5.816109478473663e-05, -5.504488945007324e-05, -5.192868411540985e-05, -4.881247878074646e-05, -4.569627344608307e-05, -4.258006811141968e-05, -3.946386277675629e-05, -3.6347657442092896e-05, -3.3231452107429504e-05, -3.0115246772766113e-05, -2.6999041438102722e-05, -2.388283610343933e-05, -2.076663076877594e-05, -1.765042543411255e-05, -1.4534220099449158e-05, -1.1418014764785767e-05, -8.301809430122375e-06, -5.185604095458984e-06, -2.0693987607955933e-06, 1.0468065738677979e-06, 4.163011908531189e-06, 7.27921724319458e-06, 1.0395422577857971e-05, 1.3511627912521362e-05, 1.6627833247184753e-05, 1.9744038581848145e-05, 2.2860243916511536e-05, 2.5976449251174927e-05, 2.9092654585838318e-05, 3.220885992050171e-05, 3.53250652551651e-05, 3.844127058982849e-05, 4.155747592449188e-05, 4.4673681259155273e-05, 4.7789886593818665e-05, 5.0906091928482056e-05, 5.402229726314545e-05, 5.713850259780884e-05, 6.025470793247223e-05, 6.337091326713562e-05, 6.648711860179901e-05, 6.96033239364624e-05, 7.27195292711258e-05, 7.583573460578918e-05, 7.895193994045258e-05, 8.206814527511597e-05, 8.518435060977936e-05, 8.830055594444275e-05, 9.141676127910614e-05, 9.453296661376953e-05]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 8.0, 5.0, 6.0, 11.0, 12.0, 20.0, 27.0, 48.0, 49.0, 69.0, 89.0, 130.0, 206.0, 283.0, 378.0, 563.0, 859.0, 1258.0, 1967.0, 3040.0, 4873.0, 7704.0, 13587.0, 23938.0, 48497.0, 109163.0, 351917.0, 288791.0, 93311.0, 43215.0, 22041.0, 12564.0, 7135.0, 4491.0, 2726.0, 1784.0, 1229.0, 808.0, 518.0, 347.0, 262.0, 183.0, 130.0, 86.0, 58.0, 50.0, 31.0, 36.0, 14.0, 11.0, 12.0, 9.0, 5.0, 4.0, 3.0, 3.0, 2.0, 1.0], "bins": [-9.512901306152344e-05, -9.223911911249161e-05, -8.934922516345978e-05, -8.645933121442795e-05, -8.356943726539612e-05, -8.067954331636429e-05, -7.778964936733246e-05, -7.489975541830063e-05, -7.20098614692688e-05, -6.911996752023697e-05, -6.623007357120514e-05, -6.334017962217331e-05, -6.045028567314148e-05, -5.756039172410965e-05, -5.467049777507782e-05, -5.178060382604599e-05, -4.889070987701416e-05, -4.600081592798233e-05, -4.31109219789505e-05, -4.022102802991867e-05, -3.733113408088684e-05, 
-3.444124013185501e-05, -3.155134618282318e-05, -2.866145223379135e-05, -2.577155828475952e-05, -2.288166433572769e-05, -1.9991770386695862e-05, -1.7101876437664032e-05, -1.4211982488632202e-05, -1.1322088539600372e-05, -8.432194590568542e-06, -5.542300641536713e-06, -2.652406692504883e-06, 2.3748725652694702e-07, 3.127381205558777e-06, 6.017275154590607e-06, 8.907169103622437e-06, 1.1797063052654266e-05, 1.4686957001686096e-05, 1.7576850950717926e-05, 2.0466744899749756e-05, 2.3356638848781586e-05, 2.6246532797813416e-05, 2.9136426746845245e-05, 3.2026320695877075e-05, 3.4916214644908905e-05, 3.7806108593940735e-05, 4.0696002542972565e-05, 4.3585896492004395e-05, 4.6475790441036224e-05, 4.9365684390068054e-05, 5.2255578339099884e-05, 5.5145472288131714e-05, 5.8035366237163544e-05, 6.0925260186195374e-05, 6.38151541352272e-05, 6.670504808425903e-05, 6.959494203329086e-05, 7.248483598232269e-05, 7.537472993135452e-05, 7.826462388038635e-05, 8.115451782941818e-05, 8.404441177845001e-05, 8.693430572748184e-05, 8.982419967651367e-05]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 4.0, 4.0, 0.0, 6.0, 6.0, 11.0, 9.0, 15.0, 16.0, 29.0, 27.0, 40.0, 52.0, 63.0, 65.0, 91.0, 70.0, 77.0, 81.0, 67.0, 71.0, 34.0, 34.0, 28.0, 24.0, 13.0, 16.0, 13.0, 7.0, 10.0, 6.0, 7.0, 5.0, 5.0, 1.0, 4.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6226043701171875e-05, -2.5385990738868713e-05, -2.4545937776565552e-05, -2.370588481426239e-05, -2.286583185195923e-05, -2.2025778889656067e-05, -2.1185725927352905e-05, -2.0345672965049744e-05, -1.9505620002746582e-05, -1.866556704044342e-05, -1.782551407814026e-05, -1.6985461115837097e-05, -1.6145408153533936e-05, -1.5305355191230774e-05, -1.4465302228927612e-05, -1.362524926662445e-05, -1.2785196304321289e-05, -1.1945143342018127e-05, -1.1105090379714966e-05, -1.0265037417411804e-05, -9.424984455108643e-06, -8.584931492805481e-06, -7.74487853050232e-06, -6.904825568199158e-06, -6.064772605895996e-06, -5.2247196435928345e-06, -4.384666681289673e-06, -3.5446137189865112e-06, -2.7045607566833496e-06, -1.864507794380188e-06, -1.0244548320770264e-06, -1.8440186977386475e-07, 6.556510925292969e-07, 1.4957040548324585e-06, 2.33575701713562e-06, 3.1758099794387817e-06, 4.015862941741943e-06, 4.855915904045105e-06, 5.695968866348267e-06, 6.536021828651428e-06, 7.37607479095459e-06, 8.216127753257751e-06, 9.056180715560913e-06, 9.896233677864075e-06, 1.0736286640167236e-05, 1.1576339602470398e-05, 1.241639256477356e-05, 1.3256445527076721e-05, 1.4096498489379883e-05, 1.4936551451683044e-05, 1.5776604413986206e-05, 1.6616657376289368e-05, 1.745671033859253e-05, 1.829676330089569e-05, 1.9136816263198853e-05, 1.9976869225502014e-05, 2.0816922187805176e-05, 2.1656975150108337e-05, 2.24970281124115e-05, 2.333708107471466e-05, 2.4177134037017822e-05, 2.5017186999320984e-05, 2.5857239961624146e-05, 2.6697292923927307e-05, 2.753734588623047e-05]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 1.0, 3.0, 12.0, 11.0, 19.0, 37.0, 35.0, 53.0, 84.0, 203.0, 289.0, 584.0, 1078.0, 2420.0, 5001.0, 11122.0, 25141.0, 64875.0, 191496.0, 499268.0, 154739.0, 52377.0, 21632.0, 9456.0, 4350.0, 2040.0, 1074.0, 511.0, 269.0, 136.0, 81.0, 50.0, 30.0, 24.0, 13.0, 17.0, 4.0, 7.0, 3.0, 7.0, 3.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
1.0], "bins": [-8.338689804077148e-05, -8.073169738054276e-05, -7.807649672031403e-05, -7.54212960600853e-05, -7.276609539985657e-05, -7.011089473962784e-05, -6.745569407939911e-05, -6.480049341917038e-05, -6.214529275894165e-05, -5.949009209871292e-05, -5.683489143848419e-05, -5.417969077825546e-05, -5.1524490118026733e-05, -4.8869289457798004e-05, -4.6214088797569275e-05, -4.3558888137340546e-05, -4.0903687477111816e-05, -3.824848681688309e-05, -3.559328615665436e-05, -3.293808549642563e-05, -3.02828848361969e-05, -2.762768417596817e-05, -2.497248351573944e-05, -2.231728285551071e-05, -1.9662082195281982e-05, -1.7006881535053253e-05, -1.4351680874824524e-05, -1.1696480214595795e-05, -9.041279554367065e-06, -6.386078894138336e-06, -3.730878233909607e-06, -1.0756775736808777e-06, 1.5795230865478516e-06, 4.234723746776581e-06, 6.88992440700531e-06, 9.54512506723404e-06, 1.2200325727462769e-05, 1.4855526387691498e-05, 1.7510727047920227e-05, 2.0165927708148956e-05, 2.2821128368377686e-05, 2.5476329028606415e-05, 2.8131529688835144e-05, 3.078673034906387e-05, 3.34419310092926e-05, 3.609713166952133e-05, 3.875233232975006e-05, 4.140753298997879e-05, 4.406273365020752e-05, 4.671793431043625e-05, 4.937313497066498e-05, 5.202833563089371e-05, 5.4683536291122437e-05, 5.7338736951351166e-05, 5.9993937611579895e-05, 6.264913827180862e-05, 6.530433893203735e-05, 6.795953959226608e-05, 7.061474025249481e-05, 7.326994091272354e-05, 7.592514157295227e-05, 7.8580342233181e-05, 8.123554289340973e-05, 8.389074355363846e-05, 8.654594421386719e-05]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 7.0, 8.0, 7.0, 17.0, 10.0, 14.0, 13.0, 27.0, 23.0, 31.0, 26.0, 42.0, 44.0, 35.0, 54.0, 45.0, 48.0, 48.0, 49.0, 56.0, 45.0, 42.0, 39.0, 42.0, 38.0, 33.0, 29.0, 26.0, 18.0, 17.0, 10.0, 13.0, 12.0, 9.0, 7.0, 3.0, 7.0, 5.0, 4.0, 3.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.125999450683594e-05, -4.960130900144577e-05, -4.79426234960556e-05, -4.6283937990665436e-05, -4.462525248527527e-05, -4.29665669798851e-05, -4.1307881474494934e-05, -3.964919596910477e-05, -3.79905104637146e-05, -3.633182495832443e-05, -3.4673139452934265e-05, -3.30144539475441e-05, -3.135576844215393e-05, -2.9697082936763763e-05, -2.8038397431373596e-05, -2.637971192598343e-05, -2.4721026420593262e-05, -2.3062340915203094e-05, -2.1403655409812927e-05, -1.974496990442276e-05, -1.8086284399032593e-05, -1.6427598893642426e-05, -1.4768913388252258e-05, -1.3110227882862091e-05, -1.1451542377471924e-05, -9.792856872081757e-06, -8.13417136669159e-06, -6.475485861301422e-06, -4.816800355911255e-06, -3.1581148505210876e-06, -1.4994293451309204e-06, 1.5925616025924683e-07, 1.817941665649414e-06, 3.4766271710395813e-06, 5.1353126764297485e-06, 6.793998181819916e-06, 8.452683687210083e-06, 1.011136919260025e-05, 1.1770054697990417e-05, 1.3428740203380585e-05, 1.5087425708770752e-05, 1.674611121416092e-05, 1.8404796719551086e-05, 2.0063482224941254e-05, 2.172216773033142e-05, 2.3380853235721588e-05, 2.5039538741111755e-05, 2.6698224246501923e-05, 2.835690975189209e-05, 3.0015595257282257e-05, 3.1674280762672424e-05, 3.333296626806259e-05, 3.499165177345276e-05, 3.6650337278842926e-05, 3.830902278423309e-05, 3.996770828962326e-05, 4.162639379501343e-05, 4.3285079300403595e-05, 4.494376480579376e-05, 4.660245031118393e-05, 4.82611358165741e-05, 4.9919821321964264e-05, 5.157850682735443e-05, 5.32371923327446e-05, 
5.4895877838134766e-05]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 3.0, 6.0, 8.0, 12.0, 11.0, 11.0, 26.0, 27.0, 54.0, 86.0, 93.0, 156.0, 307.0, 343.0, 728.0, 1314.0, 1916.0, 4697.0, 7732.0, 23300.0, 72835.0, 223629.0, 522714.0, 129153.0, 30784.0, 15452.0, 6530.0, 2570.0, 1782.0, 918.0, 426.0, 332.0, 211.0, 115.0, 90.0, 72.0, 38.0, 32.0, 20.0, 7.0, 6.0, 4.0, 3.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-9.775161743164062e-06, -9.49762761592865e-06, -9.220093488693237e-06, -8.942559361457825e-06, -8.665025234222412e-06, -8.387491106987e-06, -8.109956979751587e-06, -7.832422852516174e-06, -7.554888725280762e-06, -7.277354598045349e-06, -6.9998204708099365e-06, -6.722286343574524e-06, -6.444752216339111e-06, -6.167218089103699e-06, -5.889683961868286e-06, -5.6121498346328735e-06, -5.334615707397461e-06, -5.057081580162048e-06, -4.779547452926636e-06, -4.502013325691223e-06, -4.2244791984558105e-06, -3.946945071220398e-06, -3.6694109439849854e-06, -3.3918768167495728e-06, -3.11434268951416e-06, -2.8368085622787476e-06, -2.559274435043335e-06, -2.2817403078079224e-06, -2.0042061805725098e-06, -1.7266720533370972e-06, -1.4491379261016846e-06, -1.171603798866272e-06, -8.940696716308594e-07, -6.165355443954468e-07, -3.390014171600342e-07, -6.146728992462158e-08, 2.1606683731079102e-07, 4.936009645462036e-07, 7.711350917816162e-07, 1.0486692190170288e-06, 1.3262033462524414e-06, 1.603737473487854e-06, 1.8812716007232666e-06, 2.158805727958679e-06, 2.436339855194092e-06, 2.7138739824295044e-06, 2.991408109664917e-06, 3.2689422369003296e-06, 3.546476364135742e-06, 3.824010491371155e-06, 4.101544618606567e-06, 4.37907874584198e-06, 4.656612873077393e-06, 4.934147000312805e-06, 5.211681127548218e-06, 5.48921525478363e-06, 5.766749382019043e-06, 6.0442835092544556e-06, 6.321817636489868e-06, 6.599351763725281e-06, 6.876885890960693e-06, 7.154420018196106e-06, 7.4319541454315186e-06, 7.709488272666931e-06, 7.987022399902344e-06]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 4.0, 5.0, 6.0, 0.0, 7.0, 8.0, 0.0, 8.0, 11.0, 24.0, 0.0, 26.0, 27.0, 34.0, 0.0, 38.0, 45.0, 41.0, 0.0, 66.0, 60.0, 0.0, 70.0, 61.0, 63.0, 0.0, 57.0, 54.0, 62.0, 0.0, 44.0, 29.0, 30.0, 0.0, 37.0, 27.0, 0.0, 22.0, 11.0, 8.0, 0.0, 10.0, 5.0, 6.0, 0.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.5497207641601562e-06, -1.5059486031532288e-06, -1.4621764421463013e-06, -1.4184042811393738e-06, -1.3746321201324463e-06, -1.3308599591255188e-06, -1.2870877981185913e-06, -1.2433156371116638e-06, -1.1995434761047363e-06, -1.1557713150978088e-06, -1.1119991540908813e-06, -1.0682269930839539e-06, -1.0244548320770264e-06, -9.806826710700989e-07, -9.369105100631714e-07, -8.931383490562439e-07, -8.493661880493164e-07, -8.055940270423889e-07, -7.618218660354614e-07, -7.180497050285339e-07, -6.742775440216064e-07, -6.30505383014679e-07, -5.867332220077515e-07, -5.42961061000824e-07, -4.991888999938965e-07, -4.55416738986969e-07, -4.116445779800415e-07, -3.67872416973114e-07, -3.241002559661865e-07, -2.8032809495925903e-07, -2.3655593395233154e-07, -1.9278377294540405e-07, -1.4901161193847656e-07, -1.0523945093154907e-07, -6.146728992462158e-08, -1.7695128917694092e-08, 2.60770320892334e-08, 6.984919309616089e-08, 1.1362135410308838e-07, 1.5739351511001587e-07, 2.0116567611694336e-07, 2.4493783712387085e-07, 
2.8870999813079834e-07, 3.3248215913772583e-07, 3.762543201446533e-07, 4.200264811515808e-07, 4.637986421585083e-07, 5.075708031654358e-07, 5.513429641723633e-07, 5.951151251792908e-07, 6.388872861862183e-07, 6.826594471931458e-07, 7.264316082000732e-07, 7.702037692070007e-07, 8.139759302139282e-07, 8.577480912208557e-07, 9.015202522277832e-07, 9.452924132347107e-07, 9.890645742416382e-07, 1.0328367352485657e-06, 1.0766088962554932e-06, 1.1203810572624207e-06, 1.1641532182693481e-06, 1.2079253792762756e-06, 1.2516975402832031e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 5.0, 11.0, 12.0, 24.0, 29.0, 48.0, 53.0, 105.0, 206.0, 375.0, 610.0, 1173.0, 2350.0, 3898.0, 11686.0, 37104.0, 167131.0, 598785.0, 166929.0, 37460.0, 10075.0, 5478.0, 2400.0, 1163.0, 572.0, 353.0, 216.0, 87.0, 71.0, 56.0, 33.0, 21.0, 12.0, 12.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.417533874511719e-06, -9.127892553806305e-06, -8.838251233100891e-06, -8.548609912395477e-06, -8.258968591690063e-06, -7.96932727098465e-06, -7.679685950279236e-06, -7.390044629573822e-06, -7.100403308868408e-06, -6.810761988162994e-06, -6.5211206674575806e-06, -6.231479346752167e-06, -5.941838026046753e-06, -5.652196705341339e-06, -5.362555384635925e-06, -5.0729140639305115e-06, -4.783272743225098e-06, -4.493631422519684e-06, -4.20399010181427e-06, -3.914348781108856e-06, -3.6247074604034424e-06, -3.3350661396980286e-06, -3.0454248189926147e-06, -2.755783498287201e-06, -2.466142177581787e-06, -2.1765008568763733e-06, -1.8868595361709595e-06, -1.5972182154655457e-06, -1.3075768947601318e-06, -1.017935574054718e-06, -7.282942533493042e-07, -4.386529326438904e-07, -1.4901161193847656e-07, 1.4062970876693726e-07, 4.302710294723511e-07, 7.199123501777649e-07, 1.0095536708831787e-06, 1.2991949915885925e-06, 1.5888363122940063e-06, 1.8784776329994202e-06, 2.168118953704834e-06, 2.457760274410248e-06, 2.7474015951156616e-06, 3.0370429158210754e-06, 3.3266842365264893e-06, 3.616325557231903e-06, 3.905966877937317e-06, 4.195608198642731e-06, 4.4852495193481445e-06, 4.774890840053558e-06, 5.064532160758972e-06, 5.354173481464386e-06, 5.6438148021698e-06, 5.933456122875214e-06, 6.2230974435806274e-06, 6.512738764286041e-06, 6.802380084991455e-06, 7.092021405696869e-06, 7.381662726402283e-06, 7.671304047107697e-06, 7.96094536781311e-06, 8.250586688518524e-06, 8.540228009223938e-06, 8.829869329929352e-06, 9.119510650634766e-06]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 0.0, 4.0, 2.0, 3.0, 9.0, 2.0, 8.0, 10.0, 18.0, 29.0, 27.0, 38.0, 35.0, 58.0, 72.0, 73.0, 79.0, 75.0, 78.0, 73.0, 64.0, 59.0, 53.0, 33.0, 29.0, 19.0, 14.0, 8.0, 9.0, 4.0, 5.0, 6.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.841255187988281e-06, -5.661509931087494e-06, -5.4817646741867065e-06, -5.302019417285919e-06, -5.122274160385132e-06, -4.9425289034843445e-06, -4.762783646583557e-06, -4.58303838968277e-06, -4.403293132781982e-06, -4.223547875881195e-06, -4.043802618980408e-06, -3.86405736207962e-06, -3.684312105178833e-06, -3.5045668482780457e-06, -3.3248215913772583e-06, -3.145076334476471e-06, -2.9653310775756836e-06, -2.7855858206748962e-06, -2.605840563774109e-06, -2.4260953068733215e-06, -2.246350049972534e-06, 
-2.066604793071747e-06, -1.8868595361709595e-06, -1.7071142792701721e-06, -1.5273690223693848e-06, -1.3476237654685974e-06, -1.16787850856781e-06, -9.881332516670227e-07, -8.083879947662354e-07, -6.28642737865448e-07, -4.4889748096466064e-07, -2.691522240638733e-07, -8.940696716308594e-08, 9.033828973770142e-08, 2.7008354663848877e-07, 4.498288035392761e-07, 6.295740604400635e-07, 8.093193173408508e-07, 9.890645742416382e-07, 1.1688098311424255e-06, 1.3485550880432129e-06, 1.5283003449440002e-06, 1.7080456018447876e-06, 1.887790858745575e-06, 2.0675361156463623e-06, 2.2472813725471497e-06, 2.427026629447937e-06, 2.6067718863487244e-06, 2.7865171432495117e-06, 2.966262400150299e-06, 3.1460076570510864e-06, 3.3257529139518738e-06, 3.505498170852661e-06, 3.6852434277534485e-06, 3.864988684654236e-06, 4.044733941555023e-06, 4.2244791984558105e-06, 4.404224455356598e-06, 4.583969712257385e-06, 4.763714969158173e-06, 4.94346022605896e-06, 5.123205482959747e-06, 5.302950739860535e-06, 5.482695996761322e-06, 5.662441253662109e-06]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 3.0, 0.0, 0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 13.0, 8.0, 19.0, 16.0, 24.0, 20.0, 31.0, 52.0, 74.0, 109.0, 130.0, 122.0, 69.0, 59.0, 54.0, 22.0, 35.0, 33.0, 18.0, 13.0, 15.0, 9.0, 7.0, 7.0, 6.0, 7.0, 4.0, 5.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.0001627479650778696, -0.00015689653810113668, -0.000151045125676319, -0.0001451936986995861, -0.0001393422862747684, -0.0001334908592980355, -0.00012763944687321782, -0.00012178801989648491, -0.00011593660019570962, -0.00011008518049493432, -0.00010423376079415902, -9.838234109338373e-05, -9.253091411665082e-05, -8.667950169183314e-05, -8.082807471510023e-05, -7.497665501432493e-05, -6.912523531354964e-05, -6.327381561277434e-05, -5.742239591199905e-05, -5.1570972573244944e-05, -4.571955287246965e-05, -3.986813317169435e-05, -3.401670983294025e-05, -2.8165290132164955e-05, -2.231387043138966e-05, -1.6462450730614364e-05, -1.0611029210849665e-05, -4.759607691084966e-06, 1.0918120096903294e-06, 6.943231710465625e-06, 1.2794655049219728e-05, 1.8646074749995023e-05, 2.449747989885509e-05, 3.0348899599630386e-05, 3.620031930040568e-05, 4.2051742639159784e-05, 4.790316233993508e-05, 5.3754582040710375e-05, 5.960600537946448e-05, 6.545742508023977e-05, 7.130884478101507e-05, 7.716026448179036e-05, 8.301168418256566e-05, 8.886310388334095e-05, 9.471453086007386e-05, 0.00010056594328489155, 0.00010641737026162446, 0.00011226878996239975, 0.00011812020966317505, 0.00012397163663990796, 0.00012982304906472564, 0.00013567447604145855, 0.00014152588846627623, 0.00014737731544300914, 0.00015322872786782682, 0.00015908015484455973, 0.00016493158182129264, 0.00017078300879802555, 0.00017663442122284323, 0.00018248584819957614, 0.00018833726062439382, 0.00019418868760112673, 0.00020004011457785964, 0.00020589152700267732, 0.000211742939427495]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 4.0, 5.0, 5.0, 7.0, 11.0, 5.0, 10.0, 15.0, 7.0, 27.0, 16.0, 11.0, 27.0, 22.0, 36.0, 27.0, 42.0, 29.0, 29.0, 41.0, 43.0, 45.0, 48.0, 38.0, 44.0, 43.0, 45.0, 40.0, 27.0, 35.0, 30.0, 26.0, 28.0, 19.0, 15.0, 17.0, 16.0, 15.0, 13.0, 8.0, 12.0, 9.0, 2.0, 2.0, 4.0, 3.0, 4.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012165307998657227, -0.00011768285185098648, 
-0.0001137126237154007, -0.00010974239557981491, -0.00010577216744422913, -0.00010180193930864334, -9.783171117305756e-05, -9.386148303747177e-05, -8.989125490188599e-05, -8.59210267663002e-05, -8.195079863071442e-05, -7.798057049512863e-05, -7.401034235954285e-05, -7.004011422395706e-05, -6.606988608837128e-05, -6.209965795278549e-05, -5.812942981719971e-05, -5.415920168161392e-05, -5.018897354602814e-05, -4.621874541044235e-05, -4.224851727485657e-05, -3.827828913927078e-05, -3.4308061003685e-05, -3.0337832868099213e-05, -2.6367604732513428e-05, -2.2397376596927643e-05, -1.8427148461341858e-05, -1.4456920325756073e-05, -1.0486692190170288e-05, -6.516464054584503e-06, -2.5462359189987183e-06, 1.4239922165870667e-06, 5.3942203521728516e-06, 9.364448487758636e-06, 1.3334676623344421e-05, 1.7304904758930206e-05, 2.127513289451599e-05, 2.5245361030101776e-05, 2.921558916568756e-05, 3.3185817301273346e-05, 3.715604543685913e-05, 4.1126273572444916e-05, 4.50965017080307e-05, 4.9066729843616486e-05, 5.303695797920227e-05, 5.7007186114788055e-05, 6.097741425037384e-05, 6.494764238595963e-05, 6.891787052154541e-05, 7.28880986571312e-05, 7.685832679271698e-05, 8.082855492830276e-05, 8.479878306388855e-05, 8.876901119947433e-05, 9.273923933506012e-05, 9.67094674706459e-05, 0.00010067969560623169, 0.00010464992374181747, 0.00010862015187740326, 0.00011259038001298904, 0.00011656060814857483, 0.00012053083628416061, 0.0001245010644197464, 0.00012847129255533218, 0.00013244152069091797]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 4.0, 10.0, 10.0, 32.0, 23.0, 27.0, 47.0, 73.0, 87.0, 126.0, 174.0, 252.0, 426.0, 611.0, 1000.0, 1585.0, 2781.0, 4532.0, 8824.0, 18858.0, 60945.0, 2518919.0, 1484394.0, 52931.0, 17212.0, 8286.0, 4523.0, 2636.0, 1582.0, 1069.0, 675.0, 466.0, 309.0, 233.0, 165.0, 143.0, 89.0, 63.0, 52.0, 33.0, 27.0, 8.0, 15.0, 7.0, 8.0, 5.0, 2.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.423494338989258e-05, -9.128730744123459e-05, -8.83396714925766e-05, -8.539203554391861e-05, -8.244439959526062e-05, -7.949676364660263e-05, -7.654912769794464e-05, -7.360149174928665e-05, -7.065385580062866e-05, -6.770621985197067e-05, -6.475858390331268e-05, -6.18109479546547e-05, -5.8863312005996704e-05, -5.5915676057338715e-05, -5.2968040108680725e-05, -5.0020404160022736e-05, -4.7072768211364746e-05, -4.4125132262706757e-05, -4.117749631404877e-05, -3.822986036539078e-05, -3.528222441673279e-05, -3.23345884680748e-05, -2.938695251941681e-05, -2.643931657075882e-05, -2.349168062210083e-05, -2.054404467344284e-05, -1.759640872478485e-05, -1.4648772776126862e-05, -1.1701136827468872e-05, -8.753500878810883e-06, -5.805864930152893e-06, -2.8582289814949036e-06, 8.940696716308594e-08, 3.0370429158210754e-06, 5.984678864479065e-06, 8.932314813137054e-06, 1.1879950761795044e-05, 1.4827586710453033e-05, 1.7775222659111023e-05, 2.0722858607769012e-05, 2.3670494556427002e-05, 2.661813050508499e-05, 2.956576645374298e-05, 3.251340240240097e-05, 3.546103835105896e-05, 3.840867429971695e-05, 4.135631024837494e-05, 4.430394619703293e-05, 4.725158214569092e-05, 5.019921809434891e-05, 5.31468540430069e-05, 5.6094489991664886e-05, 5.9042125940322876e-05, 6.198976188898087e-05, 6.493739783763885e-05, 6.788503378629684e-05, 7.083266973495483e-05, 7.378030568361282e-05, 7.672794163227081e-05, 7.96755775809288e-05, 8.262321352958679e-05, 8.557084947824478e-05, 8.851848542690277e-05, 9.146612137556076e-05, 
9.441375732421875e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 3.0, 6.0, 3.0, 11.0, 9.0, 13.0, 9.0, 20.0, 19.0, 23.0, 29.0, 26.0, 56.0, 41.0, 72.0, 59.0, 76.0, 67.0, 58.0, 53.0, 59.0, 61.0, 50.0, 29.0, 26.0, 20.0, 21.0, 16.0, 13.0, 7.0, 8.0, 5.0, 10.0, 6.0, 2.0, 1.0, 3.0, 5.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0], "bins": [-2.5093555450439453e-05, -2.434477210044861e-05, -2.3595988750457764e-05, -2.284720540046692e-05, -2.2098422050476074e-05, -2.134963870048523e-05, -2.0600855350494385e-05, -1.985207200050354e-05, -1.9103288650512695e-05, -1.835450530052185e-05, -1.7605721950531006e-05, -1.685693860054016e-05, -1.6108155250549316e-05, -1.535937190055847e-05, -1.4610588550567627e-05, -1.3861805200576782e-05, -1.3113021850585938e-05, -1.2364238500595093e-05, -1.1615455150604248e-05, -1.0866671800613403e-05, -1.0117888450622559e-05, -9.369105100631714e-06, -8.620321750640869e-06, -7.871538400650024e-06, -7.12275505065918e-06, -6.373971700668335e-06, -5.62518835067749e-06, -4.8764050006866455e-06, -4.127621650695801e-06, -3.378838300704956e-06, -2.6300549507141113e-06, -1.8812716007232666e-06, -1.1324882507324219e-06, -3.8370490074157715e-07, 3.650784492492676e-07, 1.1138617992401123e-06, 1.862645149230957e-06, 2.6114284992218018e-06, 3.3602118492126465e-06, 4.108995199203491e-06, 4.857778549194336e-06, 5.606561899185181e-06, 6.355345249176025e-06, 7.10412859916687e-06, 7.852911949157715e-06, 8.60169529914856e-06, 9.350478649139404e-06, 1.0099261999130249e-05, 1.0848045349121094e-05, 1.1596828699111938e-05, 1.2345612049102783e-05, 1.3094395399093628e-05, 1.3843178749084473e-05, 1.4591962099075317e-05, 1.5340745449066162e-05, 1.6089528799057007e-05, 1.683831214904785e-05, 1.7587095499038696e-05, 1.833587884902954e-05, 1.9084662199020386e-05, 1.983344554901123e-05, 2.0582228899002075e-05, 2.133101224899292e-05, 2.2079795598983765e-05, 2.282857894897461e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 1.0, 2.0, 4.0, 5.0, 5.0, 9.0, 6.0, 12.0, 13.0, 19.0, 24.0, 50.0, 69.0, 106.0, 196.0, 280.0, 454.0, 705.0, 1305.0, 2123.0, 3788.0, 6859.0, 12747.0, 28016.0, 66460.0, 296254.0, 3399800.0, 255380.0, 65004.0, 26468.0, 12372.0, 6740.0, 3773.0, 1991.0, 1272.0, 743.0, 468.0, 292.0, 161.0, 105.0, 58.0, 41.0, 27.0, 19.0, 17.0, 17.0, 6.0, 10.0, 6.0, 4.0, 2.0, 4.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.733966827392578e-05, -5.539413541555405e-05, -5.344860255718231e-05, -5.150306969881058e-05, -4.955753684043884e-05, -4.761200398206711e-05, -4.5666471123695374e-05, -4.372093826532364e-05, -4.1775405406951904e-05, -3.982987254858017e-05, -3.7884339690208435e-05, -3.59388068318367e-05, -3.3993273973464966e-05, -3.204774111509323e-05, -3.0102208256721497e-05, -2.8156675398349762e-05, -2.6211142539978027e-05, -2.4265609681606293e-05, -2.2320076823234558e-05, -2.0374543964862823e-05, -1.842901110649109e-05, -1.6483478248119354e-05, -1.453794538974762e-05, -1.2592412531375885e-05, -1.064687967300415e-05, -8.701346814632416e-06, -6.755813956260681e-06, -4.8102810978889465e-06, -2.864748239517212e-06, -9.192153811454773e-07, 1.0263174772262573e-06, 2.971850335597992e-06, 4.9173831939697266e-06, 6.862916052341461e-06, 8.808448910713196e-06, 1.075398176908493e-05, 1.2699514627456665e-05, 1.46450474858284e-05, 1.6590580344200134e-05, 1.853611320257187e-05, 
2.0481646060943604e-05, 2.2427178919315338e-05, 2.4372711777687073e-05, 2.6318244636058807e-05, 2.8263777494430542e-05, 3.0209310352802277e-05, 3.215484321117401e-05, 3.4100376069545746e-05, 3.604590892791748e-05, 3.7991441786289215e-05, 3.993697464466095e-05, 4.1882507503032684e-05, 4.382804036140442e-05, 4.5773573219776154e-05, 4.771910607814789e-05, 4.966463893651962e-05, 5.161017179489136e-05, 5.355570465326309e-05, 5.550123751163483e-05, 5.744677037000656e-05, 5.9392303228378296e-05, 6.133783608675003e-05, 6.328336894512177e-05, 6.52289018034935e-05, 6.717443466186523e-05]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 3.0, 3.0, 2.0, 4.0, 4.0, 5.0, 4.0, 4.0, 11.0, 10.0, 16.0, 19.0, 19.0, 26.0, 38.0, 47.0, 59.0, 74.0, 135.0, 272.0, 703.0, 1297.0, 604.0, 261.0, 134.0, 72.0, 38.0, 29.0, 30.0, 35.0, 23.0, 15.0, 10.0, 6.0, 5.0, 13.0, 6.0, 7.0, 3.0, 6.0, 2.0, 7.0, 7.0, 1.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.933906555175781e-05, -3.80026176571846e-05, -3.666616976261139e-05, -3.532972186803818e-05, -3.3993273973464966e-05, -3.2656826078891754e-05, -3.132037818431854e-05, -2.998393028974533e-05, -2.864748239517212e-05, -2.7311034500598907e-05, -2.5974586606025696e-05, -2.4638138711452484e-05, -2.3301690816879272e-05, -2.196524292230606e-05, -2.062879502773285e-05, -1.9292347133159637e-05, -1.7955899238586426e-05, -1.6619451344013214e-05, -1.5283003449440002e-05, -1.394655555486679e-05, -1.2610107660293579e-05, -1.1273659765720367e-05, -9.937211871147156e-06, -8.600763976573944e-06, -7.264316082000732e-06, -5.927868187427521e-06, -4.591420292854309e-06, -3.2549723982810974e-06, -1.9185245037078857e-06, -5.820766091346741e-07, 7.543712854385376e-07, 2.0908191800117493e-06, 3.427267074584961e-06, 4.763714969158173e-06, 6.100162863731384e-06, 7.436610758304596e-06, 8.773058652877808e-06, 1.010950654745102e-05, 1.1445954442024231e-05, 1.2782402336597443e-05, 1.4118850231170654e-05, 1.5455298125743866e-05, 1.6791746020317078e-05, 1.812819391489029e-05, 1.94646418094635e-05, 2.0801089704036713e-05, 2.2137537598609924e-05, 2.3473985493183136e-05, 2.4810433387756348e-05, 2.614688128232956e-05, 2.748332917690277e-05, 2.8819777071475983e-05, 3.0156224966049194e-05, 3.1492672860622406e-05, 3.282912075519562e-05, 3.416556864976883e-05, 3.550201654434204e-05, 3.683846443891525e-05, 3.8174912333488464e-05, 3.9511360228061676e-05, 4.084780812263489e-05, 4.21842560172081e-05, 4.352070391178131e-05, 4.485715180635452e-05, 4.6193599700927734e-05]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 2.0, 3.0, 3.0, 4.0, 9.0, 10.0, 14.0, 22.0, 24.0, 24.0, 49.0, 66.0, 65.0, 110.0, 100.0, 111.0, 76.0, 70.0, 59.0, 46.0, 26.0, 22.0, 22.0, 14.0, 16.0, 12.0, 4.0, 7.0, 1.0, 3.0, 3.0, 1.0, 4.0, 3.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0002554545644670725, -0.00024793186457827687, -0.0002404091792413965, -0.00023288647935260087, -0.0002253637940157205, -0.00021784109412692487, -0.00021031839423812926, -0.00020279570890124887, -0.00019527300901245326, -0.00018775030912365764, -0.00018022762378677726, -0.00017270492389798164, -0.00016518223856110126, -0.00015765953867230564, -0.00015013685333542526, -0.00014261415344662964, -0.00013509145355783403, -0.00012756875366903841, -0.00012004606833215803, 
-0.00011252336844336241, -0.00010500067583052441, -9.747798321768641e-05, -8.995529060484841e-05, -8.243259799201041e-05, -7.490991265513003e-05, -6.738722004229203e-05, -5.986452379147522e-05, -5.234183117863722e-05, -4.4819134927820414e-05, -3.7296442314982414e-05, -2.9773749702144414e-05, -2.2251053451327607e-05, -1.47283572005108e-05, -7.205663223430747e-06, 3.1703075364930555e-07, 7.839724275982007e-06, 1.536241870780941e-05, 2.2885113139636815e-05, 3.0407805752474815e-05, 3.793050200329162e-05, 4.545319461612962e-05, 5.297588722896762e-05, 6.049858347978443e-05, 6.802127609262243e-05, 7.554396870546043e-05, 8.306666859425604e-05, 9.058935393113643e-05, 9.811205381993204e-05, 0.00010563474643277004, 0.00011315743904560804, 0.00012068013165844604, 0.00012820282427128404, 0.00013572552416007966, 0.00014324820949696004, 0.00015077090938575566, 0.00015829360927455127, 0.00016581629461143166, 0.00017333899450022727, 0.00018086167983710766, 0.00018838437972590327, 0.00019590706506278366, 0.00020342976495157927, 0.00021095245028845966, 0.00021847515017725527, 0.0002259978500660509]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 8.0, 4.0, 3.0, 10.0, 5.0, 15.0, 10.0, 11.0, 15.0, 18.0, 24.0, 16.0, 30.0, 24.0, 41.0, 31.0, 24.0, 53.0, 36.0, 43.0, 33.0, 47.0, 37.0, 44.0, 36.0, 44.0, 36.0, 31.0, 36.0, 25.0, 28.0, 24.0, 16.0, 30.0, 17.0, 18.0, 15.0, 14.0, 11.0, 9.0, 5.0, 6.0, 4.0, 5.0, 2.0, 4.0, 2.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0], "bins": [-0.00012177228927612305, -0.00011810380965471268, -0.00011443533003330231, -0.00011076685041189194, -0.00010709837079048157, -0.0001034298911690712, -9.976141154766083e-05, -9.609293192625046e-05, -9.242445230484009e-05, -8.875597268342972e-05, -8.508749306201935e-05, -8.141901344060898e-05, -7.775053381919861e-05, -7.408205419778824e-05, -7.041357457637787e-05, -6.67450949549675e-05, -6.307661533355713e-05, -5.940813571214676e-05, -5.573965609073639e-05, -5.207117646932602e-05, -4.840269684791565e-05, -4.473421722650528e-05, -4.106573760509491e-05, -3.739725798368454e-05, -3.372877836227417e-05, -3.00602987408638e-05, -2.639181911945343e-05, -2.272333949804306e-05, -1.905485987663269e-05, -1.538638025522232e-05, -1.171790063381195e-05, -8.04942101240158e-06, -4.380941390991211e-06, -7.124617695808411e-07, 2.956017851829529e-06, 6.624497473239899e-06, 1.0292977094650269e-05, 1.3961456716060638e-05, 1.7629936337471008e-05, 2.1298415958881378e-05, 2.4966895580291748e-05, 2.8635375201702118e-05, 3.230385482311249e-05, 3.597233444452286e-05, 3.964081406593323e-05, 4.33092936873436e-05, 4.697777330875397e-05, 5.064625293016434e-05, 5.431473255157471e-05, 5.798321217298508e-05, 6.165169179439545e-05, 6.532017141580582e-05, 6.898865103721619e-05, 7.265713065862656e-05, 7.632561028003693e-05, 7.99940899014473e-05, 8.366256952285767e-05, 8.733104914426804e-05, 9.09995287656784e-05, 9.466800838708878e-05, 9.833648800849915e-05, 0.00010200496762990952, 0.00010567344725131989, 0.00010934192687273026, 0.00011301040649414062]}, "gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 1.0, 9.0, 6.0, 8.0, 7.0, 9.0, 14.0, 17.0, 17.0, 36.0, 29.0, 65.0, 92.0, 183.0, 289.0, 635.0, 1324.0, 3064.0, 7635.0, 21109.0, 70858.0, 461920.0, 382673.0, 66197.0, 19587.0, 7252.0, 2901.0, 1247.0, 601.0, 301.0, 167.0, 83.0, 49.0, 40.0, 31.0, 22.0, 20.0, 10.0, 13.0, 8.0, 5.0, 6.0, 5.0, 5.0, 5.0, 2.0, 1.0, 3.0, 1.0, 0.0, 
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0001977682113647461, -0.00019094161689281464, -0.00018411502242088318, -0.00017728842794895172, -0.00017046183347702026, -0.0001636352390050888, -0.00015680864453315735, -0.0001499820500612259, -0.00014315545558929443, -0.00013632886111736298, -0.00012950226664543152, -0.00012267567217350006, -0.0001158490777015686, -0.00010902248322963715, -0.00010219588875770569, -9.536929428577423e-05, -8.854269981384277e-05, -8.171610534191132e-05, -7.488951086997986e-05, -6.80629163980484e-05, -6.123632192611694e-05, -5.4409727454185486e-05, -4.758313298225403e-05, -4.075653851032257e-05, -3.392994403839111e-05, -2.7103349566459656e-05, -2.0276755094528198e-05, -1.345016062259674e-05, -6.623566150665283e-06, 2.0302832126617432e-07, 7.029622793197632e-06, 1.385621726512909e-05, 2.0682811737060547e-05, 2.7509406208992004e-05, 3.433600068092346e-05, 4.116259515285492e-05, 4.798918962478638e-05, 5.4815784096717834e-05, 6.164237856864929e-05, 6.846897304058075e-05, 7.529556751251221e-05, 8.212216198444366e-05, 8.894875645637512e-05, 9.577535092830658e-05, 0.00010260194540023804, 0.0001094285398721695, 0.00011625513434410095, 0.0001230817288160324, 0.00012990832328796387, 0.00013673491775989532, 0.00014356151223182678, 0.00015038810670375824, 0.0001572147011756897, 0.00016404129564762115, 0.0001708678901195526, 0.00017769448459148407, 0.00018452107906341553, 0.00019134767353534698, 0.00019817426800727844, 0.0002050008624792099, 0.00021182745695114136, 0.00021865405142307281, 0.00022548064589500427, 0.00023230724036693573, 0.0002391338348388672]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 9.0, 7.0, 11.0, 19.0, 23.0, 36.0, 46.0, 74.0, 80.0, 105.0, 125.0, 103.0, 101.0, 77.0, 50.0, 42.0, 24.0, 23.0, 15.0, 13.0, 8.0, 4.0, 6.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.568960189819336e-05, -2.445746213197708e-05, -2.3225322365760803e-05, -2.1993182599544525e-05, -2.0761042833328247e-05, -1.952890306711197e-05, -1.829676330089569e-05, -1.7064623534679413e-05, -1.5832483768463135e-05, -1.4600344002246857e-05, -1.3368204236030579e-05, -1.21360644698143e-05, -1.0903924703598022e-05, -9.671784937381744e-06, -8.439645171165466e-06, -7.207505404949188e-06, -5.97536563873291e-06, -4.743225872516632e-06, -3.511086106300354e-06, -2.278946340084076e-06, -1.0468065738677979e-06, 1.8533319234848022e-07, 1.4174729585647583e-06, 2.6496127247810364e-06, 3.8817524909973145e-06, 5.1138922572135925e-06, 6.346032023429871e-06, 7.578171789646149e-06, 8.810311555862427e-06, 1.0042451322078705e-05, 1.1274591088294983e-05, 1.2506730854511261e-05, 1.3738870620727539e-05, 1.4971010386943817e-05, 1.6203150153160095e-05, 1.7435289919376373e-05, 1.866742968559265e-05, 1.989956945180893e-05, 2.1131709218025208e-05, 2.2363848984241486e-05, 2.3595988750457764e-05, 2.4828128516674042e-05, 2.606026828289032e-05, 2.7292408049106598e-05, 2.8524547815322876e-05, 2.9756687581539154e-05, 3.098882734775543e-05, 3.222096711397171e-05, 3.345310688018799e-05, 3.4685246646404266e-05, 3.5917386412620544e-05, 3.714952617883682e-05, 3.83816659450531e-05, 3.961380571126938e-05, 4.084594547748566e-05, 4.2078085243701935e-05, 4.331022500991821e-05, 4.454236477613449e-05, 4.577450454235077e-05, 4.700664430856705e-05, 4.8238784074783325e-05, 4.94709238409996e-05, 5.070306360721588e-05, 
5.193520337343216e-05, 5.316734313964844e-05]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 3.0, 3.0, 7.0, 4.0, 17.0, 16.0, 34.0, 56.0, 68.0, 143.0, 199.0, 290.0, 503.0, 1009.0, 1753.0, 3283.0, 6216.0, 12099.0, 24963.0, 50633.0, 119988.0, 402203.0, 259651.0, 87579.0, 38234.0, 18949.0, 9655.0, 5063.0, 2586.0, 1411.0, 774.0, 453.0, 264.0, 169.0, 108.0, 59.0, 41.0, 29.0, 18.0, 12.0, 5.0, 2.0, 4.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.939338684082031e-05, -7.68173485994339e-05, -7.424131035804749e-05, -7.166527211666107e-05, -6.908923387527466e-05, -6.651319563388824e-05, -6.393715739250183e-05, -6.136111915111542e-05, -5.8785080909729004e-05, -5.620904266834259e-05, -5.363300442695618e-05, -5.105696618556976e-05, -4.848092794418335e-05, -4.5904889702796936e-05, -4.332885146141052e-05, -4.075281322002411e-05, -3.8176774978637695e-05, -3.560073673725128e-05, -3.302469849586487e-05, -3.0448660254478455e-05, -2.787262201309204e-05, -2.5296583771705627e-05, -2.2720545530319214e-05, -2.01445072889328e-05, -1.7568469047546387e-05, -1.4992430806159973e-05, -1.241639256477356e-05, -9.840354323387146e-06, -7.264316082000732e-06, -4.688277840614319e-06, -2.1122395992279053e-06, 4.637986421585083e-07, 3.039836883544922e-06, 5.6158751249313354e-06, 8.191913366317749e-06, 1.0767951607704163e-05, 1.3343989849090576e-05, 1.592002809047699e-05, 1.8496066331863403e-05, 2.1072104573249817e-05, 2.364814281463623e-05, 2.6224181056022644e-05, 2.8800219297409058e-05, 3.137625753879547e-05, 3.3952295780181885e-05, 3.65283340215683e-05, 3.910437226295471e-05, 4.1680410504341125e-05, 4.425644874572754e-05, 4.683248698711395e-05, 4.9408525228500366e-05, 5.198456346988678e-05, 5.456060171127319e-05, 5.713663995265961e-05, 5.971267819404602e-05, 6.228871643543243e-05, 6.486475467681885e-05, 6.744079291820526e-05, 7.001683115959167e-05, 7.259286940097809e-05, 7.51689076423645e-05, 7.774494588375092e-05, 8.032098412513733e-05, 8.289702236652374e-05, 8.547306060791016e-05]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 1.0, 5.0, 8.0, 9.0, 12.0, 13.0, 19.0, 15.0, 22.0, 24.0, 26.0, 38.0, 39.0, 35.0, 44.0, 40.0, 54.0, 35.0, 49.0, 45.0, 44.0, 55.0, 46.0, 43.0, 47.0, 34.0, 22.0, 39.0, 29.0, 20.0, 29.0, 8.0, 13.0, 11.0, 13.0, 7.0, 4.0, 3.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6684017181396484e-05, -5.4834410548210144e-05, -5.2984803915023804e-05, -5.113519728183746e-05, -4.928559064865112e-05, -4.743598401546478e-05, -4.558637738227844e-05, -4.37367707490921e-05, -4.188716411590576e-05, -4.003755748271942e-05, -3.818795084953308e-05, -3.633834421634674e-05, -3.44887375831604e-05, -3.263913094997406e-05, -3.078952431678772e-05, -2.893991768360138e-05, -2.709031105041504e-05, -2.52407044172287e-05, -2.339109778404236e-05, -2.1541491150856018e-05, -1.9691884517669678e-05, -1.7842277884483337e-05, -1.5992671251296997e-05, -1.4143064618110657e-05, -1.2293457984924316e-05, -1.0443851351737976e-05, -8.594244718551636e-06, -6.744638085365295e-06, -4.895031452178955e-06, -3.0454248189926147e-06, -1.1958181858062744e-06, 6.537884473800659e-07, 2.5033950805664062e-06, 4.353001713752747e-06, 6.202608346939087e-06, 8.052214980125427e-06, 9.901821613311768e-06, 1.1751428246498108e-05, 1.3601034879684448e-05, 1.545064151287079e-05, 1.730024814605713e-05, 
1.914985477924347e-05, 2.099946141242981e-05, 2.284906804561615e-05, 2.469867467880249e-05, 2.654828131198883e-05, 2.839788794517517e-05, 3.024749457836151e-05, 3.209710121154785e-05, 3.394670784473419e-05, 3.579631447792053e-05, 3.764592111110687e-05, 3.949552774429321e-05, 4.134513437747955e-05, 4.3194741010665894e-05, 4.5044347643852234e-05, 4.6893954277038574e-05, 4.8743560910224915e-05, 5.0593167543411255e-05, 5.2442774176597595e-05, 5.4292380809783936e-05, 5.6141987442970276e-05, 5.7991594076156616e-05, 5.9841200709342957e-05, 6.16908073425293e-05]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 5.0, 1.0, 3.0, 5.0, 6.0, 7.0, 12.0, 9.0, 13.0, 17.0, 31.0, 47.0, 48.0, 84.0, 89.0, 151.0, 245.0, 364.0, 559.0, 911.0, 1520.0, 2533.0, 4458.0, 8519.0, 17990.0, 42804.0, 121749.0, 386167.0, 302440.0, 91955.0, 34088.0, 14816.0, 7245.0, 3850.0, 2160.0, 1000.0, 954.0, 560.0, 364.0, 246.0, 163.0, 131.0, 77.0, 51.0, 38.0, 25.0, 16.0, 11.0, 11.0, 6.0, 5.0, 3.0, 3.0, 3.0, 2.0, 0.0, 1.0], "bins": [-8.225440979003906e-06, -7.98981636762619e-06, -7.754191756248474e-06, -7.518567144870758e-06, -7.282942533493042e-06, -7.047317922115326e-06, -6.81169331073761e-06, -6.576068699359894e-06, -6.340444087982178e-06, -6.104819476604462e-06, -5.869194865226746e-06, -5.6335702538490295e-06, -5.3979456424713135e-06, -5.162321031093597e-06, -4.926696419715881e-06, -4.691071808338165e-06, -4.455447196960449e-06, -4.219822585582733e-06, -3.984197974205017e-06, -3.748573362827301e-06, -3.512948751449585e-06, -3.277324140071869e-06, -3.041699528694153e-06, -2.8060749173164368e-06, -2.5704503059387207e-06, -2.3348256945610046e-06, -2.0992010831832886e-06, -1.8635764718055725e-06, -1.6279518604278564e-06, -1.3923272490501404e-06, -1.1567026376724243e-06, -9.210780262947083e-07, -6.854534149169922e-07, -4.498288035392761e-07, -2.1420419216156006e-07, 2.1420419216156006e-08, 2.5704503059387207e-07, 4.926696419715881e-07, 7.282942533493042e-07, 9.639188647270203e-07, 1.1995434761047363e-06, 1.4351680874824524e-06, 1.6707926988601685e-06, 1.9064173102378845e-06, 2.1420419216156006e-06, 2.3776665329933167e-06, 2.6132911443710327e-06, 2.8489157557487488e-06, 3.084540367126465e-06, 3.320164978504181e-06, 3.555789589881897e-06, 3.791414201259613e-06, 4.027038812637329e-06, 4.262663424015045e-06, 4.498288035392761e-06, 4.733912646770477e-06, 4.969537258148193e-06, 5.2051618695259094e-06, 5.4407864809036255e-06, 5.6764110922813416e-06, 5.912035703659058e-06, 6.147660315036774e-06, 6.38328492641449e-06, 6.618909537792206e-06, 6.854534149169922e-06]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 4.0, 0.0, 2.0, 2.0, 3.0, 3.0, 8.0, 5.0, 0.0, 12.0, 19.0, 21.0, 30.0, 37.0, 51.0, 0.0, 51.0, 43.0, 67.0, 58.0, 55.0, 73.0, 0.0, 65.0, 54.0, 50.0, 49.0, 52.0, 35.0, 0.0, 33.0, 33.0, 21.0, 18.0, 12.0, 13.0, 0.0, 9.0, 4.0, 3.0, 4.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.7881393432617188e-06, -1.7369166016578674e-06, -1.6856938600540161e-06, -1.6344711184501648e-06, -1.5832483768463135e-06, -1.5320256352424622e-06, -1.4808028936386108e-06, -1.4295801520347595e-06, -1.3783574104309082e-06, -1.3271346688270569e-06, -1.2759119272232056e-06, -1.2246891856193542e-06, -1.173466444015503e-06, -1.1222437024116516e-06, -1.0710209608078003e-06, -1.019798219203949e-06, -9.685754776000977e-07, -9.173527359962463e-07, -8.66129994392395e-07, 
-8.149072527885437e-07, -7.636845111846924e-07, -7.124617695808411e-07, -6.612390279769897e-07, -6.100162863731384e-07, -5.587935447692871e-07, -5.075708031654358e-07, -4.5634806156158447e-07, -4.0512531995773315e-07, -3.5390257835388184e-07, -3.026798367500305e-07, -2.514570951461792e-07, -2.0023435354232788e-07, -1.4901161193847656e-07, -9.778887033462524e-08, -4.6566128730773926e-08, 4.6566128730773926e-09, 5.587935447692871e-08, 1.0710209608078003e-07, 1.5832483768463135e-07, 2.0954757928848267e-07, 2.60770320892334e-07, 3.119930624961853e-07, 3.632158041000366e-07, 4.1443854570388794e-07, 4.6566128730773926e-07, 5.168840289115906e-07, 5.681067705154419e-07, 6.193295121192932e-07, 6.705522537231445e-07, 7.217749953269958e-07, 7.729977369308472e-07, 8.242204785346985e-07, 8.754432201385498e-07, 9.266659617424011e-07, 9.778887033462524e-07, 1.0291114449501038e-06, 1.080334186553955e-06, 1.1315569281578064e-06, 1.1827796697616577e-06, 1.234002411365509e-06, 1.2852251529693604e-06, 1.3364478945732117e-06, 1.387670636177063e-06, 1.4388933777809143e-06, 1.4901161193847656e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 3.0, 4.0, 3.0, 9.0, 8.0, 13.0, 22.0, 30.0, 59.0, 90.0, 89.0, 256.0, 385.0, 416.0, 1081.0, 1839.0, 2115.0, 5456.0, 6410.0, 17668.0, 39361.0, 53762.0, 193962.0, 401838.0, 151305.0, 97096.0, 39301.0, 13275.0, 10819.0, 4034.0, 3503.0, 1889.0, 793.0, 727.0, 390.0, 181.0, 145.0, 97.0, 36.0, 35.0, 25.0, 7.0, 12.0, 5.0, 6.0, 3.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-5.125999450683594e-06, -4.968605935573578e-06, -4.811212420463562e-06, -4.653818905353546e-06, -4.49642539024353e-06, -4.339031875133514e-06, -4.1816383600234985e-06, -4.024244844913483e-06, -3.866851329803467e-06, -3.709457814693451e-06, -3.552064299583435e-06, -3.394670784473419e-06, -3.2372772693634033e-06, -3.0798837542533875e-06, -2.9224902391433716e-06, -2.7650967240333557e-06, -2.60770320892334e-06, -2.450309693813324e-06, -2.292916178703308e-06, -2.1355226635932922e-06, -1.9781291484832764e-06, -1.8207356333732605e-06, -1.6633421182632446e-06, -1.5059486031532288e-06, -1.3485550880432129e-06, -1.191161572933197e-06, -1.0337680578231812e-06, -8.763745427131653e-07, -7.189810276031494e-07, -5.615875124931335e-07, -4.041939973831177e-07, -2.468004822731018e-07, -8.940696716308594e-08, 6.798654794692993e-08, 2.253800630569458e-07, 3.8277357816696167e-07, 5.401670932769775e-07, 6.975606083869934e-07, 8.549541234970093e-07, 1.0123476386070251e-06, 1.169741153717041e-06, 1.3271346688270569e-06, 1.4845281839370728e-06, 1.6419216990470886e-06, 1.7993152141571045e-06, 1.9567087292671204e-06, 2.1141022443771362e-06, 2.271495759487152e-06, 2.428889274597168e-06, 2.586282789707184e-06, 2.7436763048171997e-06, 2.9010698199272156e-06, 3.0584633350372314e-06, 3.2158568501472473e-06, 3.373250365257263e-06, 3.530643880367279e-06, 3.688037395477295e-06, 3.845430910587311e-06, 4.002824425697327e-06, 4.1602179408073425e-06, 4.317611455917358e-06, 4.475004971027374e-06, 4.63239848613739e-06, 4.789792001247406e-06, 4.947185516357422e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 4.0, 1.0, 4.0, 8.0, 6.0, 13.0, 13.0, 19.0, 13.0, 11.0, 33.0, 35.0, 34.0, 47.0, 15.0, 55.0, 43.0, 51.0, 45.0, 66.0, 29.0, 38.0, 38.0, 51.0, 40.0, 45.0, 42.0, 19.0, 41.0, 29.0, 32.0, 12.0, 10.0, 11.0, 6.0, 10.0, 6.0, 5.0, 4.0, 7.0, 4.0, 5.0, 
0.0, 2.0, 3.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-3.4570693969726562e-06, -3.3471733331680298e-06, -3.2372772693634033e-06, -3.127381205558777e-06, -3.0174851417541504e-06, -2.907589077949524e-06, -2.7976930141448975e-06, -2.687796950340271e-06, -2.5779008865356445e-06, -2.468004822731018e-06, -2.3581087589263916e-06, -2.248212695121765e-06, -2.1383166313171387e-06, -2.028420567512512e-06, -1.9185245037078857e-06, -1.8086284399032593e-06, -1.6987323760986328e-06, -1.5888363122940063e-06, -1.4789402484893799e-06, -1.3690441846847534e-06, -1.259148120880127e-06, -1.1492520570755005e-06, -1.039355993270874e-06, -9.294599294662476e-07, -8.195638656616211e-07, -7.096678018569946e-07, -5.997717380523682e-07, -4.898756742477417e-07, -3.7997961044311523e-07, -2.7008354663848877e-07, -1.601874828338623e-07, -5.029141902923584e-08, 5.960464477539063e-08, 1.695007085800171e-07, 2.7939677238464355e-07, 3.8929283618927e-07, 4.991888999938965e-07, 6.09084963798523e-07, 7.189810276031494e-07, 8.288770914077759e-07, 9.387731552124023e-07, 1.0486692190170288e-06, 1.1585652828216553e-06, 1.2684613466262817e-06, 1.3783574104309082e-06, 1.4882534742355347e-06, 1.5981495380401611e-06, 1.7080456018447876e-06, 1.817941665649414e-06, 1.9278377294540405e-06, 2.037733793258667e-06, 2.1476298570632935e-06, 2.25752592086792e-06, 2.3674219846725464e-06, 2.477318048477173e-06, 2.5872141122817993e-06, 2.6971101760864258e-06, 2.8070062398910522e-06, 2.9169023036956787e-06, 3.026798367500305e-06, 3.1366944313049316e-06, 3.246590495109558e-06, 3.3564865589141846e-06, 3.466382622718811e-06, 3.5762786865234375e-06]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 5.0, 0.0, 1.0, 7.0, 4.0, 6.0, 8.0, 8.0, 12.0, 22.0, 29.0, 32.0, 39.0, 77.0, 104.0, 174.0, 144.0, 92.0, 61.0, 46.0, 34.0, 21.0, 19.0, 12.0, 8.0, 8.0, 8.0, 8.0, 3.0, 4.0, 0.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.00024611453409306705, -0.0002386648702668026, -0.00023121519188862294, -0.0002237655280623585, -0.00021631584968417883, -0.0002088661858579144, -0.00020141652203164995, -0.00019396684365347028, -0.0001865171652752906, -0.00017906750144902617, -0.0001716178230708465, -0.00016416815924458206, -0.0001567184808664024, -0.00014926881704013795, -0.0001418191532138735, -0.00013436947483569384, -0.0001269198110094294, -0.00011947013990720734, -0.00011202046880498528, -0.00010457080497872084, -9.712112660054117e-05, -8.967146277427673e-05, -8.222179167205468e-05, -7.477212056983262e-05, -6.732244946761057e-05, -5.987277836538851e-05, -5.242310726316646e-05, -4.497343979892321e-05, -3.7523768696701154e-05, -3.00740975944791e-05, -2.262443013023585e-05, -1.5174759028013796e-05, -7.725073373876512e-06, -2.7540318114915863e-07, 7.174267011578195e-06, 1.4623936294810846e-05, 2.20736073970329e-05, 2.9523278499254957e-05, 3.6972945963498205e-05, 4.442261706572026e-05, 5.1872288167942315e-05, 5.932195927016437e-05, 6.677163037238643e-05, 7.422130147460848e-05, 8.167096530087292e-05, 8.912064367905259e-05, 9.657030750531703e-05, 0.00010401997860753909, 0.00011146964970976114, 0.0001189193208119832, 0.00012636899191420525, 0.0001338186557404697, 0.00014126833411864936, 0.0001487179979449138, 0.00015616766177117825, 0.00016361734014935791, 0.00017106701852753758, 0.00017851668235380203, 0.0001859663607319817, 0.00019341602455824614, 0.0002008657029364258, 0.00020831536676269025, 
0.0002157650305889547, 0.00022321470896713436, 0.0002306643727933988]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 6.0, 0.0, 5.0, 5.0, 6.0, 9.0, 18.0, 12.0, 13.0, 19.0, 20.0, 22.0, 21.0, 25.0, 41.0, 38.0, 31.0, 56.0, 39.0, 39.0, 45.0, 54.0, 41.0, 46.0, 45.0, 35.0, 42.0, 43.0, 30.0, 22.0, 37.0, 24.0, 15.0, 17.0, 17.0, 15.0, 9.0, 7.0, 11.0, 7.0, 6.0, 4.0, 4.0, 4.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0], "bins": [-0.00016099214553833008, -0.0001564202830195427, -0.0001518484205007553, -0.00014727655798196793, -0.00014270469546318054, -0.00013813283294439316, -0.00013356097042560577, -0.0001289891079068184, -0.000124417245388031, -0.00011984538286924362, -0.00011527352035045624, -0.00011070165783166885, -0.00010612979531288147, -0.00010155793279409409, -9.69860702753067e-05, -9.241420775651932e-05, -8.784234523773193e-05, -8.327048271894455e-05, -7.869862020015717e-05, -7.412675768136978e-05, -6.95548951625824e-05, -6.498303264379501e-05, -6.041117012500763e-05, -5.5839307606220245e-05, -5.126744508743286e-05, -4.669558256864548e-05, -4.212372004985809e-05, -3.755185753107071e-05, -3.2979995012283325e-05, -2.840813249349594e-05, -2.3836269974708557e-05, -1.9264407455921173e-05, -1.4692544937133789e-05, -1.0120682418346405e-05, -5.548819899559021e-06, -9.76957380771637e-07, 3.594905138015747e-06, 8.166767656803131e-06, 1.2738630175590515e-05, 1.73104926943779e-05, 2.1882355213165283e-05, 2.6454217731952667e-05, 3.102608025074005e-05, 3.5597942769527435e-05, 4.016980528831482e-05, 4.47416678071022e-05, 4.931353032588959e-05, 5.388539284467697e-05, 5.8457255363464355e-05, 6.302911788225174e-05, 6.760098040103912e-05, 7.217284291982651e-05, 7.674470543861389e-05, 8.131656795740128e-05, 8.588843047618866e-05, 9.046029299497604e-05, 9.503215551376343e-05, 9.960401803255081e-05, 0.0001041758805513382, 0.00010874774307012558, 0.00011331960558891296, 0.00011789146810770035, 0.00012246333062648773, 0.00012703519314527512, 0.0001316070556640625]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.0, 4.0, 2.0, 4.0, 4.0, 2.0, 10.0, 18.0, 24.0, 29.0, 40.0, 49.0, 57.0, 107.0, 114.0, 168.0, 313.0, 489.0, 730.0, 1235.0, 1911.0, 3581.0, 6990.0, 14896.0, 40765.0, 843255.0, 3182603.0, 55625.0, 19379.0, 9171.0, 4852.0, 2841.0, 1770.0, 1025.0, 740.0, 457.0, 283.0, 209.0, 147.0, 92.0, 87.0, 53.0, 42.0, 32.0, 21.0, 17.0, 7.0, 10.0, 4.0, 5.0, 4.0, 7.0, 3.0, 4.0, 1.0, 3.0, 2.0], "bins": [-0.0001170039176940918, -0.00011347047984600067, -0.00010993704199790955, -0.00010640360414981842, -0.0001028701663017273, -9.933672845363617e-05, -9.580329060554504e-05, -9.226985275745392e-05, -8.873641490936279e-05, -8.520297706127167e-05, -8.166953921318054e-05, -7.813610136508942e-05, -7.460266351699829e-05, -7.106922566890717e-05, -6.753578782081604e-05, -6.400234997272491e-05, -6.046891212463379e-05, -5.6935474276542664e-05, -5.340203642845154e-05, -4.986859858036041e-05, -4.633516073226929e-05, -4.280172288417816e-05, -3.9268285036087036e-05, -3.573484718799591e-05, -3.2201409339904785e-05, -2.866797149181366e-05, -2.5134533643722534e-05, -2.160109579563141e-05, -1.8067657947540283e-05, -1.4534220099449158e-05, -1.1000782251358032e-05, -7.467344403266907e-06, -3.933906555175781e-06, -4.0046870708465576e-07, 3.1329691410064697e-06, 6.666406989097595e-06, 1.019984483718872e-05, 1.3733282685279846e-05, 1.726672053337097e-05, 
2.0800158381462097e-05, 2.4333596229553223e-05, 2.7867034077644348e-05, 3.1400471925735474e-05, 3.49339097738266e-05, 3.8467347621917725e-05, 4.200078547000885e-05, 4.5534223318099976e-05, 4.90676611661911e-05, 5.2601099014282227e-05, 5.613453686237335e-05, 5.966797471046448e-05, 6.32014125585556e-05, 6.673485040664673e-05, 7.026828825473785e-05, 7.380172610282898e-05, 7.73351639509201e-05, 8.086860179901123e-05, 8.440203964710236e-05, 8.793547749519348e-05, 9.146891534328461e-05, 9.500235319137573e-05, 9.853579103946686e-05, 0.00010206922888755798, 0.00010560266673564911, 0.00010913610458374023]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 2.0, 2.0, 13.0, 9.0, 10.0, 16.0, 16.0, 28.0, 31.0, 38.0, 63.0, 58.0, 63.0, 62.0, 90.0, 65.0, 74.0, 54.0, 53.0, 47.0, 39.0, 33.0, 23.0, 19.0, 16.0, 21.0, 13.0, 10.0, 7.0, 4.0, 6.0, 5.0, 2.0, 3.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-2.658367156982422e-05, -2.5725923478603363e-05, -2.4868175387382507e-05, -2.401042729616165e-05, -2.3152679204940796e-05, -2.229493111371994e-05, -2.1437183022499084e-05, -2.057943493127823e-05, -1.9721686840057373e-05, -1.8863938748836517e-05, -1.800619065761566e-05, -1.7148442566394806e-05, -1.629069447517395e-05, -1.5432946383953094e-05, -1.4575198292732239e-05, -1.3717450201511383e-05, -1.2859702110290527e-05, -1.2001954019069672e-05, -1.1144205927848816e-05, -1.028645783662796e-05, -9.428709745407104e-06, -8.570961654186249e-06, -7.713213562965393e-06, -6.855465471744537e-06, -5.997717380523682e-06, -5.139969289302826e-06, -4.28222119808197e-06, -3.4244731068611145e-06, -2.566725015640259e-06, -1.708976924419403e-06, -8.512288331985474e-07, 6.51925802230835e-09, 8.642673492431641e-07, 1.7220154404640198e-06, 2.5797635316848755e-06, 3.437511622905731e-06, 4.295259714126587e-06, 5.153007805347443e-06, 6.010755896568298e-06, 6.868503987789154e-06, 7.72625207901001e-06, 8.584000170230865e-06, 9.441748261451721e-06, 1.0299496352672577e-05, 1.1157244443893433e-05, 1.2014992535114288e-05, 1.2872740626335144e-05, 1.3730488717556e-05, 1.4588236808776855e-05, 1.544598489999771e-05, 1.6303732991218567e-05, 1.7161481082439423e-05, 1.801922917366028e-05, 1.8876977264881134e-05, 1.973472535610199e-05, 2.0592473447322845e-05, 2.14502215385437e-05, 2.2307969629764557e-05, 2.3165717720985413e-05, 2.402346581220627e-05, 2.4881213903427124e-05, 2.573896199464798e-05, 2.6596710085868835e-05, 2.745445817708969e-05, 2.8312206268310547e-05]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 11.0, 11.0, 16.0, 14.0, 35.0, 49.0, 51.0, 82.0, 108.0, 200.0, 315.0, 448.0, 729.0, 1153.0, 1831.0, 3093.0, 5756.0, 10718.0, 22199.0, 55914.0, 238021.0, 3513015.0, 240125.0, 53324.0, 22445.0, 10796.0, 5648.0, 3112.0, 1856.0, 1090.0, 728.0, 462.0, 304.0, 223.0, 119.0, 99.0, 54.0, 40.0, 23.0, 19.0, 14.0, 9.0, 7.0, 4.0, 1.0, 2.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-7.009506225585938e-05, -6.786920130252838e-05, -6.564334034919739e-05, -6.34174793958664e-05, -6.11916184425354e-05, -5.896575748920441e-05, -5.673989653587341e-05, -5.451403558254242e-05, -5.2288174629211426e-05, -5.006231367588043e-05, -4.783645272254944e-05, -4.5610591769218445e-05, -4.338473081588745e-05, -4.115886986255646e-05, -3.8933008909225464e-05, -3.670714795589447e-05, 
-3.4481287002563477e-05, -3.225542604923248e-05, -3.002956509590149e-05, -2.7803704142570496e-05, -2.5577843189239502e-05, -2.3351982235908508e-05, -2.1126121282577515e-05, -1.890026032924652e-05, -1.6674399375915527e-05, -1.4448538422584534e-05, -1.222267746925354e-05, -9.996816515922546e-06, -7.770955562591553e-06, -5.545094609260559e-06, -3.3192336559295654e-06, -1.0933727025985718e-06, 1.1324882507324219e-06, 3.3583492040634155e-06, 5.584210157394409e-06, 7.810071110725403e-06, 1.0035932064056396e-05, 1.226179301738739e-05, 1.4487653970718384e-05, 1.6713514924049377e-05, 1.893937587738037e-05, 2.1165236830711365e-05, 2.339109778404236e-05, 2.5616958737373352e-05, 2.7842819690704346e-05, 3.006868064403534e-05, 3.229454159736633e-05, 3.452040255069733e-05, 3.674626350402832e-05, 3.8972124457359314e-05, 4.119798541069031e-05, 4.34238463640213e-05, 4.5649707317352295e-05, 4.787556827068329e-05, 5.010142922401428e-05, 5.2327290177345276e-05, 5.455315113067627e-05, 5.677901208400726e-05, 5.900487303733826e-05, 6.123073399066925e-05, 6.345659494400024e-05, 6.568245589733124e-05, 6.790831685066223e-05, 7.013417780399323e-05, 7.236003875732422e-05]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 6.0, 0.0, 3.0, 2.0, 3.0, 6.0, 8.0, 14.0, 9.0, 24.0, 23.0, 27.0, 48.0, 63.0, 92.0, 223.0, 763.0, 1734.0, 549.0, 175.0, 83.0, 41.0, 31.0, 28.0, 28.0, 18.0, 20.0, 14.0, 14.0, 8.0, 5.0, 4.0, 3.0, 1.0, 5.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-5.906820297241211e-05, -5.718506872653961e-05, -5.5301934480667114e-05, -5.341880023479462e-05, -5.153566598892212e-05, -4.965253174304962e-05, -4.7769397497177124e-05, -4.5886263251304626e-05, -4.400312900543213e-05, -4.211999475955963e-05, -4.0236860513687134e-05, -3.8353726267814636e-05, -3.647059202194214e-05, -3.458745777606964e-05, -3.2704323530197144e-05, -3.0821189284324646e-05, -2.893805503845215e-05, -2.705492079257965e-05, -2.5171786546707153e-05, -2.3288652300834656e-05, -2.1405518054962158e-05, -1.952238380908966e-05, -1.7639249563217163e-05, -1.5756115317344666e-05, -1.3872981071472168e-05, -1.198984682559967e-05, -1.0106712579727173e-05, -8.223578333854675e-06, -6.340444087982178e-06, -4.45730984210968e-06, -2.5741755962371826e-06, -6.910413503646851e-07, 1.1920928955078125e-06, 3.07522714138031e-06, 4.958361387252808e-06, 6.841495633125305e-06, 8.724629878997803e-06, 1.06077641248703e-05, 1.2490898370742798e-05, 1.4374032616615295e-05, 1.6257166862487793e-05, 1.814030110836029e-05, 2.0023435354232788e-05, 2.1906569600105286e-05, 2.3789703845977783e-05, 2.567283809185028e-05, 2.755597233772278e-05, 2.9439106583595276e-05, 3.1322240829467773e-05, 3.320537507534027e-05, 3.508850932121277e-05, 3.6971643567085266e-05, 3.8854777812957764e-05, 4.073791205883026e-05, 4.262104630470276e-05, 4.4504180550575256e-05, 4.6387314796447754e-05, 4.827044904232025e-05, 5.015358328819275e-05, 5.2036717534065247e-05, 5.3919851779937744e-05, 5.580298602581024e-05, 5.768612027168274e-05, 5.956925451755524e-05, 6.145238876342773e-05]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 2.0, 6.0, 9.0, 13.0, 22.0, 47.0, 64.0, 126.0, 171.0, 158.0, 132.0, 
86.0, 57.0, 36.0, 24.0, 17.0, 10.0, 13.0, 5.0, 2.0, 4.0, 3.0, 2.0], "bins": [-0.0006312414188869298, -0.0006186043028719723, -0.0006059671868570149, -0.0005933301290497184, -0.000580693013034761, -0.0005680558970198035, -0.0005554187810048461, -0.0005427816649898887, -0.0005301445489749312, -0.0005175074329599738, -0.0005048703169450164, -0.0004922332591377199, -0.00047959614312276244, -0.000466959027107805, -0.0004543219110928476, -0.00044168479507789016, -0.00042904773727059364, -0.0004164106212556362, -0.00040377353434450924, -0.0003911364183295518, -0.00037849933141842484, -0.0003658622154034674, -0.00035322509938851, -0.00034058798337355256, -0.0003279508964624256, -0.00031531378044746816, -0.0003026766935363412, -0.00029003957752138376, -0.00027740246150642633, -0.00026476537459529936, -0.00025212825858034194, -0.00023949115711729974, -0.000226854084758088, -0.0002142169832950458, -0.0002015798818320036, -0.00018894276581704617, -0.00017630566435400397, -0.00016366856289096177, -0.00015103144687600434, -0.00013839434541296214, -0.00012575724394991994, -0.00011312014248687774, -0.00010048303374787793, -8.784592500887811e-05, -7.520882354583591e-05, -6.257172208279371e-05, -4.99346133437939e-05, -3.7297504604794085e-05, -2.4660403141751885e-05, -1.2023298040730879e-05, 6.13807060290128e-07, 1.3250912161311135e-05, 2.588801726233214e-05, 3.852511872537434e-05, 5.1162227464374155e-05, 6.379933620337397e-05, 7.643643766641617e-05, 8.907353912945837e-05, 0.00010171064786845818, 0.000114347756607458, 0.0001269848580705002, 0.0001396219595335424, 0.00015225907554849982, 0.00016489617701154202, 0.00017753327847458422]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 5.0, 2.0, 4.0, 9.0, 8.0, 14.0, 7.0, 18.0, 5.0, 13.0, 14.0, 24.0, 19.0, 21.0, 33.0, 32.0, 32.0, 25.0, 49.0, 44.0, 34.0, 37.0, 46.0, 43.0, 32.0, 29.0, 50.0, 28.0, 43.0, 43.0, 35.0, 34.0, 27.0, 25.0, 26.0, 27.0, 21.0, 11.0, 9.0, 7.0, 4.0, 8.0, 1.0, 1.0, 2.0, 6.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00013947486877441406, -0.00013529788702726364, -0.00013112090528011322, -0.0001269439235329628, -0.00012276694178581238, -0.00011858996003866196, -0.00011441297829151154, -0.00011023599654436111, -0.0001060590147972107, -0.00010188203305006027, -9.770505130290985e-05, -9.352806955575943e-05, -8.935108780860901e-05, -8.517410606145859e-05, -8.099712431430817e-05, -7.682014256715775e-05, -7.264316082000732e-05, -6.84661790728569e-05, -6.428919732570648e-05, -6.011221557855606e-05, -5.593523383140564e-05, -5.175825208425522e-05, -4.75812703371048e-05, -4.3404288589954376e-05, -3.9227306842803955e-05, -3.5050325095653534e-05, -3.087334334850311e-05, -2.669636160135269e-05, -2.251937985420227e-05, -1.834239810705185e-05, -1.4165416359901428e-05, -9.988434612751007e-06, -5.811452865600586e-06, -1.6344711184501648e-06, 2.5425106287002563e-06, 6.7194923758506775e-06, 1.0896474123001099e-05, 1.507345587015152e-05, 1.925043761730194e-05, 2.3427419364452362e-05, 2.7604401111602783e-05, 3.1781382858753204e-05, 3.5958364605903625e-05, 4.013534635305405e-05, 4.431232810020447e-05, 4.848930984735489e-05, 5.266629159450531e-05, 5.684327334165573e-05, 6.102025508880615e-05, 6.519723683595657e-05, 6.9374218583107e-05, 7.355120033025742e-05, 7.772818207740784e-05, 8.190516382455826e-05, 8.608214557170868e-05, 9.02591273188591e-05, 9.443610906600952e-05, 9.861309081315994e-05, 0.00010279007256031036, 0.00010696705430746078, 0.0001111440360546112, 
0.00011532101780176163, 0.00011949799954891205, 0.00012367498129606247, 0.0001278519630432129]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 4.0, 6.0, 10.0, 4.0, 16.0, 16.0, 22.0, 36.0, 49.0, 61.0, 58.0, 106.0, 131.0, 202.0, 257.0, 347.0, 543.0, 754.0, 1073.0, 1532.0, 2380.0, 3911.0, 6091.0, 10399.0, 18975.0, 38202.0, 90713.0, 341594.0, 352592.0, 93272.0, 38402.0, 18855.0, 10431.0, 6061.0, 3754.0, 2405.0, 1613.0, 1124.0, 741.0, 495.0, 376.0, 269.0, 201.0, 136.0, 95.0, 88.0, 49.0, 39.0, 27.0, 8.0, 19.0, 9.0, 3.0, 4.0, 4.0, 2.0, 3.0], "bins": [-0.00012958049774169922, -0.0001258784905076027, -0.00012217648327350616, -0.00011847447603940964, -0.00011477246880531311, -0.00011107046157121658, -0.00010736845433712006, -0.00010366644710302353, -9.9964439868927e-05, -9.626243263483047e-05, -9.256042540073395e-05, -8.885841816663742e-05, -8.51564109325409e-05, -8.145440369844437e-05, -7.775239646434784e-05, -7.405038923025131e-05, -7.034838199615479e-05, -6.664637476205826e-05, -6.294436752796173e-05, -5.9242360293865204e-05, -5.554035305976868e-05, -5.183834582567215e-05, -4.813633859157562e-05, -4.4434331357479095e-05, -4.073232412338257e-05, -3.703031688928604e-05, -3.3328309655189514e-05, -2.9626302421092987e-05, -2.592429518699646e-05, -2.2222287952899933e-05, -1.8520280718803406e-05, -1.4818273484706879e-05, -1.1116266250610352e-05, -7.4142590165138245e-06, -3.7122517824172974e-06, -1.0244548320770264e-08, 3.691762685775757e-06, 7.393769919872284e-06, 1.1095777153968811e-05, 1.4797784388065338e-05, 1.8499791622161865e-05, 2.2201798856258392e-05, 2.590380609035492e-05, 2.9605813324451447e-05, 3.3307820558547974e-05, 3.70098277926445e-05, 4.071183502674103e-05, 4.4413842260837555e-05, 4.811584949493408e-05, 5.181785672903061e-05, 5.5519863963127136e-05, 5.922187119722366e-05, 6.292387843132019e-05, 6.662588566541672e-05, 7.032789289951324e-05, 7.402990013360977e-05, 7.77319073677063e-05, 8.143391460180283e-05, 8.513592183589935e-05, 8.883792906999588e-05, 9.253993630409241e-05, 9.624194353818893e-05, 9.994395077228546e-05, 0.00010364595800638199, 0.00010734796524047852]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 2.0, 6.0, 1.0, 7.0, 7.0, 6.0, 13.0, 16.0, 18.0, 33.0, 28.0, 30.0, 59.0, 67.0, 65.0, 82.0, 104.0, 79.0, 61.0, 62.0, 57.0, 41.0, 33.0, 23.0, 21.0, 16.0, 9.0, 9.0, 13.0, 2.0, 9.0, 3.0, 4.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.5510787963867188e-05, -2.4650245904922485e-05, -2.3789703845977783e-05, -2.292916178703308e-05, -2.206861972808838e-05, -2.1208077669143677e-05, -2.0347535610198975e-05, -1.9486993551254272e-05, -1.862645149230957e-05, -1.7765909433364868e-05, -1.6905367374420166e-05, -1.6044825315475464e-05, -1.5184283256530762e-05, -1.432374119758606e-05, -1.3463199138641357e-05, -1.2602657079696655e-05, -1.1742115020751953e-05, -1.0881572961807251e-05, -1.0021030902862549e-05, -9.160488843917847e-06, -8.299946784973145e-06, -7.439404726028442e-06, -6.57886266708374e-06, -5.718320608139038e-06, -4.857778549194336e-06, -3.997236490249634e-06, -3.1366944313049316e-06, -2.2761523723602295e-06, -1.4156103134155273e-06, -5.550682544708252e-07, 3.0547380447387695e-07, 1.166015863418579e-06, 2.0265579223632812e-06, 2.8870999813079834e-06, 3.7476420402526855e-06, 4.608184099197388e-06, 5.46872615814209e-06, 
6.329268217086792e-06, 7.189810276031494e-06, 8.050352334976196e-06, 8.910894393920898e-06, 9.7714364528656e-06, 1.0631978511810303e-05, 1.1492520570755005e-05, 1.2353062629699707e-05, 1.321360468864441e-05, 1.4074146747589111e-05, 1.4934688806533813e-05, 1.5795230865478516e-05, 1.6655772924423218e-05, 1.751631498336792e-05, 1.8376857042312622e-05, 1.9237399101257324e-05, 2.0097941160202026e-05, 2.095848321914673e-05, 2.181902527809143e-05, 2.2679567337036133e-05, 2.3540109395980835e-05, 2.4400651454925537e-05, 2.526119351387024e-05, 2.612173557281494e-05, 2.6982277631759644e-05, 2.7842819690704346e-05, 2.8703361749649048e-05, 2.956390380859375e-05]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 9.0, 7.0, 15.0, 24.0, 31.0, 64.0, 81.0, 122.0, 199.0, 286.0, 450.0, 688.0, 1093.0, 1757.0, 2729.0, 3997.0, 6433.0, 10257.0, 16548.0, 27436.0, 47868.0, 88349.0, 194629.0, 331453.0, 143420.0, 70334.0, 39100.0, 22852.0, 14120.0, 8688.0, 5491.0, 3531.0, 2357.0, 1477.0, 975.0, 584.0, 373.0, 246.0, 185.0, 103.0, 58.0, 47.0, 29.0, 24.0, 15.0, 7.0, 10.0, 6.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-5.5849552154541016e-05, -5.412101745605469e-05, -5.239248275756836e-05, -5.066394805908203e-05, -4.89354133605957e-05, -4.7206878662109375e-05, -4.547834396362305e-05, -4.374980926513672e-05, -4.202127456665039e-05, -4.029273986816406e-05, -3.8564205169677734e-05, -3.6835670471191406e-05, -3.510713577270508e-05, -3.337860107421875e-05, -3.165006637573242e-05, -2.9921531677246094e-05, -2.8192996978759766e-05, -2.6464462280273438e-05, -2.473592758178711e-05, -2.300739288330078e-05, -2.1278858184814453e-05, -1.9550323486328125e-05, -1.7821788787841797e-05, -1.609325408935547e-05, -1.436471939086914e-05, -1.2636184692382812e-05, -1.0907649993896484e-05, -9.179115295410156e-06, -7.450580596923828e-06, -5.7220458984375e-06, -3.993511199951172e-06, -2.2649765014648438e-06, -5.364418029785156e-07, 1.1920928955078125e-06, 2.9206275939941406e-06, 4.649162292480469e-06, 6.377696990966797e-06, 8.106231689453125e-06, 9.834766387939453e-06, 1.1563301086425781e-05, 1.329183578491211e-05, 1.5020370483398438e-05, 1.6748905181884766e-05, 1.8477439880371094e-05, 2.0205974578857422e-05, 2.193450927734375e-05, 2.3663043975830078e-05, 2.5391578674316406e-05, 2.7120113372802734e-05, 2.8848648071289062e-05, 3.057718276977539e-05, 3.230571746826172e-05, 3.403425216674805e-05, 3.5762786865234375e-05, 3.74913215637207e-05, 3.921985626220703e-05, 4.094839096069336e-05, 4.267692565917969e-05, 4.4405460357666016e-05, 4.6133995056152344e-05, 4.786252975463867e-05, 4.9591064453125e-05, 5.131959915161133e-05, 5.3048133850097656e-05, 5.4776668548583984e-05]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 10.0, 6.0, 7.0, 12.0, 12.0, 14.0, 19.0, 18.0, 20.0, 23.0, 23.0, 31.0, 23.0, 35.0, 33.0, 37.0, 44.0, 38.0, 29.0, 36.0, 47.0, 40.0, 32.0, 40.0, 46.0, 41.0, 37.0, 38.0, 32.0, 18.0, 24.0, 23.0, 28.0, 12.0, 11.0, 15.0, 14.0, 7.0, 10.0, 5.0, 9.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.2928924560546875e-05, -5.1291659474372864e-05, -4.965439438819885e-05, -4.801712930202484e-05, -4.637986421585083e-05, -4.474259912967682e-05, -4.310533404350281e-05, -4.1468068957328796e-05, -3.9830803871154785e-05, -3.8193538784980774e-05, -3.655627369880676e-05, -3.491900861263275e-05, -3.328174352645874e-05, -3.164447844028473e-05, 
-3.0007213354110718e-05, -2.8369948267936707e-05, -2.6732683181762695e-05, -2.5095418095588684e-05, -2.3458153009414673e-05, -2.182088792324066e-05, -2.018362283706665e-05, -1.854635775089264e-05, -1.6909092664718628e-05, -1.5271827578544617e-05, -1.3634562492370605e-05, -1.1997297406196594e-05, -1.0360032320022583e-05, -8.722767233848572e-06, -7.0855021476745605e-06, -5.448237061500549e-06, -3.810971975326538e-06, -2.173706889152527e-06, -5.364418029785156e-07, 1.1008232831954956e-06, 2.738088369369507e-06, 4.375353455543518e-06, 6.012618541717529e-06, 7.64988362789154e-06, 9.287148714065552e-06, 1.0924413800239563e-05, 1.2561678886413574e-05, 1.4198943972587585e-05, 1.5836209058761597e-05, 1.7473474144935608e-05, 1.911073923110962e-05, 2.074800431728363e-05, 2.238526940345764e-05, 2.4022534489631653e-05, 2.5659799575805664e-05, 2.7297064661979675e-05, 2.8934329748153687e-05, 3.05715948343277e-05, 3.220885992050171e-05, 3.384612500667572e-05, 3.548339009284973e-05, 3.712065517902374e-05, 3.8757920265197754e-05, 4.0395185351371765e-05, 4.2032450437545776e-05, 4.366971552371979e-05, 4.53069806098938e-05, 4.694424569606781e-05, 4.858151078224182e-05, 5.021877586841583e-05, 5.1856040954589844e-05]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 6.0, 11.0, 4.0, 6.0, 10.0, 13.0, 27.0, 25.0, 44.0, 72.0, 74.0, 121.0, 183.0, 257.0, 339.0, 740.0, 1027.0, 1713.0, 3167.0, 5872.0, 12009.0, 27476.0, 79482.0, 328488.0, 454530.0, 79803.0, 27395.0, 12007.0, 5777.0, 3122.0, 1735.0, 1024.0, 678.0, 473.0, 265.0, 183.0, 120.0, 76.0, 72.0, 34.0, 28.0, 20.0, 19.0, 6.0, 5.0, 8.0, 6.0, 7.0, 3.0, 0.0, 1.0, 1.0], "bins": [-1.0967254638671875e-05, -1.0662712156772614e-05, -1.0358169674873352e-05, -1.005362719297409e-05, -9.749084711074829e-06, -9.444542229175568e-06, -9.139999747276306e-06, -8.835457265377045e-06, -8.530914783477783e-06, -8.226372301578522e-06, -7.92182981967926e-06, -7.617287337779999e-06, -7.312744855880737e-06, -7.008202373981476e-06, -6.703659892082214e-06, -6.399117410182953e-06, -6.094574928283691e-06, -5.79003244638443e-06, -5.4854899644851685e-06, -5.180947482585907e-06, -4.8764050006866455e-06, -4.571862518787384e-06, -4.2673200368881226e-06, -3.962777554988861e-06, -3.6582350730895996e-06, -3.353692591190338e-06, -3.0491501092910767e-06, -2.744607627391815e-06, -2.4400651454925537e-06, -2.1355226635932922e-06, -1.8309801816940308e-06, -1.5264376997947693e-06, -1.2218952178955078e-06, -9.173527359962463e-07, -6.128102540969849e-07, -3.082677721977234e-07, -3.725290298461914e-09, 3.0081719160079956e-07, 6.05359673500061e-07, 9.099021553993225e-07, 1.214444637298584e-06, 1.5189871191978455e-06, 1.823529601097107e-06, 2.1280720829963684e-06, 2.43261456489563e-06, 2.7371570467948914e-06, 3.041699528694153e-06, 3.3462420105934143e-06, 3.6507844924926758e-06, 3.955326974391937e-06, 4.259869456291199e-06, 4.56441193819046e-06, 4.868954420089722e-06, 5.173496901988983e-06, 5.478039383888245e-06, 5.782581865787506e-06, 6.087124347686768e-06, 6.391666829586029e-06, 6.6962093114852905e-06, 7.000751793384552e-06, 7.3052942752838135e-06, 7.609836757183075e-06, 7.914379239082336e-06, 8.218921720981598e-06, 8.52346420288086e-06]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 9.0, 9.0, 0.0, 6.0, 16.0, 15.0, 21.0, 0.0, 24.0, 35.0, 46.0, 0.0, 48.0, 55.0, 
59.0, 49.0, 0.0, 63.0, 72.0, 77.0, 58.0, 0.0, 58.0, 36.0, 39.0, 0.0, 36.0, 38.0, 37.0, 23.0, 0.0, 14.0, 20.0, 12.0, 0.0, 5.0, 9.0, 7.0, 5.0, 0.0, 1.0, 6.0, 5.0, 2.0], "bins": [-1.7881393432617188e-06, -1.7415732145309448e-06, -1.695007085800171e-06, -1.648440957069397e-06, -1.601874828338623e-06, -1.5553086996078491e-06, -1.5087425708770752e-06, -1.4621764421463013e-06, -1.4156103134155273e-06, -1.3690441846847534e-06, -1.3224780559539795e-06, -1.2759119272232056e-06, -1.2293457984924316e-06, -1.1827796697616577e-06, -1.1362135410308838e-06, -1.0896474123001099e-06, -1.043081283569336e-06, -9.96515154838562e-07, -9.499490261077881e-07, -9.033828973770142e-07, -8.568167686462402e-07, -8.102506399154663e-07, -7.636845111846924e-07, -7.171183824539185e-07, -6.705522537231445e-07, -6.239861249923706e-07, -5.774199962615967e-07, -5.308538675308228e-07, -4.842877388000488e-07, -4.377216100692749e-07, -3.91155481338501e-07, -3.4458935260772705e-07, -2.980232238769531e-07, -2.514570951461792e-07, -2.0489096641540527e-07, -1.5832483768463135e-07, -1.1175870895385742e-07, -6.51925802230835e-08, -1.862645149230957e-08, 2.7939677238464355e-08, 7.450580596923828e-08, 1.210719347000122e-07, 1.6763806343078613e-07, 2.1420419216156006e-07, 2.60770320892334e-07, 3.073364496231079e-07, 3.5390257835388184e-07, 4.0046870708465576e-07, 4.470348358154297e-07, 4.936009645462036e-07, 5.401670932769775e-07, 5.867332220077515e-07, 6.332993507385254e-07, 6.798654794692993e-07, 7.264316082000732e-07, 7.729977369308472e-07, 8.195638656616211e-07, 8.66129994392395e-07, 9.126961231231689e-07, 9.592622518539429e-07, 1.0058283805847168e-06, 1.0523945093154907e-06, 1.0989606380462646e-06, 1.1455267667770386e-06, 1.1920928955078125e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 4.0, 3.0, 6.0, 13.0, 17.0, 19.0, 30.0, 38.0, 57.0, 75.0, 139.0, 205.0, 294.0, 471.0, 1052.0, 1619.0, 2807.0, 5238.0, 10751.0, 23479.0, 60478.0, 214274.0, 551259.0, 107494.0, 36612.0, 15644.0, 7419.0, 3783.0, 2086.0, 1214.0, 851.0, 391.0, 247.0, 160.0, 104.0, 68.0, 48.0, 26.0, 28.0, 17.0, 10.0, 6.0, 12.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0], "bins": [-7.927417755126953e-06, -7.681548595428467e-06, -7.4356794357299805e-06, -7.189810276031494e-06, -6.943941116333008e-06, -6.6980719566345215e-06, -6.452202796936035e-06, -6.206333637237549e-06, -5.9604644775390625e-06, -5.714595317840576e-06, -5.46872615814209e-06, -5.2228569984436035e-06, -4.976987838745117e-06, -4.731118679046631e-06, -4.4852495193481445e-06, -4.239380359649658e-06, -3.993511199951172e-06, -3.7476420402526855e-06, -3.5017728805541992e-06, -3.255903720855713e-06, -3.0100345611572266e-06, -2.7641654014587402e-06, -2.518296241760254e-06, -2.2724270820617676e-06, -2.0265579223632812e-06, -1.780688762664795e-06, -1.5348196029663086e-06, -1.2889504432678223e-06, -1.043081283569336e-06, -7.972121238708496e-07, -5.513429641723633e-07, -3.0547380447387695e-07, -5.960464477539063e-08, 1.862645149230957e-07, 4.3213367462158203e-07, 6.780028343200684e-07, 9.238719940185547e-07, 1.169741153717041e-06, 1.4156103134155273e-06, 1.6614794731140137e-06, 1.9073486328125e-06, 2.1532177925109863e-06, 2.3990869522094727e-06, 2.644956111907959e-06, 2.8908252716064453e-06, 3.1366944313049316e-06, 3.382563591003418e-06, 3.6284327507019043e-06, 3.874301910400391e-06, 4.120171070098877e-06, 4.366040229797363e-06, 4.61190938949585e-06, 4.857778549194336e-06, 5.103647708892822e-06, 
5.349516868591309e-06, 5.595386028289795e-06, 5.841255187988281e-06, 6.087124347686768e-06, 6.332993507385254e-06, 6.57886266708374e-06, 6.8247318267822266e-06, 7.070600986480713e-06, 7.316470146179199e-06, 7.5623393058776855e-06, 7.808208465576172e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 4.0, 7.0, 8.0, 3.0, 6.0, 12.0, 14.0, 27.0, 26.0, 38.0, 62.0, 61.0, 86.0, 77.0, 123.0, 105.0, 80.0, 72.0, 39.0, 33.0, 34.0, 30.0, 15.0, 9.0, 9.0, 11.0, 7.0, 5.0, 0.0, 5.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.9141387939453125e-06, -6.687827408313751e-06, -6.46151602268219e-06, -6.235204637050629e-06, -6.008893251419067e-06, -5.782581865787506e-06, -5.556270480155945e-06, -5.3299590945243835e-06, -5.103647708892822e-06, -4.877336323261261e-06, -4.6510249376297e-06, -4.4247135519981384e-06, -4.198402166366577e-06, -3.972090780735016e-06, -3.7457793951034546e-06, -3.5194680094718933e-06, -3.293156623840332e-06, -3.0668452382087708e-06, -2.8405338525772095e-06, -2.614222466945648e-06, -2.387911081314087e-06, -2.1615996956825256e-06, -1.9352883100509644e-06, -1.708976924419403e-06, -1.4826655387878418e-06, -1.2563541531562805e-06, -1.0300427675247192e-06, -8.03731381893158e-07, -5.774199962615967e-07, -3.511086106300354e-07, -1.2479722499847412e-07, 1.0151416063308716e-07, 3.2782554626464844e-07, 5.541369318962097e-07, 7.80448317527771e-07, 1.0067597031593323e-06, 1.2330710887908936e-06, 1.4593824744224548e-06, 1.6856938600540161e-06, 1.9120052456855774e-06, 2.1383166313171387e-06, 2.3646280169487e-06, 2.5909394025802612e-06, 2.8172507882118225e-06, 3.043562173843384e-06, 3.269873559474945e-06, 3.4961849451065063e-06, 3.7224963307380676e-06, 3.948807716369629e-06, 4.17511910200119e-06, 4.4014304876327515e-06, 4.627741873264313e-06, 4.854053258895874e-06, 5.080364644527435e-06, 5.306676030158997e-06, 5.532987415790558e-06, 5.759298801422119e-06, 5.98561018705368e-06, 6.211921572685242e-06, 6.438232958316803e-06, 6.664544343948364e-06, 6.8908557295799255e-06, 7.117167115211487e-06, 7.343478500843048e-06, 7.569789886474609e-06]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 0.0, 2.0, 3.0, 1.0, 1.0, 6.0, 5.0, 3.0, 9.0, 6.0, 14.0, 12.0, 14.0, 21.0, 21.0, 37.0, 54.0, 73.0, 121.0, 129.0, 123.0, 80.0, 58.0, 37.0, 37.0, 26.0, 33.0, 13.0, 18.0, 11.0, 10.0, 12.0, 3.0, 1.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.0001898759073810652, -0.0001832707494031638, -0.00017666557687334716, -0.00017006041889544576, -0.00016345526091754436, -0.00015685008838772774, -0.00015024493040982634, -0.00014363977243192494, -0.0001370345999021083, -0.0001304294419242069, -0.00012382426939439029, -0.00011721911141648889, -0.00011061394616262987, -0.00010400878090877086, -9.740362293086946e-05, -9.079845767701045e-05, -8.419329242315143e-05, -7.758812716929242e-05, -7.098296191543341e-05, -6.437780393753201e-05, -5.7772638683672994e-05, -5.116747342981398e-05, -4.4562311813933775e-05, -3.795715019805357e-05, -3.1351984944194555e-05, -2.4746821509324946e-05, -1.8141658074455336e-05, -1.1536494639585726e-05, -4.931331204716116e-06, 1.6738340491428971e-06, 8.278995665023103e-06, 1.488415728090331e-05, 2.1489322534762323e-05, 2.8094485969631933e-05, 3.469964940450154e-05, 
4.130481102038175e-05, 4.790997627424076e-05, 5.4515141528099775e-05, 6.112029950600117e-05, 6.772546475986019e-05, 7.43306300137192e-05, 8.093579526757821e-05, 8.754096052143723e-05, 9.414611849933863e-05, 0.00010075128375319764, 0.00010735644900705665, 0.00011396160698495805, 0.00012056677223881707, 0.00012717193749267608, 0.00013377709547057748, 0.0001403822680003941, 0.0001469874259782955, 0.0001535925839561969, 0.00016019775648601353, 0.00016680291446391493, 0.00017340807244181633, 0.00018001324497163296, 0.00018661840294953436, 0.00019322357547935098, 0.00019982873345725238, 0.000206433905987069, 0.0002130390639649704, 0.0002196442219428718, 0.00022624939447268844, 0.00023285455245058984]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 6.0, 4.0, 10.0, 6.0, 14.0, 14.0, 12.0, 14.0, 16.0, 23.0, 17.0, 17.0, 21.0, 22.0, 34.0, 30.0, 35.0, 28.0, 38.0, 43.0, 26.0, 39.0, 40.0, 49.0, 43.0, 41.0, 46.0, 33.0, 31.0, 33.0, 19.0, 34.0, 27.0, 17.0, 23.0, 17.0, 10.0, 13.0, 11.0, 13.0, 8.0, 9.0, 4.0, 4.0, 5.0, 2.0, 0.0, 4.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.00013202428817749023, -0.0001277690753340721, -0.000123513862490654, -0.00011925864964723587, -0.00011500343680381775, -0.00011074822396039963, -0.0001064930111169815, -0.00010223779827356339, -9.798258543014526e-05, -9.372737258672714e-05, -8.947215974330902e-05, -8.52169468998909e-05, -8.096173405647278e-05, -7.670652121305466e-05, -7.245130836963654e-05, -6.819609552621841e-05, -6.394088268280029e-05, -5.968566983938217e-05, -5.543045699596405e-05, -5.117524415254593e-05, -4.692003130912781e-05, -4.2664818465709686e-05, -3.8409605622291565e-05, -3.4154392778873444e-05, -2.9899179935455322e-05, -2.56439670920372e-05, -2.138875424861908e-05, -1.7133541405200958e-05, -1.2878328561782837e-05, -8.623115718364716e-06, -4.367902874946594e-06, -1.126900315284729e-07, 4.1425228118896484e-06, 8.39773565530777e-06, 1.2652948498725891e-05, 1.6908161342144012e-05, 2.1163374185562134e-05, 2.5418587028980255e-05, 2.9673799872398376e-05, 3.39290127158165e-05, 3.818422555923462e-05, 4.243943840265274e-05, 4.669465124607086e-05, 5.094986408948898e-05, 5.5205076932907104e-05, 5.9460289776325226e-05, 6.371550261974335e-05, 6.797071546316147e-05, 7.222592830657959e-05, 7.648114114999771e-05, 8.073635399341583e-05, 8.499156683683395e-05, 8.924677968025208e-05, 9.35019925236702e-05, 9.775720536708832e-05, 0.00010201241821050644, 0.00010626763105392456, 0.00011052284389734268, 0.0001147780567407608, 0.00011903326958417892, 0.00012328848242759705, 0.00012754369527101517, 0.0001317989081144333, 0.0001360541209578514, 0.00014030933380126953]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 4.0, 5.0, 4.0, 6.0, 11.0, 10.0, 17.0, 22.0, 24.0, 50.0, 63.0, 94.0, 144.0, 223.0, 310.0, 668.0, 1106.0, 2166.0, 4677.0, 11497.0, 34995.0, 825143.0, 3240866.0, 47881.0, 13098.0, 5404.0, 2635.0, 1286.0, 757.0, 425.0, 277.0, 149.0, 89.0, 54.0, 43.0, 28.0, 24.0, 14.0, 9.0, 5.0, 3.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.00014460086822509766, -0.000140482559800148, -0.00013636425137519836, -0.00013224594295024872, -0.00012812763452529907, -0.00012400932610034943, -0.00011989101767539978, -0.00011577270925045013, -0.00011165440082550049, -0.00010753609240055084, -0.0001034177839756012, -9.929947555065155e-05, -9.51811671257019e-05, 
-9.106285870075226e-05, -8.694455027580261e-05, -8.282624185085297e-05, -7.870793342590332e-05, -7.458962500095367e-05, -7.047131657600403e-05, -6.635300815105438e-05, -6.223469972610474e-05, -5.811639130115509e-05, -5.3998082876205444e-05, -4.98797744512558e-05, -4.576146602630615e-05, -4.1643157601356506e-05, -3.752484917640686e-05, -3.3406540751457214e-05, -2.928823232650757e-05, -2.5169923901557922e-05, -2.1051615476608276e-05, -1.693330705165863e-05, -1.2814998626708984e-05, -8.696690201759338e-06, -4.578381776809692e-06, -4.600733518600464e-07, 3.6582350730895996e-06, 7.776543498039246e-06, 1.1894851922988892e-05, 1.6013160347938538e-05, 2.0131468772888184e-05, 2.424977719783783e-05, 2.8368085622787476e-05, 3.248639404773712e-05, 3.660470247268677e-05, 4.0723010897636414e-05, 4.484131932258606e-05, 4.8959627747535706e-05, 5.307793617248535e-05, 5.7196244597435e-05, 6.131455302238464e-05, 6.543286144733429e-05, 6.955116987228394e-05, 7.366947829723358e-05, 7.778778672218323e-05, 8.190609514713287e-05, 8.602440357208252e-05, 9.014271199703217e-05, 9.426102042198181e-05, 9.837932884693146e-05, 0.0001024976372718811, 0.00010661594569683075, 0.0001107342541217804, 0.00011485256254673004, 0.00011897087097167969]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 4.0, 3.0, 4.0, 8.0, 13.0, 15.0, 19.0, 21.0, 26.0, 35.0, 58.0, 52.0, 61.0, 79.0, 64.0, 75.0, 78.0, 64.0, 60.0, 63.0, 37.0, 48.0, 21.0, 24.0, 18.0, 9.0, 13.0, 8.0, 4.0, 4.0, 7.0, 5.0, 3.0, 2.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.331899642944336e-05, -3.238581120967865e-05, -3.145262598991394e-05, -3.051944077014923e-05, -2.958625555038452e-05, -2.8653070330619812e-05, -2.7719885110855103e-05, -2.6786699891090393e-05, -2.5853514671325684e-05, -2.4920329451560974e-05, -2.3987144231796265e-05, -2.3053959012031555e-05, -2.2120773792266846e-05, -2.1187588572502136e-05, -2.0254403352737427e-05, -1.9321218132972717e-05, -1.8388032913208008e-05, -1.74548476934433e-05, -1.652166247367859e-05, -1.558847725391388e-05, -1.465529203414917e-05, -1.372210681438446e-05, -1.2788921594619751e-05, -1.1855736374855042e-05, -1.0922551155090332e-05, -9.989365935325623e-06, -9.056180715560913e-06, -8.122995495796204e-06, -7.189810276031494e-06, -6.256625056266785e-06, -5.323439836502075e-06, -4.390254616737366e-06, -3.4570693969726562e-06, -2.5238841772079468e-06, -1.5906989574432373e-06, -6.575137376785278e-07, 2.7567148208618164e-07, 1.2088567018508911e-06, 2.1420419216156006e-06, 3.07522714138031e-06, 4.0084123611450195e-06, 4.941597580909729e-06, 5.8747828006744385e-06, 6.807968020439148e-06, 7.741153240203857e-06, 8.674338459968567e-06, 9.607523679733276e-06, 1.0540708899497986e-05, 1.1473894119262695e-05, 1.2407079339027405e-05, 1.3340264558792114e-05, 1.4273449778556824e-05, 1.5206634998321533e-05, 1.6139820218086243e-05, 1.7073005437850952e-05, 1.800619065761566e-05, 1.893937587738037e-05, 1.987256109714508e-05, 2.080574631690979e-05, 2.17389315366745e-05, 2.267211675643921e-05, 2.360530197620392e-05, 2.4538487195968628e-05, 2.5471672415733337e-05, 2.6404857635498047e-05]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 2.0, 3.0, 5.0, 7.0, 5.0, 9.0, 14.0, 18.0, 22.0, 34.0, 52.0, 62.0, 123.0, 169.0, 238.0, 367.0, 594.0, 987.0, 1541.0, 2533.0, 4765.0, 8582.0, 17355.0, 
37598.0, 116331.0, 2114938.0, 1696962.0, 116314.0, 38200.0, 16663.0, 8253.0, 4630.0, 2687.0, 1626.0, 895.0, 590.0, 374.0, 224.0, 167.0, 114.0, 79.0, 45.0, 29.0, 27.0, 25.0, 12.0, 4.0, 7.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-6.437301635742188e-05, -6.242189556360245e-05, -6.047077476978302e-05, -5.851965397596359e-05, -5.6568533182144165e-05, -5.461741238832474e-05, -5.266629159450531e-05, -5.071517080068588e-05, -4.8764050006866455e-05, -4.681292921304703e-05, -4.48618084192276e-05, -4.291068762540817e-05, -4.0959566831588745e-05, -3.900844603776932e-05, -3.705732524394989e-05, -3.510620445013046e-05, -3.3155083656311035e-05, -3.120396286249161e-05, -2.925284206867218e-05, -2.7301721274852753e-05, -2.5350600481033325e-05, -2.3399479687213898e-05, -2.144835889339447e-05, -1.9497238099575043e-05, -1.7546117305755615e-05, -1.5594996511936188e-05, -1.364387571811676e-05, -1.1692754924297333e-05, -9.741634130477905e-06, -7.790513336658478e-06, -5.83939254283905e-06, -3.888271749019623e-06, -1.9371509552001953e-06, 1.3969838619232178e-08, 1.9650906324386597e-06, 3.916211426258087e-06, 5.867332220077515e-06, 7.818453013896942e-06, 9.76957380771637e-06, 1.1720694601535797e-05, 1.3671815395355225e-05, 1.5622936189174652e-05, 1.757405698299408e-05, 1.9525177776813507e-05, 2.1476298570632935e-05, 2.3427419364452362e-05, 2.537854015827179e-05, 2.7329660952091217e-05, 2.9280781745910645e-05, 3.123190253973007e-05, 3.31830233335495e-05, 3.513414412736893e-05, 3.7085264921188354e-05, 3.903638571500778e-05, 4.098750650882721e-05, 4.293862730264664e-05, 4.4889748096466064e-05, 4.684086889028549e-05, 4.879198968410492e-05, 5.074311047792435e-05, 5.2694231271743774e-05, 5.46453520655632e-05, 5.659647285938263e-05, 5.854759365320206e-05, 6.0498714447021484e-05]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 3.0, 6.0, 8.0, 3.0, 5.0, 6.0, 7.0, 19.0, 13.0, 26.0, 17.0, 31.0, 48.0, 41.0, 63.0, 147.0, 270.0, 610.0, 1313.0, 717.0, 287.0, 146.0, 66.0, 48.0, 34.0, 28.0, 24.0, 13.0, 12.0, 14.0, 7.0, 4.0, 5.0, 7.0, 5.0, 6.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0, 0.0, 1.0, 3.0], "bins": [-5.352497100830078e-05, -5.21000474691391e-05, -5.067512392997742e-05, -4.9250200390815735e-05, -4.782527685165405e-05, -4.640035331249237e-05, -4.497542977333069e-05, -4.3550506234169006e-05, -4.2125582695007324e-05, -4.070065915584564e-05, -3.927573561668396e-05, -3.785081207752228e-05, -3.6425888538360596e-05, -3.5000964999198914e-05, -3.357604146003723e-05, -3.215111792087555e-05, -3.072619438171387e-05, -2.9301270842552185e-05, -2.7876347303390503e-05, -2.645142376422882e-05, -2.502650022506714e-05, -2.3601576685905457e-05, -2.2176653146743774e-05, -2.0751729607582092e-05, -1.932680606842041e-05, -1.7901882529258728e-05, -1.6476958990097046e-05, -1.5052035450935364e-05, -1.3627111911773682e-05, -1.2202188372612e-05, -1.0777264833450317e-05, -9.352341294288635e-06, -7.927417755126953e-06, -6.502494215965271e-06, -5.077570676803589e-06, -3.6526471376419067e-06, -2.2277235984802246e-06, -8.028000593185425e-07, 6.221234798431396e-07, 2.0470470190048218e-06, 3.471970558166504e-06, 4.896894097328186e-06, 6.321817636489868e-06, 7.74674117565155e-06, 9.171664714813232e-06, 1.0596588253974915e-05, 1.2021511793136597e-05, 1.3446435332298279e-05, 1.4871358871459961e-05, 1.6296282410621643e-05, 1.7721205949783325e-05, 1.9146129488945007e-05, 2.057105302810669e-05, 
2.199597656726837e-05, 2.3420900106430054e-05, 2.4845823645591736e-05, 2.6270747184753418e-05, 2.76956707239151e-05, 2.9120594263076782e-05, 3.0545517802238464e-05, 3.1970441341400146e-05, 3.339536488056183e-05, 3.482028841972351e-05, 3.624521195888519e-05, 3.7670135498046875e-05]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 4.0, 5.0, 2.0, 5.0, 6.0, 5.0, 7.0, 9.0, 7.0, 12.0, 11.0, 7.0, 24.0, 24.0, 25.0, 36.0, 54.0, 72.0, 83.0, 97.0, 100.0, 78.0, 72.0, 61.0, 46.0, 37.0, 31.0, 23.0, 19.0, 14.0, 10.0, 6.0, 4.0, 3.0, 5.0, 3.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.00021340936655178666, -0.00020714483980555087, -0.00020088031305931509, -0.00019461580086499453, -0.00018835127411875874, -0.00018208674737252295, -0.00017582222062628716, -0.00016955769388005137, -0.0001632931671338156, -0.0001570286403875798, -0.000150764113641344, -0.00014449958689510822, -0.00013823507470078766, -0.00013197054795455188, -0.0001257060212083161, -0.0001194414944620803, -0.00011317697499180213, -0.00010691244824556634, -0.00010064792877528816, -9.438340202905238e-05, -8.811887528281659e-05, -8.18543485365808e-05, -7.558982906630263e-05, -6.932530232006684e-05, -6.306078284978867e-05, -5.6796259741531685e-05, -5.05317329952959e-05, -4.4267209887038916e-05, -3.800268314080313e-05, -3.173816003254615e-05, -2.5473636924289167e-05, -1.920911017805338e-05, -1.2944583431817591e-05, -6.680058959318558e-06, -4.155344868195243e-07, 5.848989530932158e-06, 1.2113514458178543e-05, 1.8378039385424927e-05, 2.4642562493681908e-05, 3.0907089239917696e-05, 3.7171612348174676e-05, 4.343613545643166e-05, 4.9700662202667445e-05, 5.5965185310924426e-05, 6.22297084191814e-05, 6.84942351654172e-05, 7.475876191165298e-05, 8.102328865788877e-05, 8.728780812816694e-05, 9.355233487440273e-05, 9.98168543446809e-05, 0.00010608138109091669, 0.00011234590783715248, 0.00011861043458338827, 0.00012487496132962406, 0.00013113947352394462, 0.0001374040002701804, 0.0001436685270164162, 0.00014993305376265198, 0.00015619758050888777, 0.00016246209270320833, 0.00016872661944944412, 0.0001749911461956799, 0.0001812556729419157, 0.00018752019968815148]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 5.0, 2.0, 1.0, 5.0, 4.0, 6.0, 4.0, 7.0, 9.0, 13.0, 17.0, 12.0, 8.0, 20.0, 15.0, 17.0, 19.0, 17.0, 24.0, 32.0, 26.0, 29.0, 34.0, 27.0, 21.0, 30.0, 52.0, 31.0, 39.0, 42.0, 35.0, 28.0, 34.0, 35.0, 35.0, 32.0, 33.0, 36.0, 29.0, 18.0, 19.0, 13.0, 21.0, 8.0, 13.0, 5.0, 11.0, 9.0, 3.0, 9.0, 1.0, 4.0, 4.0, 3.0, 1.0, 3.0, 5.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-9.322166442871094e-05, -9.010452777147293e-05, -8.698739111423492e-05, -8.387025445699692e-05, -8.075311779975891e-05, -7.76359811425209e-05, -7.45188444852829e-05, -7.140170782804489e-05, -6.828457117080688e-05, -6.516743451356888e-05, -6.205029785633087e-05, -5.8933161199092865e-05, -5.581602454185486e-05, -5.269888788461685e-05, -4.9581751227378845e-05, -4.646461457014084e-05, -4.334747791290283e-05, -4.0230341255664825e-05, -3.711320459842682e-05, -3.399606794118881e-05, -3.0878931283950806e-05, -2.77617946267128e-05, -2.4644657969474792e-05, -2.1527521312236786e-05, -1.841038465499878e-05, -1.5293247997760773e-05, -1.2176111340522766e-05, -9.05897468328476e-06, -5.941838026046753e-06, -2.8247013688087463e-06, 2.9243528842926025e-07, 3.409571945667267e-06, 6.5267086029052734e-06, 
9.64384526014328e-06, 1.2760981917381287e-05, 1.5878118574619293e-05, 1.89952552318573e-05, 2.2112391889095306e-05, 2.5229528546333313e-05, 2.834666520357132e-05, 3.1463801860809326e-05, 3.458093851804733e-05, 3.769807517528534e-05, 4.0815211832523346e-05, 4.393234848976135e-05, 4.704948514699936e-05, 5.0166621804237366e-05, 5.328375846147537e-05, 5.640089511871338e-05, 5.9518031775951385e-05, 6.263516843318939e-05, 6.57523050904274e-05, 6.88694417476654e-05, 7.198657840490341e-05, 7.510371506214142e-05, 7.822085171937943e-05, 8.133798837661743e-05, 8.445512503385544e-05, 8.757226169109344e-05, 9.068939834833145e-05, 9.380653500556946e-05, 9.692367166280746e-05, 0.00010004080832004547, 0.00010315794497728348, 0.00010627508163452148]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 7.0, 3.0, 4.0, 9.0, 14.0, 14.0, 26.0, 30.0, 55.0, 64.0, 88.0, 140.0, 211.0, 308.0, 487.0, 714.0, 1128.0, 1862.0, 3185.0, 5608.0, 11384.0, 24885.0, 62586.0, 205730.0, 522381.0, 124631.0, 44219.0, 18667.0, 8683.0, 4503.0, 2564.0, 1565.0, 939.0, 588.0, 407.0, 265.0, 186.0, 127.0, 85.0, 50.0, 42.0, 29.0, 32.0, 17.0, 15.0, 11.0, 5.0, 3.0, 1.0, 2.0, 0.0, 1.0, 4.0], "bins": [-0.0001556873321533203, -0.00015127845108509064, -0.00014686957001686096, -0.0001424606889486313, -0.0001380518078804016, -0.00013364292681217194, -0.00012923404574394226, -0.00012482516467571259, -0.00012041628360748291, -0.00011600740253925323, -0.00011159852147102356, -0.00010718964040279388, -0.00010278075933456421, -9.837187826633453e-05, -9.396299719810486e-05, -8.955411612987518e-05, -8.514523506164551e-05, -8.073635399341583e-05, -7.632747292518616e-05, -7.191859185695648e-05, -6.75097107887268e-05, -6.310082972049713e-05, -5.8691948652267456e-05, -5.428306758403778e-05, -4.9874186515808105e-05, -4.546530544757843e-05, -4.1056424379348755e-05, -3.664754331111908e-05, -3.2238662242889404e-05, -2.782978117465973e-05, -2.3420900106430054e-05, -1.901201903820038e-05, -1.4603137969970703e-05, -1.0194256901741028e-05, -5.7853758335113525e-06, -1.3764947652816772e-06, 3.032386302947998e-06, 7.441267371177673e-06, 1.1850148439407349e-05, 1.6259029507637024e-05, 2.06679105758667e-05, 2.5076791644096375e-05, 2.948567271232605e-05, 3.3894553780555725e-05, 3.83034348487854e-05, 4.2712315917015076e-05, 4.712119698524475e-05, 5.1530078053474426e-05, 5.59389591217041e-05, 6.034784018993378e-05, 6.475672125816345e-05, 6.916560232639313e-05, 7.35744833946228e-05, 7.798336446285248e-05, 8.239224553108215e-05, 8.680112659931183e-05, 9.12100076675415e-05, 9.561888873577118e-05, 0.00010002776980400085, 0.00010443665087223053, 0.0001088455319404602, 0.00011325441300868988, 0.00011766329407691956, 0.00012207217514514923, 0.0001264810562133789]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 1.0, 6.0, 7.0, 6.0, 8.0, 17.0, 14.0, 17.0, 31.0, 49.0, 51.0, 59.0, 57.0, 78.0, 81.0, 92.0, 89.0, 85.0, 43.0, 47.0, 42.0, 38.0, 20.0, 18.0, 10.0, 6.0, 10.0, 4.0, 2.0, 3.0, 1.0, 1.0, 4.0, 4.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.314018249511719e-05, -3.2191164791584015e-05, -3.124214708805084e-05, -3.029312938451767e-05, -2.9344111680984497e-05, -2.8395093977451324e-05, -2.7446076273918152e-05, -2.649705857038498e-05, -2.5548040866851807e-05, -2.4599023163318634e-05, -2.365000545978546e-05, 
-2.270098775625229e-05, -2.1751970052719116e-05, -2.0802952349185944e-05, -1.985393464565277e-05, -1.89049169421196e-05, -1.7955899238586426e-05, -1.7006881535053253e-05, -1.605786383152008e-05, -1.5108846127986908e-05, -1.4159828424453735e-05, -1.3210810720920563e-05, -1.226179301738739e-05, -1.1312775313854218e-05, -1.0363757610321045e-05, -9.414739906787872e-06, -8.4657222032547e-06, -7.516704499721527e-06, -6.5676867961883545e-06, -5.618669092655182e-06, -4.669651389122009e-06, -3.7206336855888367e-06, -2.771615982055664e-06, -1.8225982785224915e-06, -8.735805749893188e-07, 7.543712854385376e-08, 1.0244548320770264e-06, 1.973472535610199e-06, 2.9224902391433716e-06, 3.871507942676544e-06, 4.820525646209717e-06, 5.769543349742889e-06, 6.718561053276062e-06, 7.667578756809235e-06, 8.616596460342407e-06, 9.56561416387558e-06, 1.0514631867408752e-05, 1.1463649570941925e-05, 1.2412667274475098e-05, 1.336168497800827e-05, 1.4310702681541443e-05, 1.5259720385074615e-05, 1.6208738088607788e-05, 1.715775579214096e-05, 1.8106773495674133e-05, 1.9055791199207306e-05, 2.000480890274048e-05, 2.095382660627365e-05, 2.1902844309806824e-05, 2.2851862013339996e-05, 2.380087971687317e-05, 2.474989742040634e-05, 2.5698915123939514e-05, 2.6647932827472687e-05, 2.759695053100586e-05]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 6.0, 7.0, 10.0, 16.0, 15.0, 30.0, 47.0, 64.0, 105.0, 135.0, 239.0, 368.0, 562.0, 891.0, 1496.0, 2363.0, 3698.0, 6224.0, 10096.0, 17251.0, 31046.0, 55003.0, 111034.0, 305666.0, 272138.0, 104737.0, 54289.0, 28782.0, 16967.0, 9745.0, 5934.0, 3578.0, 2184.0, 1405.0, 886.0, 548.0, 355.0, 226.0, 151.0, 101.0, 61.0, 39.0, 18.0, 17.0, 12.0, 8.0, 1.0, 5.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-5.59687614440918e-05, -5.410052835941315e-05, -5.22322952747345e-05, -5.036406219005585e-05, -4.84958291053772e-05, -4.662759602069855e-05, -4.47593629360199e-05, -4.289112985134125e-05, -4.10228967666626e-05, -3.915466368198395e-05, -3.72864305973053e-05, -3.541819751262665e-05, -3.3549964427948e-05, -3.168173134326935e-05, -2.9813498258590698e-05, -2.794526517391205e-05, -2.60770320892334e-05, -2.420879900455475e-05, -2.23405659198761e-05, -2.047233283519745e-05, -1.86040997505188e-05, -1.673586666584015e-05, -1.4867633581161499e-05, -1.2999400496482849e-05, -1.11311674118042e-05, -9.26293432712555e-06, -7.394701242446899e-06, -5.5264681577682495e-06, -3.6582350730895996e-06, -1.7900019884109497e-06, 7.82310962677002e-08, 1.94646418094635e-06, 3.814697265625e-06, 5.68293035030365e-06, 7.5511634349823e-06, 9.41939651966095e-06, 1.12876296043396e-05, 1.315586268901825e-05, 1.50240957736969e-05, 1.689232885837555e-05, 1.87605619430542e-05, 2.062879502773285e-05, 2.24970281124115e-05, 2.436526119709015e-05, 2.62334942817688e-05, 2.810172736644745e-05, 2.99699604511261e-05, 3.183819353580475e-05, 3.37064266204834e-05, 3.557465970516205e-05, 3.74428927898407e-05, 3.931112587451935e-05, 4.1179358959198e-05, 4.304759204387665e-05, 4.49158251285553e-05, 4.678405821323395e-05, 4.86522912979126e-05, 5.052052438259125e-05, 5.23887574672699e-05, 5.425699055194855e-05, 5.61252236366272e-05, 5.799345672130585e-05, 5.98616898059845e-05, 6.172992289066315e-05, 6.35981559753418e-05]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 5.0, 4.0, 9.0, 7.0, 4.0, 10.0, 14.0, 13.0, 25.0, 19.0, 
20.0, 18.0, 28.0, 29.0, 29.0, 32.0, 35.0, 41.0, 44.0, 40.0, 48.0, 37.0, 44.0, 42.0, 48.0, 40.0, 26.0, 32.0, 34.0, 34.0, 31.0, 33.0, 20.0, 13.0, 18.0, 13.0, 16.0, 12.0, 10.0, 10.0, 6.0, 3.0, 5.0, 1.0, 3.0, 2.0, 3.0], "bins": [-6.341934204101562e-05, -6.1769038438797e-05, -6.011873483657837e-05, -5.846843123435974e-05, -5.681812763214111e-05, -5.5167824029922485e-05, -5.351752042770386e-05, -5.186721682548523e-05, -5.02169132232666e-05, -4.8566609621047974e-05, -4.6916306018829346e-05, -4.526600241661072e-05, -4.361569881439209e-05, -4.196539521217346e-05, -4.0315091609954834e-05, -3.8664788007736206e-05, -3.701448440551758e-05, -3.536418080329895e-05, -3.371387720108032e-05, -3.2063573598861694e-05, -3.0413269996643066e-05, -2.876296639442444e-05, -2.711266279220581e-05, -2.5462359189987183e-05, -2.3812055587768555e-05, -2.2161751985549927e-05, -2.05114483833313e-05, -1.886114478111267e-05, -1.7210841178894043e-05, -1.5560537576675415e-05, -1.3910233974456787e-05, -1.225993037223816e-05, -1.0609626770019531e-05, -8.959323167800903e-06, -7.309019565582275e-06, -5.6587159633636475e-06, -4.0084123611450195e-06, -2.3581087589263916e-06, -7.078051567077637e-07, 9.424984455108643e-07, 2.592802047729492e-06, 4.24310564994812e-06, 5.893409252166748e-06, 7.543712854385376e-06, 9.194016456604004e-06, 1.0844320058822632e-05, 1.249462366104126e-05, 1.4144927263259888e-05, 1.5795230865478516e-05, 1.7445534467697144e-05, 1.909583806991577e-05, 2.07461416721344e-05, 2.2396445274353027e-05, 2.4046748876571655e-05, 2.5697052478790283e-05, 2.734735608100891e-05, 2.899765968322754e-05, 3.064796328544617e-05, 3.2298266887664795e-05, 3.394857048988342e-05, 3.559887409210205e-05, 3.724917769432068e-05, 3.889948129653931e-05, 4.0549784898757935e-05, 4.220008850097656e-05]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 5.0, 5.0, 4.0, 10.0, 10.0, 11.0, 15.0, 33.0, 34.0, 52.0, 49.0, 152.0, 177.0, 259.0, 373.0, 823.0, 1169.0, 1998.0, 3645.0, 9297.0, 18120.0, 48727.0, 178493.0, 581139.0, 133667.0, 39085.0, 15512.0, 7750.0, 3295.0, 1790.0, 1006.0, 745.0, 362.0, 226.0, 171.0, 116.0, 61.0, 55.0, 33.0, 30.0, 12.0, 12.0, 13.0, 7.0, 3.0, 7.0, 4.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0], "bins": [-1.0132789611816406e-05, -9.819865226745605e-06, -9.506940841674805e-06, -9.194016456604004e-06, -8.881092071533203e-06, -8.568167686462402e-06, -8.255243301391602e-06, -7.9423189163208e-06, -7.62939453125e-06, -7.316470146179199e-06, -7.0035457611083984e-06, -6.690621376037598e-06, -6.377696990966797e-06, -6.064772605895996e-06, -5.751848220825195e-06, -5.4389238357543945e-06, -5.125999450683594e-06, -4.813075065612793e-06, -4.500150680541992e-06, -4.187226295471191e-06, -3.874301910400391e-06, -3.56137752532959e-06, -3.248453140258789e-06, -2.9355287551879883e-06, -2.6226043701171875e-06, -2.3096799850463867e-06, -1.996755599975586e-06, -1.6838312149047852e-06, -1.3709068298339844e-06, -1.0579824447631836e-06, -7.450580596923828e-07, -4.3213367462158203e-07, -1.1920928955078125e-07, 1.9371509552001953e-07, 5.066394805908203e-07, 8.195638656616211e-07, 1.1324882507324219e-06, 1.4454126358032227e-06, 1.7583370208740234e-06, 2.0712614059448242e-06, 2.384185791015625e-06, 2.6971101760864258e-06, 3.0100345611572266e-06, 3.3229589462280273e-06, 3.635883331298828e-06, 3.948807716369629e-06, 4.26173210144043e-06, 4.5746564865112305e-06, 4.887580871582031e-06, 5.200505256652832e-06, 5.513429641723633e-06, 5.826354026794434e-06, 
6.139278411865234e-06, 6.452202796936035e-06, 6.765127182006836e-06, 7.078051567077637e-06, 7.3909759521484375e-06, 7.703900337219238e-06, 8.016824722290039e-06, 8.32974910736084e-06, 8.64267349243164e-06, 8.955597877502441e-06, 9.268522262573242e-06, 9.581446647644043e-06, 9.894371032714844e-06]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 5.0, 5.0, 0.0, 8.0, 11.0, 0.0, 16.0, 9.0, 24.0, 0.0, 30.0, 38.0, 0.0, 33.0, 46.0, 44.0, 0.0, 58.0, 60.0, 0.0, 72.0, 72.0, 68.0, 0.0, 64.0, 56.0, 68.0, 0.0, 53.0, 43.0, 0.0, 31.0, 19.0, 19.0, 0.0, 17.0, 7.0, 0.0, 10.0, 9.0, 4.0, 0.0, 1.0, 3.0, 0.0, 5.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-1.430511474609375e-06, -1.387670636177063e-06, -1.344829797744751e-06, -1.301988959312439e-06, -1.259148120880127e-06, -1.216307282447815e-06, -1.173466444015503e-06, -1.130625605583191e-06, -1.087784767150879e-06, -1.044943928718567e-06, -1.0021030902862549e-06, -9.592622518539429e-07, -9.164214134216309e-07, -8.735805749893188e-07, -8.307397365570068e-07, -7.878988981246948e-07, -7.450580596923828e-07, -7.022172212600708e-07, -6.593763828277588e-07, -6.165355443954468e-07, -5.736947059631348e-07, -5.308538675308228e-07, -4.880130290985107e-07, -4.4517219066619873e-07, -4.023313522338867e-07, -3.594905138015747e-07, -3.166496753692627e-07, -2.738088369369507e-07, -2.3096799850463867e-07, -1.8812716007232666e-07, -1.4528632164001465e-07, -1.0244548320770264e-07, -5.960464477539063e-08, -1.6763806343078613e-08, 2.60770320892334e-08, 6.891787052154541e-08, 1.1175870895385742e-07, 1.5459954738616943e-07, 1.9744038581848145e-07, 2.4028122425079346e-07, 2.8312206268310547e-07, 3.259629011154175e-07, 3.688037395477295e-07, 4.116445779800415e-07, 4.544854164123535e-07, 4.973262548446655e-07, 5.401670932769775e-07, 5.830079317092896e-07, 6.258487701416016e-07, 6.686896085739136e-07, 7.115304470062256e-07, 7.543712854385376e-07, 7.972121238708496e-07, 8.400529623031616e-07, 8.828938007354736e-07, 9.257346391677856e-07, 9.685754776000977e-07, 1.0114163160324097e-06, 1.0542571544647217e-06, 1.0970979928970337e-06, 1.1399388313293457e-06, 1.1827796697616577e-06, 1.2256205081939697e-06, 1.2684613466262817e-06, 1.3113021850585938e-06]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 11.0, 5.0, 12.0, 14.0, 11.0, 26.0, 31.0, 31.0, 39.0, 106.0, 92.0, 142.0, 215.0, 302.0, 436.0, 742.0, 1079.0, 1589.0, 3949.0, 5452.0, 9946.0, 18796.0, 39813.0, 96008.0, 271807.0, 358921.0, 133236.0, 62659.0, 18914.0, 9848.0, 5381.0, 3234.0, 2040.0, 1211.0, 770.0, 519.0, 424.0, 204.0, 164.0, 91.0, 73.0, 71.0, 39.0, 36.0, 19.0, 18.0, 5.0, 7.0, 7.0, 7.0, 3.0, 3.0, 1.0, 2.0, 4.0], "bins": [-6.318092346191406e-06, -6.132759153842926e-06, -5.947425961494446e-06, -5.7620927691459656e-06, -5.576759576797485e-06, -5.391426384449005e-06, -5.206093192100525e-06, -5.020759999752045e-06, -4.8354268074035645e-06, -4.650093615055084e-06, -4.464760422706604e-06, -4.279427230358124e-06, -4.0940940380096436e-06, -3.908760845661163e-06, -3.723427653312683e-06, -3.538094460964203e-06, -3.3527612686157227e-06, -3.1674280762672424e-06, -2.982094883918762e-06, -2.796761691570282e-06, -2.6114284992218018e-06, -2.4260953068733215e-06, -2.2407621145248413e-06, -2.055428922176361e-06, -1.8700957298278809e-06, -1.6847625374794006e-06, -1.4994293451309204e-06, -1.3140961527824402e-06, -1.12876296043396e-06, 
-9.434297680854797e-07, -7.580965757369995e-07, -5.727633833885193e-07, -3.8743019104003906e-07, -2.0209699869155884e-07, -1.6763806343078613e-08, 1.685693860054016e-07, 3.5390257835388184e-07, 5.392357707023621e-07, 7.245689630508423e-07, 9.099021553993225e-07, 1.0952353477478027e-06, 1.280568540096283e-06, 1.4659017324447632e-06, 1.6512349247932434e-06, 1.8365681171417236e-06, 2.021901309490204e-06, 2.207234501838684e-06, 2.3925676941871643e-06, 2.5779008865356445e-06, 2.7632340788841248e-06, 2.948567271232605e-06, 3.133900463581085e-06, 3.3192336559295654e-06, 3.5045668482780457e-06, 3.689900040626526e-06, 3.875233232975006e-06, 4.060566425323486e-06, 4.2458996176719666e-06, 4.431232810020447e-06, 4.616566002368927e-06, 4.801899194717407e-06, 4.9872323870658875e-06, 5.172565579414368e-06, 5.357898771762848e-06, 5.543231964111328e-06]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 6.0, 1.0, 3.0, 4.0, 4.0, 3.0, 2.0, 11.0, 10.0, 18.0, 15.0, 23.0, 26.0, 45.0, 55.0, 37.0, 54.0, 67.0, 67.0, 82.0, 87.0, 49.0, 55.0, 58.0, 31.0, 40.0, 28.0, 23.0, 29.0, 14.0, 17.0, 10.0, 8.0, 9.0, 4.0, 4.0, 5.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0], "bins": [-6.318092346191406e-06, -6.157904863357544e-06, -5.997717380523682e-06, -5.837529897689819e-06, -5.677342414855957e-06, -5.517154932022095e-06, -5.356967449188232e-06, -5.19677996635437e-06, -5.036592483520508e-06, -4.8764050006866455e-06, -4.716217517852783e-06, -4.556030035018921e-06, -4.395842552185059e-06, -4.235655069351196e-06, -4.075467586517334e-06, -3.915280103683472e-06, -3.7550926208496094e-06, -3.594905138015747e-06, -3.4347176551818848e-06, -3.2745301723480225e-06, -3.11434268951416e-06, -2.954155206680298e-06, -2.7939677238464355e-06, -2.6337802410125732e-06, -2.473592758178711e-06, -2.3134052753448486e-06, -2.1532177925109863e-06, -1.993030309677124e-06, -1.8328428268432617e-06, -1.6726553440093994e-06, -1.5124678611755371e-06, -1.3522803783416748e-06, -1.1920928955078125e-06, -1.0319054126739502e-06, -8.717179298400879e-07, -7.115304470062256e-07, -5.513429641723633e-07, -3.91155481338501e-07, -2.3096799850463867e-07, -7.078051567077637e-08, 8.940696716308594e-08, 2.4959444999694824e-07, 4.0978193283081055e-07, 5.699694156646729e-07, 7.301568984985352e-07, 8.903443813323975e-07, 1.0505318641662598e-06, 1.210719347000122e-06, 1.3709068298339844e-06, 1.5310943126678467e-06, 1.691281795501709e-06, 1.8514692783355713e-06, 2.0116567611694336e-06, 2.171844244003296e-06, 2.332031726837158e-06, 2.4922192096710205e-06, 2.652406692504883e-06, 2.812594175338745e-06, 2.9727816581726074e-06, 3.1329691410064697e-06, 3.293156623840332e-06, 3.4533441066741943e-06, 3.6135315895080566e-06, 3.773719072341919e-06, 3.933906555175781e-06]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 4.0, 1.0, 6.0, 3.0, 8.0, 8.0, 7.0, 15.0, 18.0, 31.0, 44.0, 75.0, 88.0, 119.0, 127.0, 94.0, 61.0, 43.0, 46.0, 29.0, 32.0, 23.0, 18.0, 12.0, 14.0, 6.0, 11.0, 13.0, 7.0, 5.0, 7.0, 7.0, 5.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014365033712238073, -0.00013677602692041546, -0.00012990170216653496, -0.0001230273919645697, -0.00011615306721068919, -0.00010927875700872391, -0.00010240443225484341, -9.553012205287814e-05, -8.865579729899764e-05, -8.178147982107475e-05, 
-7.490716234315187e-05, -6.803284486522898e-05, -6.11585273873061e-05, -5.4284213547362015e-05, -4.740989606943913e-05, -4.053557859151624e-05, -3.366126475157216e-05, -2.6786947273649275e-05, -1.991262979572639e-05, -1.3038314136792906e-05, -6.163996658870019e-06, 7.10319000063464e-07, 7.584636477986351e-06, 1.4458953955909237e-05, 2.1333271433832124e-05, 2.820758891175501e-05, 3.50819063896779e-05, 4.195622022962198e-05, 4.883053770754486e-05, 5.570485518546775e-05, 6.257917266339064e-05, 6.945349014131352e-05, 7.632780761923641e-05, 8.32021250971593e-05, 9.007644257508218e-05, 9.695076005300507e-05, 0.00010382507753092796, 0.00011069938773289323, 0.00011757371248677373, 0.000124448022688739, 0.0001313223474426195, 0.00013819665764458477, 0.00014507098239846528, 0.00015194529260043055, 0.00015881961735431105, 0.00016569392755627632, 0.00017256825231015682, 0.0001794425625121221, 0.00018631687271408737, 0.00019319118291605264, 0.00020006550766993314, 0.0002069398178718984, 0.0002138141426257789, 0.00022068845282774419, 0.0002275627775816247, 0.00023443708778358996, 0.00024131141253747046, 0.00024818573729135096, 0.000255060032941401, 0.0002619343576952815, 0.000268808682449162, 0.0002756830072030425, 0.00028255730285309255, 0.00028943162760697305, 0.00029630595236085355]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 4.0, 3.0, 11.0, 4.0, 13.0, 12.0, 16.0, 15.0, 15.0, 17.0, 30.0, 31.0, 27.0, 34.0, 39.0, 38.0, 35.0, 49.0, 51.0, 39.0, 39.0, 57.0, 44.0, 49.0, 26.0, 38.0, 39.0, 46.0, 27.0, 28.0, 27.0, 23.0, 11.0, 17.0, 12.0, 11.0, 10.0, 6.0, 2.0, 1.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00016701221466064453, -0.00016153696924448013, -0.00015606172382831573, -0.00015058647841215134, -0.00014511123299598694, -0.00013963598757982254, -0.00013416074216365814, -0.00012868549674749374, -0.00012321025133132935, -0.00011773500591516495, -0.00011225976049900055, -0.00010678451508283615, -0.00010130926966667175, -9.583402425050735e-05, -9.035877883434296e-05, -8.488353341817856e-05, -7.940828800201416e-05, -7.393304258584976e-05, -6.845779716968536e-05, -6.298255175352097e-05, -5.750730633735657e-05, -5.203206092119217e-05, -4.655681550502777e-05, -4.108157008886337e-05, -3.5606324672698975e-05, -3.0131079256534576e-05, -2.4655833840370178e-05, -1.918058842420578e-05, -1.3705343008041382e-05, -8.230097591876984e-06, -2.7548521757125854e-06, 2.7203932404518127e-06, 8.195638656616211e-06, 1.3670884072780609e-05, 1.9146129488945007e-05, 2.4621374905109406e-05, 3.0096620321273804e-05, 3.55718657374382e-05, 4.10471111536026e-05, 4.6522356569767e-05, 5.1997601985931396e-05, 5.7472847402095795e-05, 6.294809281826019e-05, 6.842333823442459e-05, 7.389858365058899e-05, 7.937382906675339e-05, 8.484907448291779e-05, 9.032431989908218e-05, 9.579956531524658e-05, 0.00010127481073141098, 0.00010675005614757538, 0.00011222530156373978, 0.00011770054697990417, 0.00012317579239606857, 0.00012865103781223297, 0.00013412628322839737, 0.00013960152864456177, 0.00014507677406072617, 0.00015055201947689056, 0.00015602726489305496, 0.00016150251030921936, 0.00016697775572538376, 0.00017245300114154816, 0.00017792824655771255, 0.00018340349197387695]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 6.0, 6.0, 5.0, 7.0, 10.0, 15.0, 32.0, 25.0, 32.0, 42.0, 73.0, 98.0, 134.0, 205.0, 290.0, 411.0, 
608.0, 1001.0, 1559.0, 2413.0, 4288.0, 7208.0, 13949.0, 29469.0, 99407.0, 2960952.0, 956100.0, 62978.0, 24045.0, 11851.0, 6427.0, 3717.0, 2400.0, 1435.0, 985.0, 683.0, 430.0, 318.0, 173.0, 158.0, 118.0, 61.0, 60.0, 44.0, 18.0, 12.0, 11.0, 7.0, 5.0, 6.0, 2.0, 0.0, 3.0, 2.0, 0.0, 1.0], "bins": [-9.638071060180664e-05, -9.353458881378174e-05, -9.068846702575684e-05, -8.784234523773193e-05, -8.499622344970703e-05, -8.215010166168213e-05, -7.930397987365723e-05, -7.645785808563232e-05, -7.361173629760742e-05, -7.076561450958252e-05, -6.791949272155762e-05, -6.507337093353271e-05, -6.222724914550781e-05, -5.938112735748291e-05, -5.653500556945801e-05, -5.3688883781433105e-05, -5.08427619934082e-05, -4.79966402053833e-05, -4.51505184173584e-05, -4.2304396629333496e-05, -3.9458274841308594e-05, -3.661215305328369e-05, -3.376603126525879e-05, -3.091990947723389e-05, -2.8073787689208984e-05, -2.5227665901184082e-05, -2.238154411315918e-05, -1.9535422325134277e-05, -1.6689300537109375e-05, -1.3843178749084473e-05, -1.099705696105957e-05, -8.150935173034668e-06, -5.304813385009766e-06, -2.4586915969848633e-06, 3.8743019104003906e-07, 3.2335519790649414e-06, 6.079673767089844e-06, 8.925795555114746e-06, 1.1771917343139648e-05, 1.461803913116455e-05, 1.7464160919189453e-05, 2.0310282707214355e-05, 2.3156404495239258e-05, 2.600252628326416e-05, 2.8848648071289062e-05, 3.1694769859313965e-05, 3.454089164733887e-05, 3.738701343536377e-05, 4.023313522338867e-05, 4.3079257011413574e-05, 4.5925378799438477e-05, 4.877150058746338e-05, 5.161762237548828e-05, 5.4463744163513184e-05, 5.7309865951538086e-05, 6.015598773956299e-05, 6.300210952758789e-05, 6.584823131561279e-05, 6.86943531036377e-05, 7.15404748916626e-05, 7.43865966796875e-05, 7.72327184677124e-05, 8.00788402557373e-05, 8.292496204376221e-05, 8.577108383178711e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0, 4.0, 2.0, 3.0, 11.0, 10.0, 8.0, 7.0, 14.0, 12.0, 19.0, 19.0, 28.0, 34.0, 37.0, 34.0, 45.0, 62.0, 58.0, 40.0, 64.0, 76.0, 51.0, 45.0, 44.0, 42.0, 29.0, 32.0, 21.0, 29.0, 23.0, 20.0, 15.0, 13.0, 10.0, 4.0, 7.0, 3.0, 4.0, 8.0, 3.0, 5.0, 4.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-2.187490463256836e-05, -2.118665724992752e-05, -2.0498409867286682e-05, -1.9810162484645844e-05, -1.9121915102005005e-05, -1.8433667719364166e-05, -1.7745420336723328e-05, -1.705717295408249e-05, -1.636892557144165e-05, -1.5680678188800812e-05, -1.4992430806159973e-05, -1.4304183423519135e-05, -1.3615936040878296e-05, -1.2927688658237457e-05, -1.2239441275596619e-05, -1.155119389295578e-05, -1.0862946510314941e-05, -1.0174699127674103e-05, -9.486451745033264e-06, -8.798204362392426e-06, -8.109956979751587e-06, -7.421709597110748e-06, -6.73346221446991e-06, -6.045214831829071e-06, -5.356967449188232e-06, -4.668720066547394e-06, -3.980472683906555e-06, -3.2922253012657166e-06, -2.603977918624878e-06, -1.9157305359840393e-06, -1.2274831533432007e-06, -5.392357707023621e-07, 1.4901161193847656e-07, 8.372589945793152e-07, 1.5255063772201538e-06, 2.2137537598609924e-06, 2.902001142501831e-06, 3.5902485251426697e-06, 4.278495907783508e-06, 4.966743290424347e-06, 5.6549906730651855e-06, 6.343238055706024e-06, 7.031485438346863e-06, 7.719732820987701e-06, 8.40798020362854e-06, 9.096227586269379e-06, 9.784474968910217e-06, 1.0472722351551056e-05, 1.1160969734191895e-05, 1.1849217116832733e-05, 1.2537464499473572e-05, 1.322571188211441e-05, 
1.3913959264755249e-05, 1.4602206647396088e-05, 1.5290454030036926e-05, 1.5978701412677765e-05, 1.6666948795318604e-05, 1.7355196177959442e-05, 1.804344356060028e-05, 1.873169094324112e-05, 1.9419938325881958e-05, 2.0108185708522797e-05, 2.0796433091163635e-05, 2.1484680473804474e-05, 2.2172927856445312e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 1.0, 9.0, 12.0, 15.0, 28.0, 43.0, 66.0, 96.0, 140.0, 264.0, 413.0, 713.0, 1230.0, 2212.0, 3780.0, 7405.0, 15436.0, 34028.0, 89254.0, 546803.0, 3218586.0, 175277.0, 53105.0, 22894.0, 10356.0, 5256.0, 2884.0, 1624.0, 923.0, 588.0, 278.0, 202.0, 131.0, 75.0, 61.0, 29.0, 22.0, 21.0, 8.0, 7.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.973743438720703e-05, -6.757490336894989e-05, -6.541237235069275e-05, -6.324984133243561e-05, -6.108731031417847e-05, -5.8924779295921326e-05, -5.6762248277664185e-05, -5.4599717259407043e-05, -5.24371862411499e-05, -5.027465522289276e-05, -4.811212420463562e-05, -4.594959318637848e-05, -4.378706216812134e-05, -4.16245311498642e-05, -3.9462000131607056e-05, -3.7299469113349915e-05, -3.5136938095092773e-05, -3.297440707683563e-05, -3.081187605857849e-05, -2.864934504032135e-05, -2.648681402206421e-05, -2.4324283003807068e-05, -2.2161751985549927e-05, -1.9999220967292786e-05, -1.7836689949035645e-05, -1.5674158930778503e-05, -1.3511627912521362e-05, -1.1349096894264221e-05, -9.18656587600708e-06, -7.024034857749939e-06, -4.861503839492798e-06, -2.6989728212356567e-06, -5.364418029785156e-07, 1.6260892152786255e-06, 3.7886202335357666e-06, 5.951151251792908e-06, 8.113682270050049e-06, 1.027621328830719e-05, 1.2438744306564331e-05, 1.4601275324821472e-05, 1.6763806343078613e-05, 1.8926337361335754e-05, 2.1088868379592896e-05, 2.3251399397850037e-05, 2.5413930416107178e-05, 2.757646143436432e-05, 2.973899245262146e-05, 3.19015234708786e-05, 3.406405448913574e-05, 3.622658550739288e-05, 3.8389116525650024e-05, 4.0551647543907166e-05, 4.271417856216431e-05, 4.487670958042145e-05, 4.703924059867859e-05, 4.920177161693573e-05, 5.136430263519287e-05, 5.352683365345001e-05, 5.568936467170715e-05, 5.7851895689964294e-05, 6.0014426708221436e-05, 6.217695772647858e-05, 6.433948874473572e-05, 6.650201976299286e-05, 6.866455078125e-05]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 8.0, 3.0, 2.0, 10.0, 6.0, 17.0, 12.0, 16.0, 12.0, 16.0, 20.0, 36.0, 37.0, 32.0, 53.0, 74.0, 123.0, 305.0, 663.0, 1151.0, 682.0, 337.0, 114.0, 68.0, 53.0, 35.0, 35.0, 31.0, 32.0, 14.0, 16.0, 9.0, 11.0, 11.0, 7.0, 1.0, 6.0, 5.0, 6.0, 2.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.404783248901367e-05, -4.260614514350891e-05, -4.116445779800415e-05, -3.972277045249939e-05, -3.828108310699463e-05, -3.683939576148987e-05, -3.539770841598511e-05, -3.395602107048035e-05, -3.2514333724975586e-05, -3.1072646379470825e-05, -2.9630959033966064e-05, -2.8189271688461304e-05, -2.6747584342956543e-05, -2.5305896997451782e-05, -2.386420965194702e-05, -2.242252230644226e-05, -2.09808349609375e-05, -1.953914761543274e-05, -1.809746026992798e-05, -1.6655772924423218e-05, -1.5214085578918457e-05, -1.3772398233413696e-05, -1.2330710887908936e-05, -1.0889023542404175e-05, -9.447336196899414e-06, -8.005648851394653e-06, -6.563961505889893e-06, -5.122274160385132e-06, 
-3.680586814880371e-06, -2.2388994693756104e-06, -7.972121238708496e-07, 6.444752216339111e-07, 2.086162567138672e-06, 3.5278499126434326e-06, 4.969537258148193e-06, 6.411224603652954e-06, 7.852911949157715e-06, 9.294599294662476e-06, 1.0736286640167236e-05, 1.2177973985671997e-05, 1.3619661331176758e-05, 1.5061348676681519e-05, 1.650303602218628e-05, 1.794472336769104e-05, 1.93864107131958e-05, 2.082809805870056e-05, 2.2269785404205322e-05, 2.3711472749710083e-05, 2.5153160095214844e-05, 2.6594847440719604e-05, 2.8036534786224365e-05, 2.9478222131729126e-05, 3.091990947723389e-05, 3.236159682273865e-05, 3.380328416824341e-05, 3.524497151374817e-05, 3.668665885925293e-05, 3.812834620475769e-05, 3.957003355026245e-05, 4.101172089576721e-05, 4.245340824127197e-05, 4.3895095586776733e-05, 4.5336782932281494e-05, 4.6778470277786255e-05, 4.8220157623291016e-05]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 4.0, 5.0, 5.0, 2.0, 8.0, 8.0, 7.0, 17.0, 13.0, 22.0, 31.0, 37.0, 57.0, 68.0, 81.0, 98.0, 108.0, 101.0, 67.0, 71.0, 53.0, 35.0, 16.0, 21.0, 17.0, 9.0, 11.0, 2.0, 6.0, 3.0, 2.0, 2.0, 2.0, 2.0, 5.0, 1.0, 0.0, 3.0, 2.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0002596330887172371, -0.00025185724371112883, -0.00024408139870502055, -0.00023630555369891226, -0.0002285297232447192, -0.00022075387823861092, -0.00021297803323250264, -0.00020520218822639436, -0.0001974263577722013, -0.00018965051276609302, -0.00018187466775998473, -0.00017409882275387645, -0.0001663229922996834, -0.0001585471472935751, -0.00015077130228746682, -0.00014299545728135854, -0.00013521961227525026, -0.00012744376726914197, -0.0001196679295389913, -0.00011189208453288302, -0.00010411624680273235, -9.634040179662406e-05, -8.856455679051578e-05, -8.07887117844075e-05, -7.301287405425683e-05, -6.523702904814854e-05, -5.746119131799787e-05, -4.968534631188959e-05, -4.190950494376011e-05, -3.4133663575630635e-05, -2.635781856952235e-05, -1.8581977201392874e-05, -1.0806135833263397e-05, -3.0302935556392185e-06, 4.74554872198496e-06, 1.252139190910384e-05, 2.0297233277233317e-05, 2.8073074645362794e-05, 3.584891965147108e-05, 4.3624761019600555e-05, 5.140060238773003e-05, 5.917644375585951e-05, 6.695228512398899e-05, 7.472813013009727e-05, 8.250397513620555e-05, 9.027981286635622e-05, 9.805565787246451e-05, 0.00010583149560261518, 0.00011360734060872346, 0.00012138318561483175, 0.00012915903062094003, 0.00013693486107513309, 0.00014471070608124137, 0.00015248655108734965, 0.00016026239609345794, 0.00016803824109956622, 0.0001758140861056745, 0.0001835899311117828, 0.00019136577611789107, 0.00019914162112399936, 0.0002069174515781924, 0.0002146932965843007, 0.00022246914159040898, 0.00023024498659651726, 0.00023802081705071032]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 4.0, 4.0, 5.0, 5.0, 8.0, 12.0, 14.0, 15.0, 12.0, 15.0, 25.0, 16.0, 20.0, 43.0, 40.0, 37.0, 35.0, 57.0, 37.0, 45.0, 51.0, 45.0, 52.0, 44.0, 42.0, 36.0, 41.0, 39.0, 32.0, 30.0, 33.0, 21.0, 18.0, 13.0, 14.0, 15.0, 7.0, 5.0, 7.0, 4.0, 8.0, 4.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001538991928100586, -0.00014925748109817505, -0.0001446157693862915, -0.00013997405767440796, -0.00013533234596252441, -0.00013069063425064087, -0.00012604892253875732, -0.00012140721082687378, 
-0.00011676549911499023, -0.00011212378740310669, -0.00010748207569122314, -0.0001028403639793396, -9.819865226745605e-05, -9.355694055557251e-05, -8.891522884368896e-05, -8.427351713180542e-05, -7.963180541992188e-05, -7.499009370803833e-05, -7.034838199615479e-05, -6.570667028427124e-05, -6.10649585723877e-05, -5.642324686050415e-05, -5.1781535148620605e-05, -4.713982343673706e-05, -4.2498111724853516e-05, -3.785640001296997e-05, -3.3214688301086426e-05, -2.857297658920288e-05, -2.3931264877319336e-05, -1.928955316543579e-05, -1.4647841453552246e-05, -1.0006129741668701e-05, -5.364418029785156e-06, -7.227063179016113e-07, 3.919005393981934e-06, 8.560717105865479e-06, 1.3202428817749023e-05, 1.784414052963257e-05, 2.2485852241516113e-05, 2.7127563953399658e-05, 3.17692756652832e-05, 3.641098737716675e-05, 4.105269908905029e-05, 4.569441080093384e-05, 5.033612251281738e-05, 5.497783422470093e-05, 5.961954593658447e-05, 6.426125764846802e-05, 6.890296936035156e-05, 7.354468107223511e-05, 7.818639278411865e-05, 8.28281044960022e-05, 8.746981620788574e-05, 9.211152791976929e-05, 9.675323963165283e-05, 0.00010139495134353638, 0.00010603666305541992, 0.00011067837476730347, 0.00011532008647918701, 0.00011996179819107056, 0.0001246035099029541, 0.00012924522161483765, 0.0001338869333267212, 0.00013852864503860474, 0.00014317035675048828]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 5.0, 10.0, 9.0, 12.0, 18.0, 23.0, 49.0, 48.0, 89.0, 145.0, 182.0, 324.0, 493.0, 950.0, 1543.0, 3001.0, 5938.0, 12573.0, 29812.0, 73951.0, 250937.0, 477651.0, 113410.0, 42528.0, 18030.0, 7982.0, 3900.0, 2075.0, 1107.0, 676.0, 423.0, 224.0, 140.0, 90.0, 68.0, 41.0, 31.0, 18.0, 13.0, 15.0, 6.0, 5.0, 6.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.00012373924255371094, -0.00011950172483921051, -0.00011526420712471008, -0.00011102668941020966, -0.00010678917169570923, -0.0001025516539812088, -9.831413626670837e-05, -9.407661855220795e-05, -8.983910083770752e-05, -8.560158312320709e-05, -8.136406540870667e-05, -7.712654769420624e-05, -7.288902997970581e-05, -6.865151226520538e-05, -6.441399455070496e-05, -6.017647683620453e-05, -5.59389591217041e-05, -5.1701441407203674e-05, -4.746392369270325e-05, -4.322640597820282e-05, -3.898888826370239e-05, -3.4751370549201965e-05, -3.0513852834701538e-05, -2.627633512020111e-05, -2.2038817405700684e-05, -1.7801299691200256e-05, -1.3563781976699829e-05, -9.326264262199402e-06, -5.088746547698975e-06, -8.512288331985474e-07, 3.38628888130188e-06, 7.623806595802307e-06, 1.1861324310302734e-05, 1.609884202480316e-05, 2.033635973930359e-05, 2.4573877453804016e-05, 2.8811395168304443e-05, 3.304891288280487e-05, 3.72864305973053e-05, 4.1523948311805725e-05, 4.576146602630615e-05, 4.999898374080658e-05, 5.423650145530701e-05, 5.8474019169807434e-05, 6.271153688430786e-05, 6.694905459880829e-05, 7.118657231330872e-05, 7.542409002780914e-05, 7.966160774230957e-05, 8.389912545681e-05, 8.813664317131042e-05, 9.237416088581085e-05, 9.661167860031128e-05, 0.0001008491963148117, 0.00010508671402931213, 0.00010932423174381256, 0.00011356174945831299, 0.00011779926717281342, 0.00012203678488731384, 0.00012627430260181427, 0.0001305118203163147, 0.00013474933803081512, 0.00013898685574531555, 0.00014322437345981598, 0.0001474618911743164]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 2.0, 1.0, 9.0, 5.0, 4.0, 
12.0, 9.0, 13.0, 23.0, 28.0, 46.0, 35.0, 43.0, 42.0, 51.0, 66.0, 68.0, 71.0, 60.0, 63.0, 40.0, 65.0, 41.0, 55.0, 29.0, 37.0, 22.0, 9.0, 16.0, 7.0, 11.0, 8.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.633167266845703e-05, -1.553446054458618e-05, -1.4737248420715332e-05, -1.3940036296844482e-05, -1.3142824172973633e-05, -1.2345612049102783e-05, -1.1548399925231934e-05, -1.0751187801361084e-05, -9.953975677490234e-06, -9.156763553619385e-06, -8.359551429748535e-06, -7.5623393058776855e-06, -6.765127182006836e-06, -5.967915058135986e-06, -5.170702934265137e-06, -4.373490810394287e-06, -3.5762786865234375e-06, -2.779066562652588e-06, -1.9818544387817383e-06, -1.1846423149108887e-06, -3.8743019104003906e-07, 4.0978193283081055e-07, 1.2069940567016602e-06, 2.0042061805725098e-06, 2.8014183044433594e-06, 3.598630428314209e-06, 4.395842552185059e-06, 5.193054676055908e-06, 5.990266799926758e-06, 6.787478923797607e-06, 7.584691047668457e-06, 8.381903171539307e-06, 9.179115295410156e-06, 9.976327419281006e-06, 1.0773539543151855e-05, 1.1570751667022705e-05, 1.2367963790893555e-05, 1.3165175914764404e-05, 1.3962388038635254e-05, 1.4759600162506104e-05, 1.5556812286376953e-05, 1.6354024410247803e-05, 1.7151236534118652e-05, 1.7948448657989502e-05, 1.874566078186035e-05, 1.95428729057312e-05, 2.034008502960205e-05, 2.11372971534729e-05, 2.193450927734375e-05, 2.27317214012146e-05, 2.352893352508545e-05, 2.43261456489563e-05, 2.512335777282715e-05, 2.5920569896697998e-05, 2.6717782020568848e-05, 2.7514994144439697e-05, 2.8312206268310547e-05, 2.9109418392181396e-05, 2.9906630516052246e-05, 3.0703842639923096e-05, 3.1501054763793945e-05, 3.2298266887664795e-05, 3.3095479011535645e-05, 3.3892691135406494e-05, 3.4689903259277344e-05]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 5.0, 5.0, 5.0, 3.0, 8.0, 14.0, 20.0, 23.0, 41.0, 60.0, 92.0, 143.0, 214.0, 359.0, 564.0, 884.0, 1406.0, 2267.0, 3789.0, 6337.0, 10930.0, 18925.0, 34532.0, 62771.0, 139972.0, 349495.0, 219223.0, 91642.0, 46135.0, 24745.0, 13356.0, 8116.0, 4822.0, 2918.0, 1786.0, 1074.0, 704.0, 405.0, 248.0, 196.0, 117.0, 62.0, 56.0, 32.0, 27.0, 18.0, 8.0, 8.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-4.553794860839844e-05, -4.405621439218521e-05, -4.2574480175971985e-05, -4.109274595975876e-05, -3.961101174354553e-05, -3.8129277527332306e-05, -3.664754331111908e-05, -3.516580909490585e-05, -3.368407487869263e-05, -3.22023406624794e-05, -3.0720606446266174e-05, -2.9238872230052948e-05, -2.775713801383972e-05, -2.6275403797626495e-05, -2.479366958141327e-05, -2.3311935365200043e-05, -2.1830201148986816e-05, -2.034846693277359e-05, -1.8866732716560364e-05, -1.7384998500347137e-05, -1.590326428413391e-05, -1.4421530067920685e-05, -1.2939795851707458e-05, -1.1458061635494232e-05, -9.976327419281006e-06, -8.49459320306778e-06, -7.012858986854553e-06, -5.531124770641327e-06, -4.049390554428101e-06, -2.5676563382148743e-06, -1.085922122001648e-06, 3.9581209421157837e-07, 1.8775463104248047e-06, 3.359280526638031e-06, 4.841014742851257e-06, 6.322748959064484e-06, 7.80448317527771e-06, 9.286217391490936e-06, 1.0767951607704163e-05, 1.2249685823917389e-05, 1.3731420040130615e-05, 1.5213154256343842e-05, 1.6694888472557068e-05, 1.8176622688770294e-05, 1.965835690498352e-05, 2.1140091121196747e-05, 2.2621825337409973e-05, 2.41035595536232e-05, 
2.5585293769836426e-05, 2.7067027986049652e-05, 2.854876220226288e-05, 3.0030496418476105e-05, 3.151223063468933e-05, 3.299396485090256e-05, 3.4475699067115784e-05, 3.595743328332901e-05, 3.7439167499542236e-05, 3.892090171575546e-05, 4.040263593196869e-05, 4.1884370148181915e-05, 4.336610436439514e-05, 4.484783858060837e-05, 4.6329572796821594e-05, 4.781130701303482e-05, 4.929304122924805e-05]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 6.0, 7.0, 8.0, 4.0, 11.0, 15.0, 15.0, 16.0, 16.0, 15.0, 23.0, 27.0, 34.0, 40.0, 22.0, 41.0, 30.0, 54.0, 28.0, 32.0, 40.0, 30.0, 31.0, 46.0, 45.0, 35.0, 49.0, 34.0, 30.0, 34.0, 31.0, 30.0, 19.0, 18.0, 15.0, 16.0, 12.0, 11.0, 4.0, 9.0, 7.0, 8.0, 2.0, 3.0, 3.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-4.696846008300781e-05, -4.556681960821152e-05, -4.416517913341522e-05, -4.276353865861893e-05, -4.136189818382263e-05, -3.996025770902634e-05, -3.855861723423004e-05, -3.7156976759433746e-05, -3.575533628463745e-05, -3.4353695809841156e-05, -3.295205533504486e-05, -3.1550414860248566e-05, -3.014877438545227e-05, -2.8747133910655975e-05, -2.734549343585968e-05, -2.5943852961063385e-05, -2.454221248626709e-05, -2.3140572011470795e-05, -2.17389315366745e-05, -2.0337291061878204e-05, -1.893565058708191e-05, -1.7534010112285614e-05, -1.613236963748932e-05, -1.4730729162693024e-05, -1.3329088687896729e-05, -1.1927448213100433e-05, -1.0525807738304138e-05, -9.124167263507843e-06, -7.722526788711548e-06, -6.320886313915253e-06, -4.9192458391189575e-06, -3.5176053643226624e-06, -2.115964889526367e-06, -7.14324414730072e-07, 6.873160600662231e-07, 2.0889565348625183e-06, 3.4905970096588135e-06, 4.892237484455109e-06, 6.293877959251404e-06, 7.695518434047699e-06, 9.097158908843994e-06, 1.049879938364029e-05, 1.1900439858436584e-05, 1.330208033323288e-05, 1.4703720808029175e-05, 1.610536128282547e-05, 1.7507001757621765e-05, 1.890864223241806e-05, 2.0310282707214355e-05, 2.171192318201065e-05, 2.3113563656806946e-05, 2.451520413160324e-05, 2.5916844606399536e-05, 2.731848508119583e-05, 2.8720125555992126e-05, 3.012176603078842e-05, 3.152340650558472e-05, 3.292504698038101e-05, 3.432668745517731e-05, 3.57283279299736e-05, 3.71299684047699e-05, 3.853160887956619e-05, 3.993324935436249e-05, 4.133488982915878e-05, 4.273653030395508e-05]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 0.0, 2.0, 2.0, 3.0, 3.0, 4.0, 7.0, 10.0, 13.0, 10.0, 10.0, 24.0, 39.0, 74.0, 114.0, 174.0, 259.0, 481.0, 944.0, 1714.0, 3573.0, 7369.0, 25124.0, 144198.0, 719948.0, 109739.0, 21562.0, 6362.0, 3285.0, 1484.0, 764.0, 429.0, 254.0, 202.0, 124.0, 89.0, 56.0, 16.0, 24.0, 18.0, 21.0, 7.0, 6.0, 3.0, 8.0, 3.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6033649444580078e-05, -1.556798815727234e-05, -1.51023268699646e-05, -1.463666558265686e-05, -1.4171004295349121e-05, -1.3705343008041382e-05, -1.3239681720733643e-05, -1.2774020433425903e-05, -1.2308359146118164e-05, -1.1842697858810425e-05, -1.1377036571502686e-05, -1.0911375284194946e-05, -1.0445713996887207e-05, -9.980052709579468e-06, -9.514391422271729e-06, -9.04873013496399e-06, -8.58306884765625e-06, -8.11740756034851e-06, -7.651746273040771e-06, -7.186084985733032e-06, -6.720423698425293e-06, -6.254762411117554e-06, -5.7891011238098145e-06, -5.323439836502075e-06, -4.857778549194336e-06, -4.392117261886597e-06, 
-3.926455974578857e-06, -3.460794687271118e-06, -2.995133399963379e-06, -2.5294721126556396e-06, -2.0638108253479004e-06, -1.5981495380401611e-06, -1.1324882507324219e-06, -6.668269634246826e-07, -2.0116567611694336e-07, 2.644956111907959e-07, 7.301568984985352e-07, 1.1958181858062744e-06, 1.6614794731140137e-06, 2.127140760421753e-06, 2.592802047729492e-06, 3.0584633350372314e-06, 3.5241246223449707e-06, 3.98978590965271e-06, 4.455447196960449e-06, 4.9211084842681885e-06, 5.386769771575928e-06, 5.852431058883667e-06, 6.318092346191406e-06, 6.7837536334991455e-06, 7.249414920806885e-06, 7.715076208114624e-06, 8.180737495422363e-06, 8.646398782730103e-06, 9.112060070037842e-06, 9.577721357345581e-06, 1.004338264465332e-05, 1.050904393196106e-05, 1.0974705219268799e-05, 1.1440366506576538e-05, 1.1906027793884277e-05, 1.2371689081192017e-05, 1.2837350368499756e-05, 1.3303011655807495e-05, 1.3768672943115234e-05]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 6.0, 4.0, 5.0, 16.0, 15.0, 18.0, 18.0, 26.0, 0.0, 23.0, 27.0, 33.0, 47.0, 53.0, 57.0, 63.0, 64.0, 58.0, 64.0, 60.0, 47.0, 0.0, 53.0, 32.0, 41.0, 29.0, 25.0, 29.0, 20.0, 18.0, 14.0, 5.0, 14.0, 7.0, 0.0, 5.0, 0.0, 0.0, 3.0, 1.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.9073486328125e-06, -1.8524006009101868e-06, -1.7974525690078735e-06, -1.7425045371055603e-06, -1.687556505203247e-06, -1.6326084733009338e-06, -1.5776604413986206e-06, -1.5227124094963074e-06, -1.4677643775939941e-06, -1.412816345691681e-06, -1.3578683137893677e-06, -1.3029202818870544e-06, -1.2479722499847412e-06, -1.193024218082428e-06, -1.1380761861801147e-06, -1.0831281542778015e-06, -1.0281801223754883e-06, -9.73232090473175e-07, -9.182840585708618e-07, -8.633360266685486e-07, -8.083879947662354e-07, -7.534399628639221e-07, -6.984919309616089e-07, -6.435438990592957e-07, -5.885958671569824e-07, -5.336478352546692e-07, -4.78699803352356e-07, -4.237517714500427e-07, -3.688037395477295e-07, -3.1385570764541626e-07, -2.5890767574310303e-07, -2.039596438407898e-07, -1.4901161193847656e-07, -9.406358003616333e-08, -3.91155481338501e-08, 1.5832483768463135e-08, 7.078051567077637e-08, 1.257285475730896e-07, 1.8067657947540283e-07, 2.3562461137771606e-07, 2.905726432800293e-07, 3.4552067518234253e-07, 4.0046870708465576e-07, 4.55416738986969e-07, 5.103647708892822e-07, 5.653128027915955e-07, 6.202608346939087e-07, 6.752088665962219e-07, 7.301568984985352e-07, 7.851049304008484e-07, 8.400529623031616e-07, 8.950009942054749e-07, 9.499490261077881e-07, 1.0048970580101013e-06, 1.0598450899124146e-06, 1.1147931218147278e-06, 1.169741153717041e-06, 1.2246891856193542e-06, 1.2796372175216675e-06, 1.3345852494239807e-06, 1.389533281326294e-06, 1.4444813132286072e-06, 1.4994293451309204e-06, 1.5543773770332336e-06, 1.6093254089355469e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 4.0, 8.0, 9.0, 9.0, 10.0, 18.0, 29.0, 47.0, 63.0, 84.0, 108.0, 189.0, 417.0, 485.0, 706.0, 1145.0, 1766.0, 4387.0, 6041.0, 11607.0, 23974.0, 58303.0, 320670.0, 416031.0, 119197.0, 42529.0, 22261.0, 7409.0, 4053.0, 2479.0, 1492.0, 1215.0, 585.0, 352.0, 263.0, 227.0, 113.0, 80.0, 62.0, 30.0, 43.0, 23.0, 15.0, 5.0, 6.0, 6.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-6.377696990966797e-06, -6.185844540596008e-06, -5.99399209022522e-06, -5.802139639854431e-06, 
-5.610287189483643e-06, -5.418434739112854e-06, -5.2265822887420654e-06, -5.034729838371277e-06, -4.842877388000488e-06, -4.6510249376297e-06, -4.459172487258911e-06, -4.2673200368881226e-06, -4.075467586517334e-06, -3.883615136146545e-06, -3.691762685775757e-06, -3.4999102354049683e-06, -3.3080577850341797e-06, -3.116205334663391e-06, -2.9243528842926025e-06, -2.732500433921814e-06, -2.5406479835510254e-06, -2.348795533180237e-06, -2.1569430828094482e-06, -1.9650906324386597e-06, -1.773238182067871e-06, -1.5813857316970825e-06, -1.389533281326294e-06, -1.1976808309555054e-06, -1.0058283805847168e-06, -8.139759302139282e-07, -6.221234798431396e-07, -4.302710294723511e-07, -2.384185791015625e-07, -4.6566128730773926e-08, 1.4528632164001465e-07, 3.371387720108032e-07, 5.289912223815918e-07, 7.208436727523804e-07, 9.126961231231689e-07, 1.1045485734939575e-06, 1.296401023864746e-06, 1.4882534742355347e-06, 1.6801059246063232e-06, 1.8719583749771118e-06, 2.0638108253479004e-06, 2.255663275718689e-06, 2.4475157260894775e-06, 2.639368176460266e-06, 2.8312206268310547e-06, 3.0230730772018433e-06, 3.214925527572632e-06, 3.4067779779434204e-06, 3.598630428314209e-06, 3.7904828786849976e-06, 3.982335329055786e-06, 4.174187779426575e-06, 4.366040229797363e-06, 4.557892680168152e-06, 4.7497451305389404e-06, 4.941597580909729e-06, 5.133450031280518e-06, 5.325302481651306e-06, 5.517154932022095e-06, 5.709007382392883e-06, 5.900859832763672e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 4.0, 1.0, 3.0, 2.0, 5.0, 8.0, 7.0, 14.0, 8.0, 21.0, 23.0, 27.0, 48.0, 36.0, 55.0, 73.0, 87.0, 90.0, 81.0, 65.0, 68.0, 62.0, 41.0, 37.0, 33.0, 22.0, 11.0, 15.0, 15.0, 8.0, 8.0, 5.0, 1.0, 6.0, 4.0, 2.0, 7.0, 2.0, 2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.245208740234375e-06, -5.0710514187812805e-06, -4.896894097328186e-06, -4.7227367758750916e-06, -4.548579454421997e-06, -4.374422132968903e-06, -4.200264811515808e-06, -4.026107490062714e-06, -3.851950168609619e-06, -3.6777928471565247e-06, -3.50363552570343e-06, -3.3294782042503357e-06, -3.155320882797241e-06, -2.9811635613441467e-06, -2.8070062398910522e-06, -2.6328489184379578e-06, -2.4586915969848633e-06, -2.284534275531769e-06, -2.1103769540786743e-06, -1.93621963262558e-06, -1.7620623111724854e-06, -1.5879049897193909e-06, -1.4137476682662964e-06, -1.239590346813202e-06, -1.0654330253601074e-06, -8.912757039070129e-07, -7.171183824539185e-07, -5.42961061000824e-07, -3.688037395477295e-07, -1.94646418094635e-07, -2.0489096641540527e-08, 1.5366822481155396e-07, 3.2782554626464844e-07, 5.019828677177429e-07, 6.761401891708374e-07, 8.502975106239319e-07, 1.0244548320770264e-06, 1.1986121535301208e-06, 1.3727694749832153e-06, 1.5469267964363098e-06, 1.7210841178894043e-06, 1.8952414393424988e-06, 2.0693987607955933e-06, 2.2435560822486877e-06, 2.4177134037017822e-06, 2.5918707251548767e-06, 2.766028046607971e-06, 2.9401853680610657e-06, 3.11434268951416e-06, 3.2885000109672546e-06, 3.462657332420349e-06, 3.6368146538734436e-06, 3.810971975326538e-06, 3.9851292967796326e-06, 4.159286618232727e-06, 4.3334439396858215e-06, 4.507601261138916e-06, 4.6817585825920105e-06, 4.855915904045105e-06, 5.0300732254981995e-06, 5.204230546951294e-06, 5.3783878684043884e-06, 5.552545189857483e-06, 5.726702511310577e-06, 5.900859832763672e-06]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 14.0, 14.0, 47.0, 72.0, 166.0, 259.0, 185.0, 81.0, 61.0, 40.0, 25.0, 11.0, 12.0, 11.0, 3.0, 2.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.000359229336027056, -0.00034707499435171485, -0.0003349206817802042, -0.0003227663692086935, -0.0003106120275333524, -0.00029845768585801125, -0.0002863033732865006, -0.0002741490607149899, -0.00026199471903964877, -0.00024984037736430764, -0.00023768606479279697, -0.00022553173766937107, -0.00021337741054594517, -0.00020122308342251927, -0.00018906875629909337, -0.00017691442917566746, -0.00016476010205224156, -0.00015260577492881566, -0.00014045144780538976, -0.00012829712068196386, -0.00011614279355853796, -0.00010398846643511206, -9.183413931168616e-05, -7.967981218826026e-05, -6.752548506483436e-05, -5.5371157941408455e-05, -4.3216830817982554e-05, -3.1062503694556653e-05, -1.8908176571130753e-05, -6.753849447704852e-06, 5.400477675721049e-06, 1.755480479914695e-05, 2.970913192257285e-05, 4.186345904599875e-05, 5.401778616942465e-05, 6.617211329285055e-05, 7.832644041627645e-05, 9.048076753970236e-05, 0.00010263509466312826, 0.00011478942178655416, 0.00012694374890998006, 0.00013909807603340596, 0.00015125240315683186, 0.00016340673028025776, 0.00017556105740368366, 0.00018771538452710956, 0.00019986971165053546, 0.00021202403877396137, 0.00022417836589738727, 0.00023633269302081317, 0.00024848702014423907, 0.00026064133271574974, 0.00027279567439109087, 0.000284950016066432, 0.00029710432863794267, 0.00030925864120945334, 0.0003214129828847945, 0.0003335673245601356, 0.0003457216371316463, 0.00035787594970315695, 0.0003700302913784981, 0.0003821846330538392, 0.0003943389456253499, 0.00040649325819686055, 0.0004186475998722017]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 3.0, 2.0, 5.0, 6.0, 10.0, 16.0, 16.0, 10.0, 23.0, 24.0, 33.0, 23.0, 36.0, 35.0, 46.0, 43.0, 51.0, 44.0, 40.0, 40.0, 52.0, 49.0, 40.0, 54.0, 50.0, 39.0, 35.0, 24.0, 33.0, 32.0, 20.0, 20.0, 7.0, 8.0, 10.0, 11.0, 5.0, 5.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014978647232055664, -0.00014559458941221237, -0.0001414027065038681, -0.00013721082359552383, -0.00013301894068717957, -0.0001288270577788353, -0.00012463517487049103, -0.00012044329196214676, -0.00011625140905380249, -0.00011205952614545822, -0.00010786764323711395, -0.00010367576032876968, -9.948387742042542e-05, -9.529199451208115e-05, -9.110011160373688e-05, -8.690822869539261e-05, -8.271634578704834e-05, -7.852446287870407e-05, -7.43325799703598e-05, -7.014069706201553e-05, -6.594881415367126e-05, -6.1756931245327e-05, -5.756504833698273e-05, -5.337316542863846e-05, -4.918128252029419e-05, -4.498939961194992e-05, -4.079751670360565e-05, -3.660563379526138e-05, -3.2413750886917114e-05, -2.8221867978572845e-05, -2.4029985070228577e-05, -1.9838102161884308e-05, -1.564621925354004e-05, -1.145433634519577e-05, -7.2624534368515015e-06, -3.0705705285072327e-06, 1.1213123798370361e-06, 5.313195288181305e-06, 9.505078196525574e-06, 1.3696961104869843e-05, 1.788884401321411e-05, 2.208072692155838e-05, 2.627260982990265e-05, 3.0464492738246918e-05, 3.4656375646591187e-05, 3.8848258554935455e-05, 4.3040141463279724e-05, 4.723202437162399e-05, 5.142390727996826e-05, 
5.561579018831253e-05, 5.98076730966568e-05, 6.399955600500107e-05, 6.819143891334534e-05, 7.23833218216896e-05, 7.657520473003387e-05, 8.076708763837814e-05, 8.495897054672241e-05, 8.915085345506668e-05, 9.334273636341095e-05, 9.753461927175522e-05, 0.00010172650218009949, 0.00010591838508844376, 0.00011011026799678802, 0.0001143021509051323, 0.00011849403381347656]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 5.0, 5.0, 14.0, 21.0, 22.0, 30.0, 35.0, 45.0, 64.0, 92.0, 131.0, 191.0, 252.0, 395.0, 590.0, 904.0, 1615.0, 2810.0, 5211.0, 10815.0, 25414.0, 123392.0, 3660868.0, 297790.0, 34947.0, 13658.0, 6368.0, 3227.0, 1976.0, 1177.0, 673.0, 496.0, 326.0, 195.0, 157.0, 106.0, 85.0, 44.0, 36.0, 20.0, 24.0, 18.0, 18.0, 7.0, 4.0, 5.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], "bins": [-9.864568710327148e-05, -9.562354534864426e-05, -9.260140359401703e-05, -8.95792618393898e-05, -8.655712008476257e-05, -8.353497833013535e-05, -8.051283657550812e-05, -7.749069482088089e-05, -7.446855306625366e-05, -7.144641131162643e-05, -6.84242695569992e-05, -6.540212780237198e-05, -6.237998604774475e-05, -5.935784429311752e-05, -5.6335702538490295e-05, -5.331356078386307e-05, -5.029141902923584e-05, -4.726927727460861e-05, -4.4247135519981384e-05, -4.1224993765354156e-05, -3.820285201072693e-05, -3.51807102560997e-05, -3.215856850147247e-05, -2.9136426746845245e-05, -2.6114284992218018e-05, -2.309214323759079e-05, -2.0070001482963562e-05, -1.7047859728336334e-05, -1.4025717973709106e-05, -1.1003576219081879e-05, -7.981434464454651e-06, -4.959292709827423e-06, -1.9371509552001953e-06, 1.0849907994270325e-06, 4.10713255405426e-06, 7.129274308681488e-06, 1.0151416063308716e-05, 1.3173557817935944e-05, 1.619569957256317e-05, 1.92178413271904e-05, 2.2239983081817627e-05, 2.5262124836444855e-05, 2.8284266591072083e-05, 3.130640834569931e-05, 3.432855010032654e-05, 3.7350691854953766e-05, 4.0372833609580994e-05, 4.339497536420822e-05, 4.641711711883545e-05, 4.943925887346268e-05, 5.2461400628089905e-05, 5.548354238271713e-05, 5.850568413734436e-05, 6.152782589197159e-05, 6.454996764659882e-05, 6.757210940122604e-05, 7.059425115585327e-05, 7.36163929104805e-05, 7.663853466510773e-05, 7.966067641973495e-05, 8.268281817436218e-05, 8.570495992898941e-05, 8.872710168361664e-05, 9.174924343824387e-05, 9.47713851928711e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 4.0, 8.0, 2.0, 5.0, 8.0, 11.0, 14.0, 20.0, 16.0, 21.0, 36.0, 38.0, 45.0, 58.0, 63.0, 82.0, 66.0, 55.0, 67.0, 58.0, 57.0, 38.0, 54.0, 37.0, 21.0, 30.0, 23.0, 9.0, 14.0, 5.0, 7.0, 10.0, 7.0, 2.0, 3.0, 5.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.4616718292236328e-05, -2.382975071668625e-05, -2.304278314113617e-05, -2.225581556558609e-05, -2.146884799003601e-05, -2.068188041448593e-05, -1.9894912838935852e-05, -1.9107945263385773e-05, -1.8320977687835693e-05, -1.7534010112285614e-05, -1.6747042536735535e-05, -1.5960074961185455e-05, -1.5173107385635376e-05, -1.4386139810085297e-05, -1.3599172234535217e-05, -1.2812204658985138e-05, -1.2025237083435059e-05, -1.123826950788498e-05, -1.04513019323349e-05, -9.66433435678482e-06, -8.877366781234741e-06, -8.090399205684662e-06, -7.3034316301345825e-06, -6.516464054584503e-06, -5.729496479034424e-06, -4.9425289034843445e-06, 
-4.155561327934265e-06, -3.368593752384186e-06, -2.5816261768341064e-06, -1.794658601284027e-06, -1.0076910257339478e-06, -2.207234501838684e-07, 5.662441253662109e-07, 1.3532117009162903e-06, 2.1401792764663696e-06, 2.927146852016449e-06, 3.7141144275665283e-06, 4.501082003116608e-06, 5.288049578666687e-06, 6.075017154216766e-06, 6.861984729766846e-06, 7.648952305316925e-06, 8.435919880867004e-06, 9.222887456417084e-06, 1.0009855031967163e-05, 1.0796822607517242e-05, 1.1583790183067322e-05, 1.2370757758617401e-05, 1.315772533416748e-05, 1.394469290971756e-05, 1.473166048526764e-05, 1.551862806081772e-05, 1.6305595636367798e-05, 1.7092563211917877e-05, 1.7879530787467957e-05, 1.8666498363018036e-05, 1.9453465938568115e-05, 2.0240433514118195e-05, 2.1027401089668274e-05, 2.1814368665218353e-05, 2.2601336240768433e-05, 2.3388303816318512e-05, 2.417527139186859e-05, 2.496223896741867e-05, 2.574920654296875e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 4.0, 5.0, 0.0, 3.0, 6.0, 10.0, 6.0, 9.0, 23.0, 32.0, 50.0, 62.0, 82.0, 160.0, 236.0, 364.0, 556.0, 951.0, 1774.0, 3167.0, 6356.0, 12733.0, 29114.0, 88660.0, 874542.0, 2981173.0, 123901.0, 38568.0, 15529.0, 7316.0, 3856.0, 1995.0, 1230.0, 653.0, 432.0, 244.0, 163.0, 103.0, 72.0, 51.0, 29.0, 27.0, 14.0, 13.0, 5.0, 5.0, 5.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-5.435943603515625e-05, -5.242694169282913e-05, -5.0494447350502014e-05, -4.8561953008174896e-05, -4.662945866584778e-05, -4.469696432352066e-05, -4.276446998119354e-05, -4.0831975638866425e-05, -3.889948129653931e-05, -3.696698695421219e-05, -3.503449261188507e-05, -3.310199826955795e-05, -3.1169503927230835e-05, -2.9237009584903717e-05, -2.73045152425766e-05, -2.537202090024948e-05, -2.3439526557922363e-05, -2.1507032215595245e-05, -1.9574537873268127e-05, -1.764204353094101e-05, -1.570954918861389e-05, -1.3777054846286774e-05, -1.1844560503959656e-05, -9.912066161632538e-06, -7.97957181930542e-06, -6.047077476978302e-06, -4.114583134651184e-06, -2.182088792324066e-06, -2.4959444999694824e-07, 1.6828998923301697e-06, 3.6153942346572876e-06, 5.5478885769844055e-06, 7.4803829193115234e-06, 9.412877261638641e-06, 1.134537160396576e-05, 1.3277865946292877e-05, 1.5210360288619995e-05, 1.7142854630947113e-05, 1.907534897327423e-05, 2.100784331560135e-05, 2.2940337657928467e-05, 2.4872832000255585e-05, 2.6805326342582703e-05, 2.873782068490982e-05, 3.067031502723694e-05, 3.2602809369564056e-05, 3.4535303711891174e-05, 3.646779805421829e-05, 3.840029239654541e-05, 4.033278673887253e-05, 4.2265281081199646e-05, 4.4197775423526764e-05, 4.613026976585388e-05, 4.8062764108181e-05, 4.999525845050812e-05, 5.1927752792835236e-05, 5.3860247135162354e-05, 5.579274147748947e-05, 5.772523581981659e-05, 5.965773016214371e-05, 6.159022450447083e-05, 6.352271884679794e-05, 6.545521318912506e-05, 6.738770753145218e-05, 6.93202018737793e-05]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 5.0, 4.0, 6.0, 3.0, 9.0, 15.0, 18.0, 25.0, 21.0, 33.0, 43.0, 57.0, 81.0, 145.0, 378.0, 994.0, 1243.0, 500.0, 157.0, 92.0, 57.0, 40.0, 32.0, 25.0, 11.0, 11.0, 14.0, 12.0, 7.0, 15.0, 4.0, 4.0, 4.0, 3.0, 0.0, 0.0, 6.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.547834396362305e-05, -4.397239536046982e-05, -4.246644675731659e-05, 
-4.096049815416336e-05, -3.945454955101013e-05, -3.79486009478569e-05, -3.6442652344703674e-05, -3.4936703741550446e-05, -3.343075513839722e-05, -3.192480653524399e-05, -3.041885793209076e-05, -2.891290932893753e-05, -2.7406960725784302e-05, -2.5901012122631073e-05, -2.4395063519477844e-05, -2.2889114916324615e-05, -2.1383166313171387e-05, -1.9877217710018158e-05, -1.837126910686493e-05, -1.68653205037117e-05, -1.535937190055847e-05, -1.3853423297405243e-05, -1.2347474694252014e-05, -1.0841526091098785e-05, -9.335577487945557e-06, -7.829628884792328e-06, -6.323680281639099e-06, -4.81773167848587e-06, -3.3117830753326416e-06, -1.8058344721794128e-06, -2.998858690261841e-07, 1.2060627341270447e-06, 2.7120113372802734e-06, 4.217959940433502e-06, 5.723908543586731e-06, 7.22985714673996e-06, 8.735805749893188e-06, 1.0241754353046417e-05, 1.1747702956199646e-05, 1.3253651559352875e-05, 1.4759600162506104e-05, 1.6265548765659332e-05, 1.777149736881256e-05, 1.927744597196579e-05, 2.078339457511902e-05, 2.2289343178272247e-05, 2.3795291781425476e-05, 2.5301240384578705e-05, 2.6807188987731934e-05, 2.8313137590885162e-05, 2.981908619403839e-05, 3.132503479719162e-05, 3.283098340034485e-05, 3.433693200349808e-05, 3.5842880606651306e-05, 3.7348829209804535e-05, 3.8854777812957764e-05, 4.036072641611099e-05, 4.186667501926422e-05, 4.337262362241745e-05, 4.487857222557068e-05, 4.638452082872391e-05, 4.7890469431877136e-05, 4.9396418035030365e-05, 5.0902366638183594e-05]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 8.0, 6.0, 7.0, 13.0, 13.0, 23.0, 32.0, 39.0, 59.0, 96.0, 116.0, 133.0, 136.0, 105.0, 73.0, 39.0, 32.0, 15.0, 19.0, 15.0, 6.0, 5.0, 6.0, 4.0, 4.0, 2.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0], "bins": [-0.0004242008726578206, -0.00041443126974627376, -0.0004046616959385574, -0.00039489209302701056, -0.00038512249011546373, -0.00037535291630774736, -0.00036558331339620054, -0.0003558137104846537, -0.00034604413667693734, -0.0003362745337653905, -0.00032650495995767415, -0.0003167353570461273, -0.0003069657541345805, -0.00029719615122303367, -0.0002874265774153173, -0.00027765697450377047, -0.00026788737159222364, -0.0002581177686806768, -0.00024834819487296045, -0.00023857859196141362, -0.0002288089890498668, -0.0002190394006902352, -0.0002092698123306036, -0.00019950020941905677, -0.00018973062105942518, -0.00017996103269979358, -0.00017019142978824675, -0.00016042184142861515, -0.00015065225306898355, -0.00014088265015743673, -0.00013111306179780513, -0.00012134346616221592, -0.00011157384142279625, -0.00010180424578720704, -9.203465015161783e-05, -8.226506179198623e-05, -7.249546615639701e-05, -6.27258705208078e-05, -5.29562785231974e-05, -4.318668652558699e-05, -3.341709088999778e-05, -2.364749707339797e-05, -1.3877903256798163e-05, -4.108309440198354e-06, 5.661284376401454e-06, 1.5430880011990666e-05, 2.520047200960107e-05, 3.4970064007211477e-05, 4.473965964280069e-05, 5.45092552783899e-05, 6.427885091397911e-05, 7.404843927361071e-05, 8.381803490919992e-05, 9.358763054478914e-05, 0.00010335721890442073, 0.00011312681454000995, 0.00012289641017559916, 0.00013266599853523076, 0.00014243560144677758, 0.00015220518980640918, 0.00016197477816604078, 0.0001717443810775876, 0.0001815139694372192, 0.0001912835577968508, 0.00020105316070839763]}, 
"gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 1.0, 3.0, 1.0, 4.0, 5.0, 3.0, 5.0, 12.0, 13.0, 12.0, 15.0, 15.0, 15.0, 19.0, 23.0, 21.0, 40.0, 43.0, 38.0, 37.0, 41.0, 30.0, 32.0, 30.0, 41.0, 49.0, 40.0, 31.0, 37.0, 31.0, 29.0, 31.0, 22.0, 26.0, 28.0, 25.0, 28.0, 18.0, 16.0, 17.0, 22.0, 10.0, 8.0, 8.0, 10.0, 4.0, 4.0, 6.0, 1.0, 2.0, 5.0, 0.0, 3.0, 2.0, 2.0, 0.0, 1.0, 2.0], "bins": [-0.00011026859283447266, -0.00010677706450223923, -0.0001032855361700058, -9.979400783777237e-05, -9.630247950553894e-05, -9.281095117330551e-05, -8.931942284107208e-05, -8.582789450883865e-05, -8.233636617660522e-05, -7.88448378443718e-05, -7.535330951213837e-05, -7.186178117990494e-05, -6.837025284767151e-05, -6.487872451543808e-05, -6.138719618320465e-05, -5.789566785097122e-05, -5.440413951873779e-05, -5.0912611186504364e-05, -4.7421082854270935e-05, -4.3929554522037506e-05, -4.043802618980408e-05, -3.694649785757065e-05, -3.345496952533722e-05, -2.996344119310379e-05, -2.647191286087036e-05, -2.2980384528636932e-05, -1.9488856196403503e-05, -1.5997327864170074e-05, -1.2505799531936646e-05, -9.014271199703217e-06, -5.522742867469788e-06, -2.0312145352363586e-06, 1.4603137969970703e-06, 4.951842129230499e-06, 8.443370461463928e-06, 1.1934898793697357e-05, 1.5426427125930786e-05, 1.8917955458164215e-05, 2.2409483790397644e-05, 2.5901012122631073e-05, 2.9392540454864502e-05, 3.288406878709793e-05, 3.637559711933136e-05, 3.986712545156479e-05, 4.335865378379822e-05, 4.685018211603165e-05, 5.0341710448265076e-05, 5.3833238780498505e-05, 5.7324767112731934e-05, 6.081629544496536e-05, 6.430782377719879e-05, 6.779935210943222e-05, 7.129088044166565e-05, 7.478240877389908e-05, 7.827393710613251e-05, 8.176546543836594e-05, 8.525699377059937e-05, 8.87485221028328e-05, 9.224005043506622e-05, 9.573157876729965e-05, 9.922310709953308e-05, 0.00010271463543176651, 0.00010620616376399994, 0.00010969769209623337, 0.0001131892204284668]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 5.0, 5.0, 4.0, 11.0, 14.0, 22.0, 24.0, 37.0, 67.0, 96.0, 143.0, 242.0, 332.0, 536.0, 864.0, 1390.0, 2354.0, 4238.0, 7860.0, 14970.0, 31262.0, 69114.0, 197015.0, 461108.0, 145736.0, 56924.0, 25500.0, 12742.0, 6583.0, 3762.0, 2132.0, 1251.0, 785.0, 513.0, 314.0, 222.0, 144.0, 73.0, 53.0, 42.0, 23.0, 20.0, 12.0, 6.0, 4.0, 3.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.00011271238327026367, -0.00010923109948635101, -0.00010574981570243835, -0.0001022685319185257, -9.878724813461304e-05, -9.530596435070038e-05, -9.182468056678772e-05, -8.834339678287506e-05, -8.48621129989624e-05, -8.138082921504974e-05, -7.789954543113708e-05, -7.441826164722443e-05, -7.093697786331177e-05, -6.745569407939911e-05, -6.397441029548645e-05, -6.049312651157379e-05, -5.701184272766113e-05, -5.3530558943748474e-05, -5.0049275159835815e-05, -4.656799137592316e-05, -4.30867075920105e-05, -3.960542380809784e-05, -3.612414002418518e-05, -3.264285624027252e-05, -2.9161572456359863e-05, -2.5680288672447205e-05, -2.2199004888534546e-05, -1.8717721104621887e-05, -1.5236437320709229e-05, -1.175515353679657e-05, -8.273869752883911e-06, -4.7925859689712524e-06, -1.3113021850585938e-06, 2.169981598854065e-06, 5.651265382766724e-06, 9.132549166679382e-06, 1.2613832950592041e-05, 1.60951167345047e-05, 1.957640051841736e-05, 2.3057684302330017e-05, 2.6538968086242676e-05, 3.0020251870155334e-05, 
3.350153565406799e-05, 3.698281943798065e-05, 4.046410322189331e-05, 4.394538700580597e-05, 4.742667078971863e-05, 5.090795457363129e-05, 5.4389238357543945e-05, 5.7870522141456604e-05, 6.135180592536926e-05, 6.483308970928192e-05, 6.831437349319458e-05, 7.179565727710724e-05, 7.52769410610199e-05, 7.875822484493256e-05, 8.223950862884521e-05, 8.572079241275787e-05, 8.920207619667053e-05, 9.268335998058319e-05, 9.616464376449585e-05, 9.964592754840851e-05, 0.00010312721133232117, 0.00010660849511623383, 0.00011008977890014648]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 8.0, 14.0, 9.0, 15.0, 18.0, 27.0, 31.0, 31.0, 37.0, 47.0, 55.0, 60.0, 83.0, 61.0, 76.0, 78.0, 64.0, 41.0, 38.0, 37.0, 35.0, 27.0, 22.0, 16.0, 17.0, 12.0, 7.0, 7.0, 2.0, 5.0, 0.0, 4.0, 4.0, 5.0, 1.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.294778823852539e-05, -2.2064894437789917e-05, -2.1182000637054443e-05, -2.029910683631897e-05, -1.9416213035583496e-05, -1.8533319234848022e-05, -1.765042543411255e-05, -1.6767531633377075e-05, -1.58846378326416e-05, -1.5001744031906128e-05, -1.4118850231170654e-05, -1.323595643043518e-05, -1.2353062629699707e-05, -1.1470168828964233e-05, -1.058727502822876e-05, -9.704381227493286e-06, -8.821487426757812e-06, -7.938593626022339e-06, -7.055699825286865e-06, -6.172806024551392e-06, -5.289912223815918e-06, -4.407018423080444e-06, -3.5241246223449707e-06, -2.641230821609497e-06, -1.7583370208740234e-06, -8.754432201385498e-07, 7.450580596923828e-09, 8.903443813323975e-07, 1.773238182067871e-06, 2.6561319828033447e-06, 3.5390257835388184e-06, 4.421919584274292e-06, 5.304813385009766e-06, 6.187707185745239e-06, 7.070600986480713e-06, 7.953494787216187e-06, 8.83638858795166e-06, 9.719282388687134e-06, 1.0602176189422607e-05, 1.1485069990158081e-05, 1.2367963790893555e-05, 1.3250857591629028e-05, 1.4133751392364502e-05, 1.5016645193099976e-05, 1.589953899383545e-05, 1.6782432794570923e-05, 1.7665326595306396e-05, 1.854822039604187e-05, 1.9431114196777344e-05, 2.0314007997512817e-05, 2.119690179824829e-05, 2.2079795598983765e-05, 2.2962689399719238e-05, 2.3845583200454712e-05, 2.4728477001190186e-05, 2.561137080192566e-05, 2.6494264602661133e-05, 2.7377158403396606e-05, 2.826005220413208e-05, 2.9142946004867554e-05, 3.0025839805603027e-05, 3.09087336063385e-05, 3.1791627407073975e-05, 3.267452120780945e-05, 3.355741500854492e-05]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 4.0, 8.0, 6.0, 12.0, 17.0, 24.0, 33.0, 45.0, 66.0, 86.0, 121.0, 147.0, 222.0, 306.0, 444.0, 618.0, 898.0, 1299.0, 1968.0, 2754.0, 4139.0, 6387.0, 9702.0, 15568.0, 27259.0, 44864.0, 83095.0, 171430.0, 328491.0, 159010.0, 78159.0, 42570.0, 25013.0, 15617.0, 9436.0, 5996.0, 4122.0, 2602.0, 1783.0, 1281.0, 873.0, 599.0, 459.0, 298.0, 204.0, 158.0, 110.0, 79.0, 48.0, 43.0, 28.0, 25.0, 10.0, 14.0, 8.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0], "bins": [-3.9577484130859375e-05, -3.8319267332553864e-05, -3.706105053424835e-05, -3.580283373594284e-05, -3.454461693763733e-05, -3.328640013933182e-05, -3.2028183341026306e-05, -3.0769966542720795e-05, -2.9511749744415283e-05, -2.8253532946109772e-05, -2.699531614780426e-05, -2.573709934949875e-05, -2.4478882551193237e-05, -2.3220665752887726e-05, -2.1962448954582214e-05, -2.0704232156276703e-05, -1.944601535797119e-05, -1.818779855966568e-05, -1.692958176136017e-05, 
-1.5671364963054657e-05, -1.4413148164749146e-05, -1.3154931366443634e-05, -1.1896714568138123e-05, -1.0638497769832611e-05, -9.3802809715271e-06, -8.122064173221588e-06, -6.863847374916077e-06, -5.605630576610565e-06, -4.347413778305054e-06, -3.0891969799995422e-06, -1.8309801816940308e-06, -5.727633833885193e-07, 6.854534149169922e-07, 1.9436702132225037e-06, 3.201887011528015e-06, 4.460103809833527e-06, 5.718320608139038e-06, 6.9765374064445496e-06, 8.234754204750061e-06, 9.492971003055573e-06, 1.0751187801361084e-05, 1.2009404599666595e-05, 1.3267621397972107e-05, 1.4525838196277618e-05, 1.578405499458313e-05, 1.704227179288864e-05, 1.8300488591194153e-05, 1.9558705389499664e-05, 2.0816922187805176e-05, 2.2075138986110687e-05, 2.33333557844162e-05, 2.459157258272171e-05, 2.584978938102722e-05, 2.7108006179332733e-05, 2.8366222977638245e-05, 2.9624439775943756e-05, 3.088265657424927e-05, 3.214087337255478e-05, 3.339909017086029e-05, 3.46573069691658e-05, 3.5915523767471313e-05, 3.7173740565776825e-05, 3.8431957364082336e-05, 3.969017416238785e-05, 4.094839096069336e-05]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 3.0, 2.0, 5.0, 2.0, 4.0, 2.0, 3.0, 4.0, 8.0, 10.0, 8.0, 5.0, 12.0, 9.0, 20.0, 31.0, 25.0, 35.0, 30.0, 33.0, 27.0, 46.0, 38.0, 52.0, 38.0, 31.0, 43.0, 41.0, 38.0, 47.0, 40.0, 32.0, 33.0, 40.0, 23.0, 26.0, 31.0, 21.0, 23.0, 15.0, 12.0, 15.0, 13.0, 8.0, 7.0, 5.0, 6.0, 4.0, 3.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.976987838745117e-05, -4.8099085688591003e-05, -4.6428292989730835e-05, -4.4757500290870667e-05, -4.30867075920105e-05, -4.141591489315033e-05, -3.974512219429016e-05, -3.807432949542999e-05, -3.6403536796569824e-05, -3.4732744097709656e-05, -3.306195139884949e-05, -3.139115869998932e-05, -2.972036600112915e-05, -2.8049573302268982e-05, -2.6378780603408813e-05, -2.4707987904548645e-05, -2.3037195205688477e-05, -2.1366402506828308e-05, -1.969560980796814e-05, -1.802481710910797e-05, -1.6354024410247803e-05, -1.4683231711387634e-05, -1.3012439012527466e-05, -1.1341646313667297e-05, -9.670853614807129e-06, -8.00006091594696e-06, -6.329268217086792e-06, -4.6584755182266235e-06, -2.987682819366455e-06, -1.3168901205062866e-06, 3.5390257835388184e-07, 2.0246952772140503e-06, 3.6954879760742188e-06, 5.366280674934387e-06, 7.037073373794556e-06, 8.707866072654724e-06, 1.0378658771514893e-05, 1.2049451470375061e-05, 1.372024416923523e-05, 1.5391036868095398e-05, 1.7061829566955566e-05, 1.8732622265815735e-05, 2.0403414964675903e-05, 2.2074207663536072e-05, 2.374500036239624e-05, 2.541579306125641e-05, 2.7086585760116577e-05, 2.8757378458976746e-05, 3.0428171157836914e-05, 3.209896385669708e-05, 3.376975655555725e-05, 3.544054925441742e-05, 3.711134195327759e-05, 3.8782134652137756e-05, 4.0452927350997925e-05, 4.212372004985809e-05, 4.379451274871826e-05, 4.546530544757843e-05, 4.71360981464386e-05, 4.880689084529877e-05, 5.0477683544158936e-05, 5.2148476243019104e-05, 5.381926894187927e-05, 5.549006164073944e-05, 5.716085433959961e-05]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 4.0, 6.0, 1.0, 10.0, 15.0, 8.0, 15.0, 34.0, 27.0, 56.0, 81.0, 138.0, 160.0, 285.0, 417.0, 622.0, 920.0, 1931.0, 3223.0, 5683.0, 8772.0, 20460.0, 50095.0, 123563.0, 563986.0, 174851.0, 50206.0, 18082.0, 11059.0, 5837.0, 3194.0, 1513.0, 1242.0, 783.0, 455.0, 249.0, 181.0, 142.0, 86.0, 64.0, 36.0, 16.0, 
22.0, 13.0, 11.0, 6.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-1.1801719665527344e-05, -1.1459924280643463e-05, -1.1118128895759583e-05, -1.0776333510875702e-05, -1.0434538125991821e-05, -1.009274274110794e-05, -9.75094735622406e-06, -9.40915197134018e-06, -9.067356586456299e-06, -8.725561201572418e-06, -8.383765816688538e-06, -8.041970431804657e-06, -7.700175046920776e-06, -7.358379662036896e-06, -7.016584277153015e-06, -6.6747888922691345e-06, -6.332993507385254e-06, -5.991198122501373e-06, -5.649402737617493e-06, -5.307607352733612e-06, -4.9658119678497314e-06, -4.624016582965851e-06, -4.28222119808197e-06, -3.94042581319809e-06, -3.598630428314209e-06, -3.2568350434303284e-06, -2.9150396585464478e-06, -2.573244273662567e-06, -2.2314488887786865e-06, -1.889653503894806e-06, -1.5478581190109253e-06, -1.2060627341270447e-06, -8.642673492431641e-07, -5.224719643592834e-07, -1.8067657947540283e-07, 1.6111880540847778e-07, 5.029141902923584e-07, 8.44709575176239e-07, 1.1865049600601196e-06, 1.5283003449440002e-06, 1.8700957298278809e-06, 2.2118911147117615e-06, 2.553686499595642e-06, 2.8954818844795227e-06, 3.2372772693634033e-06, 3.579072654247284e-06, 3.9208680391311646e-06, 4.262663424015045e-06, 4.604458808898926e-06, 4.946254193782806e-06, 5.288049578666687e-06, 5.629844963550568e-06, 5.971640348434448e-06, 6.313435733318329e-06, 6.6552311182022095e-06, 6.99702650308609e-06, 7.338821887969971e-06, 7.680617272853851e-06, 8.022412657737732e-06, 8.364208042621613e-06, 8.706003427505493e-06, 9.047798812389374e-06, 9.389594197273254e-06, 9.731389582157135e-06, 1.0073184967041016e-05]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 6.0, 0.0, 3.0, 4.0, 9.0, 0.0, 15.0, 16.0, 25.0, 35.0, 0.0, 41.0, 51.0, 55.0, 0.0, 64.0, 75.0, 78.0, 76.0, 0.0, 74.0, 70.0, 55.0, 59.0, 0.0, 36.0, 45.0, 31.0, 0.0, 22.0, 19.0, 18.0, 9.0, 0.0, 6.0, 1.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-1.6689300537109375e-06, -1.6223639249801636e-06, -1.5757977962493896e-06, -1.5292316675186157e-06, -1.4826655387878418e-06, -1.4360994100570679e-06, -1.389533281326294e-06, -1.34296715259552e-06, -1.296401023864746e-06, -1.2498348951339722e-06, -1.2032687664031982e-06, -1.1567026376724243e-06, -1.1101365089416504e-06, -1.0635703802108765e-06, -1.0170042514801025e-06, -9.704381227493286e-07, -9.238719940185547e-07, -8.773058652877808e-07, -8.307397365570068e-07, -7.841736078262329e-07, -7.37607479095459e-07, -6.910413503646851e-07, -6.444752216339111e-07, -5.979090929031372e-07, -5.513429641723633e-07, -5.047768354415894e-07, -4.5821070671081543e-07, -4.116445779800415e-07, -3.650784492492676e-07, -3.1851232051849365e-07, -2.7194619178771973e-07, -2.253800630569458e-07, -1.7881393432617188e-07, -1.3224780559539795e-07, -8.568167686462402e-08, -3.91155481338501e-08, 7.450580596923828e-09, 5.4016709327697754e-08, 1.0058283805847168e-07, 1.471489667892456e-07, 1.9371509552001953e-07, 2.4028122425079346e-07, 2.868473529815674e-07, 3.334134817123413e-07, 3.7997961044311523e-07, 4.2654573917388916e-07, 4.731118679046631e-07, 5.19677996635437e-07, 5.662441253662109e-07, 6.128102540969849e-07, 6.593763828277588e-07, 7.059425115585327e-07, 7.525086402893066e-07, 7.990747690200806e-07, 8.456408977508545e-07, 8.922070264816284e-07, 9.387731552124023e-07, 9.853392839431763e-07, 1.0319054126739502e-06, 1.0784715414047241e-06, 1.125037670135498e-06, 1.171603798866272e-06, 
1.218169927597046e-06, 1.2647360563278198e-06, 1.3113021850585938e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 6.0, 3.0, 9.0, 10.0, 24.0, 25.0, 29.0, 50.0, 84.0, 141.0, 199.0, 337.0, 504.0, 840.0, 1479.0, 2538.0, 4786.0, 9300.0, 20150.0, 53558.0, 204164.0, 592001.0, 98541.0, 31654.0, 13521.0, 6547.0, 3381.0, 1828.0, 1120.0, 625.0, 392.0, 260.0, 155.0, 101.0, 55.0, 51.0, 26.0, 23.0, 14.0, 9.0, 5.0, 7.0, 4.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 2.0], "bins": [-7.748603820800781e-06, -7.508322596549988e-06, -7.268041372299194e-06, -7.027760148048401e-06, -6.787478923797607e-06, -6.547197699546814e-06, -6.3069164752960205e-06, -6.066635251045227e-06, -5.826354026794434e-06, -5.58607280254364e-06, -5.345791578292847e-06, -5.105510354042053e-06, -4.86522912979126e-06, -4.624947905540466e-06, -4.384666681289673e-06, -4.144385457038879e-06, -3.904104232788086e-06, -3.6638230085372925e-06, -3.423541784286499e-06, -3.1832605600357056e-06, -2.942979335784912e-06, -2.7026981115341187e-06, -2.462416887283325e-06, -2.2221356630325317e-06, -1.9818544387817383e-06, -1.7415732145309448e-06, -1.5012919902801514e-06, -1.261010766029358e-06, -1.0207295417785645e-06, -7.80448317527771e-07, -5.401670932769775e-07, -2.998858690261841e-07, -5.960464477539063e-08, 1.8067657947540283e-07, 4.209578037261963e-07, 6.612390279769897e-07, 9.015202522277832e-07, 1.1418014764785767e-06, 1.3820827007293701e-06, 1.6223639249801636e-06, 1.862645149230957e-06, 2.1029263734817505e-06, 2.343207597732544e-06, 2.5834888219833374e-06, 2.823770046234131e-06, 3.0640512704849243e-06, 3.3043324947357178e-06, 3.5446137189865112e-06, 3.7848949432373047e-06, 4.025176167488098e-06, 4.265457391738892e-06, 4.505738615989685e-06, 4.7460198402404785e-06, 4.986301064491272e-06, 5.2265822887420654e-06, 5.466863512992859e-06, 5.707144737243652e-06, 5.947425961494446e-06, 6.187707185745239e-06, 6.427988409996033e-06, 6.668269634246826e-06, 6.90855085849762e-06, 7.148832082748413e-06, 7.3891133069992065e-06, 7.62939453125e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 2.0, 4.0, 5.0, 7.0, 4.0, 3.0, 9.0, 7.0, 16.0, 14.0, 25.0, 25.0, 28.0, 38.0, 69.0, 77.0, 81.0, 106.0, 89.0, 99.0, 78.0, 50.0, 18.0, 34.0, 20.0, 24.0, 17.0, 15.0, 12.0, 6.0, 11.0, 0.0, 7.0, 1.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.377696990966797e-06, -6.141141057014465e-06, -5.904585123062134e-06, -5.668029189109802e-06, -5.431473255157471e-06, -5.194917321205139e-06, -4.958361387252808e-06, -4.721805453300476e-06, -4.4852495193481445e-06, -4.248693585395813e-06, -4.0121376514434814e-06, -3.77558171749115e-06, -3.5390257835388184e-06, -3.302469849586487e-06, -3.0659139156341553e-06, -2.8293579816818237e-06, -2.592802047729492e-06, -2.3562461137771606e-06, -2.119690179824829e-06, -1.8831342458724976e-06, -1.646578311920166e-06, -1.4100223779678345e-06, -1.173466444015503e-06, -9.369105100631714e-07, -7.003545761108398e-07, -4.637986421585083e-07, -2.2724270820617676e-07, 9.313225746154785e-09, 2.4586915969848633e-07, 4.824250936508179e-07, 7.189810276031494e-07, 9.55536961555481e-07, 1.1920928955078125e-06, 1.428648829460144e-06, 1.6652047634124756e-06, 1.9017606973648071e-06, 2.1383166313171387e-06, 2.3748725652694702e-06, 2.6114284992218018e-06, 2.8479844331741333e-06, 
3.084540367126465e-06, 3.3210963010787964e-06, 3.557652235031128e-06, 3.7942081689834595e-06, 4.030764102935791e-06, 4.2673200368881226e-06, 4.503875970840454e-06, 4.740431904792786e-06, 4.976987838745117e-06, 5.213543772697449e-06, 5.45009970664978e-06, 5.686655640602112e-06, 5.923211574554443e-06, 6.159767508506775e-06, 6.3963234424591064e-06, 6.632879376411438e-06, 6.8694353103637695e-06, 7.105991244316101e-06, 7.342547178268433e-06, 7.579103112220764e-06, 7.815659046173096e-06, 8.052214980125427e-06, 8.288770914077759e-06, 8.52532684803009e-06, 8.761882781982422e-06]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 5.0, 5.0, 9.0, 12.0, 15.0, 27.0, 29.0, 47.0, 67.0, 104.0, 136.0, 131.0, 95.0, 78.0, 61.0, 40.0, 24.0, 38.0, 20.0, 11.0, 14.0, 11.0, 5.0, 4.0, 5.0, 5.0, 6.0, 0.0, 6.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002204432530561462, -0.00021236087195575237, -0.00020427849085535854, -0.0001961961097549647, -0.00018811371410265565, -0.00018003134755417705, -0.00017194895190186799, -0.00016386657080147415, -0.00015578418970108032, -0.0001477018086006865, -0.00013961942750029266, -0.00013153704639989883, -0.000123454665299505, -0.00011537227692315355, -0.0001072898885468021, -9.920750744640827e-05, -9.112512634601444e-05, -8.304274524562061e-05, -7.496036414522678e-05, -6.687797576887533e-05, -5.87955946684815e-05, -5.071321356808767e-05, -4.263082882971503e-05, -3.454844409134239e-05, -2.6466062990948558e-05, -1.8383680071565323e-05, -1.0301297152182087e-05, -2.218914232798852e-06, 5.863468686584383e-06, 1.3945849786978215e-05, 2.2028234525350854e-05, 3.0110619263723493e-05, 3.8192985812202096e-05, 4.627536691259593e-05, 5.4357751650968567e-05, 6.24401363893412e-05, 7.052251748973504e-05, 7.860489859012887e-05, 8.668728696648031e-05, 9.476966806687415e-05, 0.00010285204916726798, 0.00011093443026766181, 0.00011901681136805564, 0.00012709919246844947, 0.00013518158812075853, 0.00014326395466923714, 0.0001513463503215462, 0.00015942873142194003, 0.00016751111252233386, 0.0001755934936227277, 0.00018367587472312152, 0.00019175825582351536, 0.0001998406369239092, 0.00020792303257621825, 0.00021600541367661208, 0.0002240877947770059, 0.00023217017587739974, 0.00024025255697779357, 0.00024833495263010263, 0.00025641731917858124, 0.0002644997148308903, 0.0002725820813793689, 0.00028066447703167796, 0.00028874684358015656, 0.0002968292392324656]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 5.0, 8.0, 9.0, 6.0, 7.0, 9.0, 14.0, 4.0, 23.0, 17.0, 19.0, 27.0, 32.0, 24.0, 24.0, 42.0, 40.0, 36.0, 40.0, 60.0, 36.0, 54.0, 40.0, 37.0, 40.0, 34.0, 27.0, 31.0, 43.0, 32.0, 35.0, 19.0, 19.0, 21.0, 16.0, 12.0, 16.0, 16.0, 9.0, 6.0, 9.0, 6.0, 5.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0], "bins": [-0.0001685619354248047, -0.00016394257545471191, -0.00015932321548461914, -0.00015470385551452637, -0.0001500844955444336, -0.00014546513557434082, -0.00014084577560424805, -0.00013622641563415527, -0.0001316070556640625, -0.00012698769569396973, -0.00012236833572387695, -0.00011774897575378418, -0.0001131296157836914, -0.00010851025581359863, -0.00010389089584350586, -9.927153587341309e-05, -9.465217590332031e-05, -9.003281593322754e-05, -8.541345596313477e-05, -8.079409599304199e-05, -7.617473602294922e-05, 
-7.155537605285645e-05, -6.693601608276367e-05, -6.23166561126709e-05, -5.7697296142578125e-05, -5.307793617248535e-05, -4.845857620239258e-05, -4.3839216232299805e-05, -3.921985626220703e-05, -3.460049629211426e-05, -2.9981136322021484e-05, -2.536177635192871e-05, -2.0742416381835938e-05, -1.6123056411743164e-05, -1.150369644165039e-05, -6.884336471557617e-06, -2.2649765014648438e-06, 2.3543834686279297e-06, 6.973743438720703e-06, 1.1593103408813477e-05, 1.621246337890625e-05, 2.0831823348999023e-05, 2.5451183319091797e-05, 3.007054328918457e-05, 3.4689903259277344e-05, 3.930926322937012e-05, 4.392862319946289e-05, 4.8547983169555664e-05, 5.316734313964844e-05, 5.778670310974121e-05, 6.240606307983398e-05, 6.702542304992676e-05, 7.164478302001953e-05, 7.62641429901123e-05, 8.088350296020508e-05, 8.550286293029785e-05, 9.012222290039062e-05, 9.47415828704834e-05, 9.936094284057617e-05, 0.00010398030281066895, 0.00010859966278076172, 0.00011321902275085449, 0.00011783838272094727, 0.00012245774269104004, 0.0001270771026611328]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 3.0, 6.0, 9.0, 17.0, 22.0, 30.0, 50.0, 77.0, 153.0, 216.0, 401.0, 691.0, 1231.0, 2251.0, 4847.0, 11070.0, 33691.0, 348939.0, 3370248.0, 374462.0, 27673.0, 9486.0, 3973.0, 2015.0, 1073.0, 599.0, 354.0, 187.0, 153.0, 99.0, 72.0, 44.0, 32.0, 28.0, 18.0, 13.0, 9.0, 6.0, 13.0, 3.0, 6.0, 6.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-4.798173904418945e-05, -4.574935883283615e-05, -4.351697862148285e-05, -4.128459841012955e-05, -3.9052218198776245e-05, -3.681983798742294e-05, -3.458745777606964e-05, -3.235507756471634e-05, -3.0122697353363037e-05, -2.7890317142009735e-05, -2.5657936930656433e-05, -2.342555671930313e-05, -2.119317650794983e-05, -1.8960796296596527e-05, -1.6728416085243225e-05, -1.4496035873889923e-05, -1.2263655662536621e-05, -1.0031275451183319e-05, -7.798895239830017e-06, -5.566515028476715e-06, -3.334134817123413e-06, -1.101754605770111e-06, 1.130625605583191e-06, 3.363005816936493e-06, 5.595386028289795e-06, 7.827766239643097e-06, 1.0060146450996399e-05, 1.2292526662349701e-05, 1.4524906873703003e-05, 1.6757287085056305e-05, 1.8989667296409607e-05, 2.122204750776291e-05, 2.345442771911621e-05, 2.5686807930469513e-05, 2.7919188141822815e-05, 3.0151568353176117e-05, 3.238394856452942e-05, 3.461632877588272e-05, 3.684870898723602e-05, 3.9081089198589325e-05, 4.131346940994263e-05, 4.354584962129593e-05, 4.577822983264923e-05, 4.801061004400253e-05, 5.0242990255355835e-05, 5.247537046670914e-05, 5.470775067806244e-05, 5.694013088941574e-05, 5.917251110076904e-05, 6.140489131212234e-05, 6.363727152347565e-05, 6.586965173482895e-05, 6.810203194618225e-05, 7.033441215753555e-05, 7.256679236888885e-05, 7.479917258024216e-05, 7.703155279159546e-05, 7.926393300294876e-05, 8.149631321430206e-05, 8.372869342565536e-05, 8.596107363700867e-05, 8.819345384836197e-05, 9.042583405971527e-05, 9.265821427106857e-05, 9.489059448242188e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 3.0, 4.0, 3.0, 4.0, 7.0, 8.0, 11.0, 23.0, 18.0, 26.0, 25.0, 49.0, 53.0, 54.0, 49.0, 72.0, 61.0, 83.0, 65.0, 67.0, 58.0, 54.0, 41.0, 38.0, 29.0, 21.0, 25.0, 19.0, 14.0, 6.0, 7.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": 
[-2.3484230041503906e-05, -2.2635795176029205e-05, -2.1787360310554504e-05, -2.0938925445079803e-05, -2.0090490579605103e-05, -1.92420557141304e-05, -1.83936208486557e-05, -1.7545185983181e-05, -1.66967511177063e-05, -1.5848316252231598e-05, -1.4999881386756897e-05, -1.4151446521282196e-05, -1.3303011655807495e-05, -1.2454576790332794e-05, -1.1606141924858093e-05, -1.0757707059383392e-05, -9.909272193908691e-06, -9.06083732843399e-06, -8.21240246295929e-06, -7.363967597484589e-06, -6.515532732009888e-06, -5.667097866535187e-06, -4.818663001060486e-06, -3.970228135585785e-06, -3.121793270111084e-06, -2.273358404636383e-06, -1.4249235391616821e-06, -5.764886736869812e-07, 2.7194619178771973e-07, 1.1203810572624207e-06, 1.9688159227371216e-06, 2.8172507882118225e-06, 3.6656856536865234e-06, 4.514120519161224e-06, 5.362555384635925e-06, 6.210990250110626e-06, 7.059425115585327e-06, 7.907859981060028e-06, 8.756294846534729e-06, 9.60472971200943e-06, 1.0453164577484131e-05, 1.1301599442958832e-05, 1.2150034308433533e-05, 1.2998469173908234e-05, 1.3846904039382935e-05, 1.4695338904857635e-05, 1.5543773770332336e-05, 1.6392208635807037e-05, 1.7240643501281738e-05, 1.808907836675644e-05, 1.893751323223114e-05, 1.978594809770584e-05, 2.0634382963180542e-05, 2.1482817828655243e-05, 2.2331252694129944e-05, 2.3179687559604645e-05, 2.4028122425079346e-05, 2.4876557290554047e-05, 2.5724992156028748e-05, 2.657342702150345e-05, 2.742186188697815e-05, 2.827029675245285e-05, 2.911873161792755e-05, 2.9967166483402252e-05, 3.081560134887695e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 0.0, 6.0, 4.0, 2.0, 7.0, 9.0, 9.0, 22.0, 28.0, 40.0, 57.0, 86.0, 115.0, 171.0, 300.0, 428.0, 792.0, 1357.0, 2531.0, 4864.0, 10607.0, 25836.0, 75639.0, 521382.0, 3240469.0, 225437.0, 50098.0, 17780.0, 7573.0, 3801.0, 1910.0, 1090.0, 649.0, 366.0, 265.0, 167.0, 133.0, 78.0, 45.0, 30.0, 34.0, 15.0, 14.0, 16.0, 7.0, 8.0, 4.0, 3.0, 2.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0], "bins": [-4.547834396362305e-05, -4.4113025069236755e-05, -4.2747706174850464e-05, -4.138238728046417e-05, -4.001706838607788e-05, -3.865174949169159e-05, -3.72864305973053e-05, -3.5921111702919006e-05, -3.4555792808532715e-05, -3.319047391414642e-05, -3.182515501976013e-05, -3.045983612537384e-05, -2.909451723098755e-05, -2.7729198336601257e-05, -2.6363879442214966e-05, -2.4998560547828674e-05, -2.3633241653442383e-05, -2.226792275905609e-05, -2.09026038646698e-05, -1.9537284970283508e-05, -1.8171966075897217e-05, -1.6806647181510925e-05, -1.5441328287124634e-05, -1.4076009392738342e-05, -1.271069049835205e-05, -1.134537160396576e-05, -9.980052709579468e-06, -8.614733815193176e-06, -7.249414920806885e-06, -5.884096026420593e-06, -4.518777132034302e-06, -3.1534582376480103e-06, -1.7881393432617188e-06, -4.2282044887542725e-07, 9.424984455108643e-07, 2.3078173398971558e-06, 3.6731362342834473e-06, 5.038455128669739e-06, 6.40377402305603e-06, 7.769092917442322e-06, 9.134411811828613e-06, 1.0499730706214905e-05, 1.1865049600601196e-05, 1.3230368494987488e-05, 1.459568738937378e-05, 1.596100628376007e-05, 1.7326325178146362e-05, 1.8691644072532654e-05, 2.0056962966918945e-05, 2.1422281861305237e-05, 2.278760075569153e-05, 2.415291965007782e-05, 2.551823854446411e-05, 2.6883557438850403e-05, 2.8248876333236694e-05, 2.9614195227622986e-05, 3.097951412200928e-05, 3.234483301639557e-05, 3.371015191078186e-05, 3.507547080516815e-05, 3.644078969955444e-05, 
3.7806108593940735e-05, 3.9171427488327026e-05, 4.053674638271332e-05, 4.190206527709961e-05]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 5.0, 3.0, 5.0, 13.0, 13.0, 16.0, 10.0, 29.0, 29.0, 36.0, 48.0, 77.0, 126.0, 212.0, 436.0, 820.0, 881.0, 571.0, 266.0, 153.0, 90.0, 76.0, 47.0, 30.0, 21.0, 13.0, 12.0, 18.0, 3.0, 6.0, 3.0, 3.0, 2.0, 2.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.707408905029297e-05, -3.5599805414676666e-05, -3.4125521779060364e-05, -3.265123814344406e-05, -3.117695450782776e-05, -2.9702670872211456e-05, -2.8228387236595154e-05, -2.675410360097885e-05, -2.527981996536255e-05, -2.3805536329746246e-05, -2.2331252694129944e-05, -2.085696905851364e-05, -1.938268542289734e-05, -1.7908401787281036e-05, -1.6434118151664734e-05, -1.4959834516048431e-05, -1.3485550880432129e-05, -1.2011267244815826e-05, -1.0536983609199524e-05, -9.062699973583221e-06, -7.588416337966919e-06, -6.1141327023506165e-06, -4.639849066734314e-06, -3.1655654311180115e-06, -1.691281795501709e-06, -2.169981598854065e-07, 1.257285475730896e-06, 2.7315691113471985e-06, 4.205852746963501e-06, 5.6801363825798035e-06, 7.154420018196106e-06, 8.628703653812408e-06, 1.0102987289428711e-05, 1.1577270925045013e-05, 1.3051554560661316e-05, 1.4525838196277618e-05, 1.600012183189392e-05, 1.7474405467510223e-05, 1.8948689103126526e-05, 2.042297273874283e-05, 2.189725637435913e-05, 2.3371540009975433e-05, 2.4845823645591736e-05, 2.632010728120804e-05, 2.779439091682434e-05, 2.9268674552440643e-05, 3.0742958188056946e-05, 3.221724182367325e-05, 3.369152545928955e-05, 3.516580909490585e-05, 3.6640092730522156e-05, 3.811437636613846e-05, 3.958866000175476e-05, 4.106294363737106e-05, 4.2537227272987366e-05, 4.401151090860367e-05, 4.548579454421997e-05, 4.696007817983627e-05, 4.8434361815452576e-05, 4.990864545106888e-05, 5.138292908668518e-05, 5.285721272230148e-05, 5.4331496357917786e-05, 5.580577999353409e-05, 5.728006362915039e-05]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 3.0, 7.0, 6.0, 5.0, 9.0, 9.0, 20.0, 22.0, 37.0, 46.0, 47.0, 56.0, 94.0, 98.0, 112.0, 99.0, 81.0, 55.0, 42.0, 38.0, 36.0, 21.0, 11.0, 9.0, 10.0, 8.0, 9.0, 3.0, 3.0, 3.0, 1.0, 3.0, 3.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0002420395758235827, -0.00023325927031692117, -0.00022447896481025964, -0.0002156986593035981, -0.00020691833924502134, -0.0001981380337383598, -0.00018935772823169827, -0.00018057742272503674, -0.0001717971172183752, -0.00016301681171171367, -0.00015423650620505214, -0.0001454562006983906, -0.00013667589519172907, -0.00012789558968506753, -0.00011911526962649077, -0.00011033496411982924, -0.0001015546586131677, -9.277435310650617e-05, -8.399404759984463e-05, -7.521373481722549e-05, -6.643342931056395e-05, -5.765312380390242e-05, -4.8872814659262076e-05, -4.0092505514621735e-05, -3.13122000079602e-05, -2.2531892682309262e-05, -1.3751585356658325e-05, -4.971278031007387e-06, 3.809029294643551e-06, 1.2589334801305085e-05, 2.1369643945945427e-05, 3.0149953090585768e-05, 3.893027314916253e-05, 4.7710578655824065e-05, 5.6490887800464407e-05, 6.527119694510475e-05, 7.405150245176628e-05, 8.283180795842782e-05, 9.161212074104697e-05, 0.0001003924262477085, 
0.00010917273175437003, 0.00011795303726103157, 0.0001267333427676931, 0.00013551364827435464, 0.0001442939683329314, 0.00015307427383959293, 0.00016185457934625447, 0.000170634884852916, 0.00017941519035957754, 0.00018819549586623907, 0.0001969758013729006, 0.00020575610687956214, 0.00021453641238622367, 0.0002233167178928852, 0.00023209703795146197, 0.0002408773434581235, 0.00024965766351670027, 0.0002584379690233618, 0.00026721827453002334, 0.00027599858003668487, 0.0002847788855433464, 0.00029355919105000794, 0.0003023394965566695, 0.00031111983116716146, 0.00031990010756999254]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 5.0, 1.0, 2.0, 6.0, 10.0, 6.0, 11.0, 14.0, 12.0, 12.0, 24.0, 32.0, 25.0, 23.0, 30.0, 32.0, 39.0, 29.0, 47.0, 44.0, 44.0, 46.0, 49.0, 38.0, 36.0, 51.0, 46.0, 46.0, 38.0, 30.0, 23.0, 25.0, 23.0, 23.0, 15.0, 17.0, 10.0, 9.0, 6.0, 8.0, 11.0, 4.0, 4.0, 3.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012415647506713867, -0.00012009963393211365, -0.00011604279279708862, -0.0001119859516620636, -0.00010792911052703857, -0.00010387226939201355, -9.981542825698853e-05, -9.57585871219635e-05, -9.170174598693848e-05, -8.764490485191345e-05, -8.358806371688843e-05, -7.95312225818634e-05, -7.547438144683838e-05, -7.141754031181335e-05, -6.736069917678833e-05, -6.33038580417633e-05, -5.924701690673828e-05, -5.519017577171326e-05, -5.113333463668823e-05, -4.707649350166321e-05, -4.3019652366638184e-05, -3.896281123161316e-05, -3.4905970096588135e-05, -3.084912896156311e-05, -2.6792287826538086e-05, -2.273544669151306e-05, -1.8678605556488037e-05, -1.4621764421463013e-05, -1.0564923286437988e-05, -6.508082151412964e-06, -2.4512410163879395e-06, 1.605600118637085e-06, 5.662441253662109e-06, 9.719282388687134e-06, 1.3776123523712158e-05, 1.7832964658737183e-05, 2.1889805793762207e-05, 2.594664692878723e-05, 3.0003488063812256e-05, 3.406032919883728e-05, 3.8117170333862305e-05, 4.217401146888733e-05, 4.6230852603912354e-05, 5.028769373893738e-05, 5.43445348739624e-05, 5.840137600898743e-05, 6.245821714401245e-05, 6.651505827903748e-05, 7.05718994140625e-05, 7.462874054908752e-05, 7.868558168411255e-05, 8.274242281913757e-05, 8.67992639541626e-05, 9.085610508918762e-05, 9.491294622421265e-05, 9.896978735923767e-05, 0.0001030266284942627, 0.00010708346962928772, 0.00011114031076431274, 0.00011519715189933777, 0.00011925399303436279, 0.00012331083416938782, 0.00012736767530441284, 0.00013142451643943787, 0.0001354813575744629]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 0.0, 2.0, 4.0, 3.0, 4.0, 17.0, 15.0, 13.0, 19.0, 24.0, 47.0, 67.0, 87.0, 119.0, 184.0, 260.0, 401.0, 547.0, 949.0, 1554.0, 2526.0, 4286.0, 7869.0, 14933.0, 31854.0, 76093.0, 213647.0, 395344.0, 175534.0, 65729.0, 26610.0, 12964.0, 6888.0, 3934.0, 2169.0, 1350.0, 855.0, 522.0, 339.0, 242.0, 173.0, 123.0, 82.0, 51.0, 43.0, 25.0, 16.0, 15.0, 13.0, 5.0, 3.0, 6.0, 0.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.990266799926758e-05, -5.7931989431381226e-05, -5.596131086349487e-05, -5.399063229560852e-05, -5.201995372772217e-05, -5.0049275159835815e-05, -4.807859659194946e-05, -4.610791802406311e-05, -4.413723945617676e-05, -4.2166560888290405e-05, -4.019588232040405e-05, -3.82252037525177e-05, -3.625452518463135e-05, -3.4283846616744995e-05, -3.231316804885864e-05, -3.034248948097229e-05, -2.8371810913085938e-05, 
-2.6401132345199585e-05, -2.4430453777313232e-05, -2.245977520942688e-05, -2.0489096641540527e-05, -1.8518418073654175e-05, -1.6547739505767822e-05, -1.457706093788147e-05, -1.2606382369995117e-05, -1.0635703802108765e-05, -8.665025234222412e-06, -6.6943466663360596e-06, -4.723668098449707e-06, -2.7529895305633545e-06, -7.82310962677002e-07, 1.1883676052093506e-06, 3.159046173095703e-06, 5.129724740982056e-06, 7.100403308868408e-06, 9.07108187675476e-06, 1.1041760444641113e-05, 1.3012439012527466e-05, 1.4983117580413818e-05, 1.695379614830017e-05, 1.8924474716186523e-05, 2.0895153284072876e-05, 2.286583185195923e-05, 2.483651041984558e-05, 2.6807188987731934e-05, 2.8777867555618286e-05, 3.074854612350464e-05, 3.271922469139099e-05, 3.4689903259277344e-05, 3.6660581827163696e-05, 3.863126039505005e-05, 4.06019389629364e-05, 4.2572617530822754e-05, 4.4543296098709106e-05, 4.651397466659546e-05, 4.848465323448181e-05, 5.0455331802368164e-05, 5.2426010370254517e-05, 5.439668893814087e-05, 5.636736750602722e-05, 5.8338046073913574e-05, 6.030872464179993e-05, 6.227940320968628e-05, 6.425008177757263e-05, 6.622076034545898e-05]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 5.0, 5.0, 10.0, 8.0, 5.0, 9.0, 10.0, 11.0, 19.0, 25.0, 21.0, 41.0, 34.0, 55.0, 41.0, 45.0, 64.0, 52.0, 55.0, 64.0, 56.0, 59.0, 50.0, 58.0, 32.0, 38.0, 12.0, 23.0, 12.0, 17.0, 9.0, 13.0, 12.0, 10.0, 7.0, 7.0, 2.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0], "bins": [-2.4139881134033203e-05, -2.3419968783855438e-05, -2.2700056433677673e-05, -2.198014408349991e-05, -2.1260231733322144e-05, -2.054031938314438e-05, -1.9820407032966614e-05, -1.910049468278885e-05, -1.8380582332611084e-05, -1.766066998243332e-05, -1.6940757632255554e-05, -1.622084528207779e-05, -1.5500932931900024e-05, -1.478102058172226e-05, -1.4061108231544495e-05, -1.334119588136673e-05, -1.2621283531188965e-05, -1.19013711810112e-05, -1.1181458830833435e-05, -1.046154648065567e-05, -9.741634130477905e-06, -9.02172178030014e-06, -8.301809430122375e-06, -7.581897079944611e-06, -6.861984729766846e-06, -6.142072379589081e-06, -5.422160029411316e-06, -4.702247679233551e-06, -3.982335329055786e-06, -3.2624229788780212e-06, -2.5425106287002563e-06, -1.8225982785224915e-06, -1.1026859283447266e-06, -3.8277357816696167e-07, 3.371387720108032e-07, 1.0570511221885681e-06, 1.776963472366333e-06, 2.496875822544098e-06, 3.216788172721863e-06, 3.936700522899628e-06, 4.656612873077393e-06, 5.3765252232551575e-06, 6.096437573432922e-06, 6.816349923610687e-06, 7.536262273788452e-06, 8.256174623966217e-06, 8.976086974143982e-06, 9.695999324321747e-06, 1.0415911674499512e-05, 1.1135824024677277e-05, 1.1855736374855042e-05, 1.2575648725032806e-05, 1.3295561075210571e-05, 1.4015473425388336e-05, 1.4735385775566101e-05, 1.5455298125743866e-05, 1.617521047592163e-05, 1.6895122826099396e-05, 1.761503517627716e-05, 1.8334947526454926e-05, 1.905485987663269e-05, 1.9774772226810455e-05, 2.049468457698822e-05, 2.1214596927165985e-05, 2.193450927734375e-05]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 4.0, 4.0, 6.0, 4.0, 3.0, 25.0, 30.0, 49.0, 64.0, 99.0, 108.0, 281.0, 539.0, 1160.0, 2536.0, 6539.0, 19461.0, 77166.0, 546216.0, 314504.0, 55042.0, 15099.0, 5266.0, 2216.0, 956.0, 483.0, 264.0, 162.0, 82.0, 65.0, 32.0, 26.0, 14.0, 11.0, 12.0, 
6.0, 3.0, 4.0, 2.0, 4.0, 4.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.143880844116211e-05, -4.9773603677749634e-05, -4.810839891433716e-05, -4.644319415092468e-05, -4.477798938751221e-05, -4.311278462409973e-05, -4.1447579860687256e-05, -3.978237509727478e-05, -3.8117170333862305e-05, -3.645196557044983e-05, -3.4786760807037354e-05, -3.312155604362488e-05, -3.14563512802124e-05, -2.9791146516799927e-05, -2.812594175338745e-05, -2.6460736989974976e-05, -2.47955322265625e-05, -2.3130327463150024e-05, -2.146512269973755e-05, -1.9799917936325073e-05, -1.8134713172912598e-05, -1.6469508409500122e-05, -1.4804303646087646e-05, -1.3139098882675171e-05, -1.1473894119262695e-05, -9.80868935585022e-06, -8.143484592437744e-06, -6.4782798290252686e-06, -4.813075065612793e-06, -3.1478703022003174e-06, -1.4826655387878418e-06, 1.825392246246338e-07, 1.8477439880371094e-06, 3.512948751449585e-06, 5.1781535148620605e-06, 6.843358278274536e-06, 8.508563041687012e-06, 1.0173767805099487e-05, 1.1838972568511963e-05, 1.3504177331924438e-05, 1.5169382095336914e-05, 1.683458685874939e-05, 1.8499791622161865e-05, 2.016499638557434e-05, 2.1830201148986816e-05, 2.3495405912399292e-05, 2.5160610675811768e-05, 2.6825815439224243e-05, 2.849102020263672e-05, 3.0156224966049194e-05, 3.182142972946167e-05, 3.3486634492874146e-05, 3.515183925628662e-05, 3.68170440196991e-05, 3.848224878311157e-05, 4.014745354652405e-05, 4.1812658309936523e-05, 4.3477863073349e-05, 4.5143067836761475e-05, 4.680827260017395e-05, 4.8473477363586426e-05, 5.01386821269989e-05, 5.180388689041138e-05, 5.346909165382385e-05, 5.513429641723633e-05]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 1.0, 4.0, 5.0, 2.0, 5.0, 6.0, 9.0, 7.0, 8.0, 9.0, 14.0, 18.0, 17.0, 17.0, 25.0, 36.0, 22.0, 42.0, 43.0, 36.0, 55.0, 35.0, 49.0, 47.0, 56.0, 41.0, 28.0, 40.0, 48.0, 45.0, 26.0, 32.0, 33.0, 30.0, 19.0, 20.0, 17.0, 15.0, 10.0, 7.0, 5.0, 6.0, 2.0, 3.0, 5.0, 3.0, 4.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.594160079956055e-05, -3.4650787711143494e-05, -3.335997462272644e-05, -3.206916153430939e-05, -3.0778348445892334e-05, -2.948753535747528e-05, -2.8196722269058228e-05, -2.6905909180641174e-05, -2.561509609222412e-05, -2.4324283003807068e-05, -2.3033469915390015e-05, -2.174265682697296e-05, -2.0451843738555908e-05, -1.9161030650138855e-05, -1.7870217561721802e-05, -1.657940447330475e-05, -1.5288591384887695e-05, -1.3997778296470642e-05, -1.2706965208053589e-05, -1.1416152119636536e-05, -1.0125339031219482e-05, -8.83452594280243e-06, -7.543712854385376e-06, -6.252899765968323e-06, -4.9620866775512695e-06, -3.6712735891342163e-06, -2.380460500717163e-06, -1.0896474123001099e-06, 2.0116567611694336e-07, 1.4919787645339966e-06, 2.78279185295105e-06, 4.073604941368103e-06, 5.364418029785156e-06, 6.6552311182022095e-06, 7.946044206619263e-06, 9.236857295036316e-06, 1.0527670383453369e-05, 1.1818483471870422e-05, 1.3109296560287476e-05, 1.4400109648704529e-05, 1.5690922737121582e-05, 1.6981735825538635e-05, 1.827254891395569e-05, 1.9563362002372742e-05, 2.0854175090789795e-05, 2.2144988179206848e-05, 2.34358012676239e-05, 2.4726614356040955e-05, 2.6017427444458008e-05, 2.730824053287506e-05, 2.8599053621292114e-05, 2.9889866709709167e-05, 3.118067979812622e-05, 3.2471492886543274e-05, 3.376230597496033e-05, 3.505311906337738e-05, 3.6343932151794434e-05, 3.763474524021149e-05, 3.892555832862854e-05, 4.021637141704559e-05, 
4.1507184505462646e-05, 4.27979975938797e-05, 4.408881068229675e-05, 4.5379623770713806e-05, 4.667043685913086e-05]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 5.0, 5.0, 6.0, 13.0, 13.0, 7.0, 23.0, 36.0, 60.0, 81.0, 148.0, 278.0, 487.0, 1033.0, 2160.0, 6062.0, 22217.0, 145498.0, 780788.0, 67746.0, 13925.0, 4464.0, 1589.0, 876.0, 383.0, 265.0, 116.0, 83.0, 59.0, 36.0, 36.0, 17.0, 10.0, 11.0, 7.0, 5.0, 5.0, 4.0, 0.0, 1.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.205371856689453e-05, -2.130214124917984e-05, -2.055056393146515e-05, -1.9798986613750458e-05, -1.9047409296035767e-05, -1.8295831978321075e-05, -1.7544254660606384e-05, -1.6792677342891693e-05, -1.6041100025177002e-05, -1.528952270746231e-05, -1.453794538974762e-05, -1.3786368072032928e-05, -1.3034790754318237e-05, -1.2283213436603546e-05, -1.1531636118888855e-05, -1.0780058801174164e-05, -1.0028481483459473e-05, -9.276904165744781e-06, -8.52532684803009e-06, -7.7737495303154e-06, -7.022172212600708e-06, -6.270594894886017e-06, -5.519017577171326e-06, -4.7674402594566345e-06, -4.015862941741943e-06, -3.264285624027252e-06, -2.512708306312561e-06, -1.7611309885978699e-06, -1.0095536708831787e-06, -2.5797635316848755e-07, 4.936009645462036e-07, 1.2451782822608948e-06, 1.996755599975586e-06, 2.748332917690277e-06, 3.4999102354049683e-06, 4.2514875531196594e-06, 5.003064870834351e-06, 5.754642188549042e-06, 6.506219506263733e-06, 7.257796823978424e-06, 8.009374141693115e-06, 8.760951459407806e-06, 9.512528777122498e-06, 1.0264106094837189e-05, 1.101568341255188e-05, 1.1767260730266571e-05, 1.2518838047981262e-05, 1.3270415365695953e-05, 1.4021992683410645e-05, 1.4773570001125336e-05, 1.5525147318840027e-05, 1.6276724636554718e-05, 1.702830195426941e-05, 1.77798792719841e-05, 1.853145658969879e-05, 1.9283033907413483e-05, 2.0034611225128174e-05, 2.0786188542842865e-05, 2.1537765860557556e-05, 2.2289343178272247e-05, 2.304092049598694e-05, 2.379249781370163e-05, 2.454407513141632e-05, 2.5295652449131012e-05, 2.6047229766845703e-05]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 3.0, 8.0, 11.0, 0.0, 12.0, 9.0, 0.0, 11.0, 14.0, 0.0, 15.0, 24.0, 26.0, 0.0, 27.0, 42.0, 0.0, 32.0, 49.0, 0.0, 54.0, 62.0, 64.0, 0.0, 74.0, 55.0, 0.0, 57.0, 43.0, 58.0, 0.0, 47.0, 35.0, 0.0, 37.0, 26.0, 0.0, 29.0, 24.0, 12.0, 0.0, 16.0, 11.0, 0.0, 9.0, 2.0, 0.0, 5.0, 5.0, 1.0, 0.0, 4.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-1.3113021850585938e-06, -1.2693926692008972e-06, -1.2274831533432007e-06, -1.1855736374855042e-06, -1.1436641216278076e-06, -1.101754605770111e-06, -1.0598450899124146e-06, -1.017935574054718e-06, -9.760260581970215e-07, -9.34116542339325e-07, -8.922070264816284e-07, -8.502975106239319e-07, -8.083879947662354e-07, -7.664784789085388e-07, -7.245689630508423e-07, -6.826594471931458e-07, -6.407499313354492e-07, -5.988404154777527e-07, -5.569308996200562e-07, -5.150213837623596e-07, -4.731118679046631e-07, -4.3120235204696655e-07, -3.8929283618927e-07, -3.473833203315735e-07, -3.0547380447387695e-07, -2.635642886161804e-07, -2.2165477275848389e-07, -1.7974525690078735e-07, -1.3783574104309082e-07, -9.592622518539429e-08, -5.4016709327697754e-08, -1.210719347000122e-08, 2.9802322387695312e-08, 7.171183824539185e-08, 1.1362135410308838e-07, 1.555308699607849e-07, 1.9744038581848145e-07, 2.39349901676178e-07, 
2.812594175338745e-07, 3.2316893339157104e-07, 3.650784492492676e-07, 4.069879651069641e-07, 4.4889748096466064e-07, 4.908069968223572e-07, 5.327165126800537e-07, 5.746260285377502e-07, 6.165355443954468e-07, 6.584450602531433e-07, 7.003545761108398e-07, 7.422640919685364e-07, 7.841736078262329e-07, 8.260831236839294e-07, 8.67992639541626e-07, 9.099021553993225e-07, 9.51811671257019e-07, 9.937211871147156e-07, 1.0356307029724121e-06, 1.0775402188301086e-06, 1.1194497346878052e-06, 1.1613592505455017e-06, 1.2032687664031982e-06, 1.2451782822608948e-06, 1.2870877981185913e-06, 1.3289973139762878e-06, 1.3709068298339844e-06]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 4.0, 4.0, 2.0, 9.0, 15.0, 11.0, 10.0, 18.0, 28.0, 36.0, 63.0, 100.0, 182.0, 319.0, 587.0, 1565.0, 3872.0, 13905.0, 86734.0, 859502.0, 64189.0, 11029.0, 3653.0, 1336.0, 617.0, 294.0, 172.0, 98.0, 55.0, 25.0, 18.0, 19.0, 16.0, 12.0, 13.0, 5.0, 9.0, 2.0, 7.0, 5.0, 1.0, 5.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8848648071289062e-05, -2.786889672279358e-05, -2.6889145374298096e-05, -2.5909394025802612e-05, -2.492964267730713e-05, -2.3949891328811646e-05, -2.2970139980316162e-05, -2.199038863182068e-05, -2.1010637283325195e-05, -2.0030885934829712e-05, -1.905113458633423e-05, -1.8071383237838745e-05, -1.7091631889343262e-05, -1.611188054084778e-05, -1.5132129192352295e-05, -1.4152377843856812e-05, -1.3172626495361328e-05, -1.2192875146865845e-05, -1.1213123798370361e-05, -1.0233372449874878e-05, -9.253621101379395e-06, -8.273869752883911e-06, -7.294118404388428e-06, -6.314367055892944e-06, -5.334615707397461e-06, -4.3548643589019775e-06, -3.375113010406494e-06, -2.3953616619110107e-06, -1.4156103134155273e-06, -4.3585896492004395e-07, 5.438923835754395e-07, 1.5236437320709229e-06, 2.5033950805664062e-06, 3.4831464290618896e-06, 4.462897777557373e-06, 5.4426491260528564e-06, 6.42240047454834e-06, 7.402151823043823e-06, 8.381903171539307e-06, 9.36165452003479e-06, 1.0341405868530273e-05, 1.1321157217025757e-05, 1.230090856552124e-05, 1.3280659914016724e-05, 1.4260411262512207e-05, 1.524016261100769e-05, 1.6219913959503174e-05, 1.7199665307998657e-05, 1.817941665649414e-05, 1.9159168004989624e-05, 2.0138919353485107e-05, 2.111867070198059e-05, 2.2098422050476074e-05, 2.3078173398971558e-05, 2.405792474746704e-05, 2.5037676095962524e-05, 2.6017427444458008e-05, 2.699717879295349e-05, 2.7976930141448975e-05, 2.8956681489944458e-05, 2.993643283843994e-05, 3.0916184186935425e-05, 3.189593553543091e-05, 3.287568688392639e-05, 3.3855438232421875e-05]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 5.0, 1.0, 6.0, 1.0, 3.0, 5.0, 3.0, 5.0, 9.0, 10.0, 18.0, 18.0, 34.0, 30.0, 29.0, 35.0, 49.0, 87.0, 111.0, 139.0, 90.0, 81.0, 41.0, 42.0, 34.0, 18.0, 18.0, 17.0, 14.0, 8.0, 10.0, 3.0, 0.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 2.0, 1.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.7642974853515625e-05, -1.7095357179641724e-05, -1.6547739505767822e-05, -1.600012183189392e-05, -1.545250415802002e-05, -1.4904886484146118e-05, -1.4357268810272217e-05, -1.3809651136398315e-05, -1.3262033462524414e-05, -1.2714415788650513e-05, -1.2166798114776611e-05, -1.161918044090271e-05, -1.1071562767028809e-05, -1.0523945093154907e-05, -9.976327419281006e-06, -9.428709745407104e-06, -8.881092071533203e-06, 
-8.333474397659302e-06, -7.7858567237854e-06, -7.238239049911499e-06, -6.690621376037598e-06, -6.143003702163696e-06, -5.595386028289795e-06, -5.0477683544158936e-06, -4.500150680541992e-06, -3.952533006668091e-06, -3.4049153327941895e-06, -2.857297658920288e-06, -2.3096799850463867e-06, -1.7620623111724854e-06, -1.214444637298584e-06, -6.668269634246826e-07, -1.1920928955078125e-07, 4.284083843231201e-07, 9.760260581970215e-07, 1.5236437320709229e-06, 2.0712614059448242e-06, 2.6188790798187256e-06, 3.166496753692627e-06, 3.7141144275665283e-06, 4.26173210144043e-06, 4.809349775314331e-06, 5.356967449188232e-06, 5.904585123062134e-06, 6.452202796936035e-06, 6.9998204708099365e-06, 7.547438144683838e-06, 8.09505581855774e-06, 8.64267349243164e-06, 9.190291166305542e-06, 9.737908840179443e-06, 1.0285526514053345e-05, 1.0833144187927246e-05, 1.1380761861801147e-05, 1.1928379535675049e-05, 1.247599720954895e-05, 1.3023614883422852e-05, 1.3571232557296753e-05, 1.4118850231170654e-05, 1.4666467905044556e-05, 1.5214085578918457e-05, 1.576170325279236e-05, 1.630932092666626e-05, 1.685693860054016e-05, 1.7404556274414062e-05]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 4.0, 2.0, 2.0, 7.0, 3.0, 10.0, 10.0, 12.0, 17.0, 35.0, 43.0, 90.0, 113.0, 140.0, 158.0, 140.0, 88.0, 54.0, 30.0, 19.0, 10.0, 9.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00025718886172398925, -0.00024349137675017118, -0.00022979389177635312, -0.00021609640680253506, -0.000202398921828717, -0.00018870143685489893, -0.00017500395188108087, -0.0001613064669072628, -0.00014760898193344474, -0.00013391149695962667, -0.00012021401198580861, -0.00010651652701199055, -9.281904203817248e-05, -7.912155706435442e-05, -6.542407209053636e-05, -5.172658711671829e-05, -3.802910214290023e-05, -2.4331617169082165e-05, -1.0634132195264101e-05, 3.0633527785539627e-06, 1.6760837752372026e-05, 3.045832272619009e-05, 4.4155807700008154e-05, 5.785329267382622e-05, 7.155077764764428e-05, 8.524826262146235e-05, 9.894574759528041e-05, 0.00011264323256909847, 0.00012634071754291654, 0.0001400382025167346, 0.00015373568749055266, 0.00016743317246437073, 0.0001811306574381888, 0.00019482814241200686, 0.00020852562738582492, 0.00022222311235964298, 0.00023592059733346105, 0.0002496180823072791, 0.0002633155672810972, 0.00027701305225491524, 0.0002907105372287333, 0.00030440802220255136, 0.00031810550717636943, 0.0003318029921501875, 0.00034550047712400556, 0.0003591979620978236, 0.0003728954470716417, 0.00038659293204545975, 0.0004002904170192778, 0.0004139879019930959, 0.00042768538696691394, 0.000441382871940732, 0.00045508035691455007, 0.00046877784188836813, 0.0004824753268621862, 0.0004961728118360043, 0.0005098702968098223, 0.0005235677817836404, 0.0005372652667574584, 0.0005509627517312765, 0.0005646602367050946, 0.0005783577216789126, 0.0005920552066527307, 0.0006057526916265488, 0.0006194501766003668]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0, 5.0, 2.0, 4.0, 3.0, 5.0, 2.0, 5.0, 8.0, 10.0, 12.0, 22.0, 18.0, 18.0, 17.0, 25.0, 21.0, 31.0, 31.0, 30.0, 42.0, 42.0, 37.0, 52.0, 37.0, 43.0, 25.0, 38.0, 38.0, 39.0, 30.0, 30.0, 36.0, 28.0, 30.0, 28.0, 17.0, 21.0, 17.0, 23.0, 14.0, 11.0, 5.0, 13.0, 12.0, 7.0, 3.0, 4.0, 4.0, 0.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 
0.0, 1.0], "bins": [-0.00013113021850585938, -0.00012691039592027664, -0.0001226905733346939, -0.00011847075074911118, -0.00011425092816352844, -0.00011003110557794571, -0.00010581128299236298, -0.00010159146040678024, -9.737163782119751e-05, -9.315181523561478e-05, -8.893199265003204e-05, -8.471217006444931e-05, -8.049234747886658e-05, -7.627252489328384e-05, -7.205270230770111e-05, -6.783287972211838e-05, -6.361305713653564e-05, -5.939323455095291e-05, -5.517341196537018e-05, -5.0953589379787445e-05, -4.673376679420471e-05, -4.251394420862198e-05, -3.8294121623039246e-05, -3.407429903745651e-05, -2.985447645187378e-05, -2.5634653866291046e-05, -2.1414831280708313e-05, -1.719500869512558e-05, -1.2975186109542847e-05, -8.755363523960114e-06, -4.53554093837738e-06, -3.157183527946472e-07, 3.904104232788086e-06, 8.123926818370819e-06, 1.2343749403953552e-05, 1.6563571989536285e-05, 2.078339457511902e-05, 2.5003217160701752e-05, 2.9223039746284485e-05, 3.344286233186722e-05, 3.766268491744995e-05, 4.1882507503032684e-05, 4.610233008861542e-05, 5.032215267419815e-05, 5.4541975259780884e-05, 5.876179784536362e-05, 6.298162043094635e-05, 6.720144301652908e-05, 7.142126560211182e-05, 7.564108818769455e-05, 7.986091077327728e-05, 8.408073335886002e-05, 8.830055594444275e-05, 9.252037853002548e-05, 9.674020111560822e-05, 0.00010096002370119095, 0.00010517984628677368, 0.00010939966887235641, 0.00011361949145793915, 0.00011783931404352188, 0.00012205913662910461, 0.00012627895921468735, 0.00013049878180027008, 0.00013471860438585281, 0.00013893842697143555]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 4.0, 1.0, 4.0, 2.0, 10.0, 3.0, 12.0, 12.0, 12.0, 13.0, 16.0, 19.0, 29.0, 30.0, 36.0, 71.0, 113.0, 239.0, 108.0, 60.0, 43.0, 23.0, 28.0, 21.0, 25.0, 15.0, 7.0, 13.0, 9.0, 6.0, 7.0, 5.0, 11.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.731250762939453e-05, -3.622937947511673e-05, -3.514625132083893e-05, -3.406312316656113e-05, -3.2979995012283325e-05, -3.1896866858005524e-05, -3.081373870372772e-05, -2.973061054944992e-05, -2.864748239517212e-05, -2.7564354240894318e-05, -2.6481226086616516e-05, -2.5398097932338715e-05, -2.4314969778060913e-05, -2.323184162378311e-05, -2.214871346950531e-05, -2.106558531522751e-05, -1.9982457160949707e-05, -1.8899329006671906e-05, -1.7816200852394104e-05, -1.6733072698116302e-05, -1.56499445438385e-05, -1.45668163895607e-05, -1.3483688235282898e-05, -1.2400560081005096e-05, -1.1317431926727295e-05, -1.0234303772449493e-05, -9.151175618171692e-06, -8.06804746389389e-06, -6.984919309616089e-06, -5.901791155338287e-06, -4.818663001060486e-06, -3.7355348467826843e-06, -2.652406692504883e-06, -1.5692785382270813e-06, -4.861503839492798e-07, 5.969777703285217e-07, 1.6801059246063232e-06, 2.7632340788841248e-06, 3.846362233161926e-06, 4.929490387439728e-06, 6.012618541717529e-06, 7.095746695995331e-06, 8.178874850273132e-06, 9.262003004550934e-06, 1.0345131158828735e-05, 1.1428259313106537e-05, 1.2511387467384338e-05, 1.359451562166214e-05, 1.4677643775939941e-05, 1.5760771930217743e-05, 1.6843900084495544e-05, 1.7927028238773346e-05, 1.9010156393051147e-05, 2.009328454732895e-05, 2.117641270160675e-05, 2.2259540855884552e-05, 2.3342669010162354e-05, 2.4425797164440155e-05, 2.5508925318717957e-05, 2.6592053472995758e-05, 2.767518162727356e-05, 2.875830978155136e-05, 2.9841437935829163e-05, 
3.0924566090106964e-05, 3.2007694244384766e-05]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 2.0, 1.0, 4.0, 5.0, 1.0, 7.0, 15.0, 17.0, 9.0, 14.0, 28.0, 39.0, 57.0, 91.0, 152.0, 286.0, 422.0, 814.0, 1680.0, 3857.0, 11035.0, 80801.0, 8248063.0, 28879.0, 6938.0, 2674.0, 1160.0, 581.0, 318.0, 224.0, 142.0, 71.0, 45.0, 38.0, 33.0, 24.0, 15.0, 11.0, 7.0, 7.0, 7.0, 6.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0], "bins": [-8.250893006334081e-05, -7.995621854206547e-05, -7.740350702079013e-05, -7.485079549951479e-05, -7.229808397823945e-05, -6.97453724569641e-05, -6.719266821164638e-05, -6.463995669037104e-05, -6.20872451690957e-05, -5.9534533647820354e-05, -5.698182212654501e-05, -5.442911060526967e-05, -5.187639908399433e-05, -4.932368756271899e-05, -4.677097967942245e-05, -4.421826815814711e-05, -4.166555299889296e-05, -3.911284147761762e-05, -3.656012995634228e-05, -3.400741843506694e-05, -3.14547069137916e-05, -2.890199721150566e-05, -2.634928750921972e-05, -2.379657598794438e-05, -2.1243864466669038e-05, -1.8691152945393696e-05, -1.6138441424118355e-05, -1.3585731721832417e-05, -1.1033020200557075e-05, -8.480308679281734e-06, -5.927598976995796e-06, -3.3748874557204545e-06, -8.221759344451129e-07, 1.7305351320828777e-06, 4.283246198610868e-06, 6.835956810391508e-06, 9.38866833166685e-06, 1.1941379852942191e-05, 1.4494089555228129e-05, 1.704680107650347e-05, 1.9599512597778812e-05, 2.2152224119054154e-05, 2.4704935640329495e-05, 2.7257645342615433e-05, 2.9810356863890775e-05, 3.236306656617671e-05, 3.4915778087452054e-05, 3.7468489608727396e-05, 4.002120113000274e-05, 4.257391265127808e-05, 4.512662417255342e-05, 4.767933569382876e-05, 5.02320472151041e-05, 5.2784758736379445e-05, 5.533746661967598e-05, 5.789017814095132e-05, 6.044288966222666e-05, 6.29955975455232e-05, 6.554830906679854e-05, 6.810102058807388e-05, 7.065373210934922e-05, 7.320644363062456e-05, 7.57591551518999e-05, 7.831186667317525e-05, 8.086457819445059e-05]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 3.0, 2.0, 7.0, 5.0, 10.0, 7.0, 12.0, 9.0, 5.0, 1.0, 5.0, 3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-9.14358242880553e-05, -8.729112596483901e-05, -8.314643491758034e-05, -7.900173659436405e-05, -7.485704554710537e-05, -7.071234722388908e-05, -6.65676489006728e-05, -6.242295785341412e-05, -5.827825953019783e-05, -5.413356484496035e-05, -4.9988870159722865e-05, -4.5844171836506575e-05, -4.169947715126909e-05, -3.755478246603161e-05, -3.341008414281532e-05, -2.926538945757784e-05, -2.5120694772340357e-05, -2.0976000087102875e-05, -1.683130358287599e-05, -1.2686607988143805e-05, -8.541912393411621e-06, -4.397217708174139e-06, -2.525212039472535e-07, 3.892175300279632e-06, 8.036869985517114e-06, 1.2181565580249298e-05, 1.6326261174981482e-05, 2.0470957679208368e-05, 2.461565236444585e-05, 2.8760347049683332e-05, 3.290504537289962e-05, 3.7049740058137104e-05, 4.1194434743374586e-05, 4.533912942861207e-05, 4.948382411384955e-05, 5.362852243706584e-05, 5.777321712230332e-05, 6.19179118075408e-05, 6.60626101307571e-05, 7.020730117801577e-05, 7.435199950123206e-05, 7.849669782444835e-05, 8.264138887170702e-05, 8.678608719492331e-05, 
9.09307855181396e-05, 9.507547656539828e-05, 9.922017488861457e-05, 0.00010336487321183085, 0.00010750956425908953, 0.00011165426258230582, 0.0001157989536295645, 0.00011994365195278078, 0.00012408834300003946, 0.00012823304859921336, 0.00013237773964647204, 0.0001365224306937307, 0.00014066713629290462, 0.0001448118273401633, 0.0001489565329393372, 0.00015310122398659587, 0.00015724591503385454, 0.00016139060608111322, 0.00016553531168028712, 0.0001696800027275458, 0.00017382469377480447]}, "gradients/encoder.masked_spec_embed": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 4.0, 6.0, 5.0, 4.0, 14.0, 13.0, 26.0, 42.0, 40.0, 64.0, 94.0, 96.0, 110.0, 107.0, 108.0, 82.0, 60.0, 47.0, 36.0, 21.0, 9.0, 7.0, 7.0, 3.0, 3.0, 4.0, 1.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4345855712890625e-05, -4.291068762540817e-05, -4.147551953792572e-05, -4.004035145044327e-05, -3.8605183362960815e-05, -3.717001527547836e-05, -3.573484718799591e-05, -3.429967910051346e-05, -3.2864511013031006e-05, -3.1429342925548553e-05, -2.99941748380661e-05, -2.855900675058365e-05, -2.7123838663101196e-05, -2.5688670575618744e-05, -2.425350248813629e-05, -2.281833440065384e-05, -2.1383166313171387e-05, -1.9947998225688934e-05, -1.8512830138206482e-05, -1.707766205072403e-05, -1.5642493963241577e-05, -1.4207325875759125e-05, -1.2772157788276672e-05, -1.133698970079422e-05, -9.901821613311768e-06, -8.466653525829315e-06, -7.031485438346863e-06, -5.59631735086441e-06, -4.161149263381958e-06, -2.7259811758995056e-06, -1.2908130884170532e-06, 1.4435499906539917e-07, 1.5795230865478516e-06, 3.014691174030304e-06, 4.449859261512756e-06, 5.885027348995209e-06, 7.320195436477661e-06, 8.755363523960114e-06, 1.0190531611442566e-05, 1.1625699698925018e-05, 1.306086778640747e-05, 1.4496035873889923e-05, 1.5931203961372375e-05, 1.7366372048854828e-05, 1.880154013633728e-05, 2.0236708223819733e-05, 2.1671876311302185e-05, 2.3107044398784637e-05, 2.454221248626709e-05, 2.5977380573749542e-05, 2.7412548661231995e-05, 2.8847716748714447e-05, 3.02828848361969e-05, 3.171805292367935e-05, 3.3153221011161804e-05, 3.4588389098644257e-05, 3.602355718612671e-05, 3.745872527360916e-05, 3.8893893361091614e-05, 4.0329061448574066e-05, 4.176422953605652e-05, 4.319939762353897e-05, 4.463456571102142e-05, 4.6069733798503876e-05, 4.750490188598633e-05]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 3.0, 3.0, 10.0, 5.0, 8.0, 19.0, 21.0, 34.0, 66.0, 102.0, 161.0, 279.0, 438.0, 640.0, 1423.0, 2644.0, 5832.0, 14169.0, 46783.0, 246108.0, 153929.0, 31122.0, 10717.0, 4578.0, 2257.0, 1225.0, 605.0, 388.0, 262.0, 150.0, 101.0, 55.0, 40.0, 27.0, 14.0, 14.0, 9.0, 12.0, 7.0, 4.0, 2.0, 3.0, 0.0, 2.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00024509429931640625, -0.00023596733808517456, -0.00022684037685394287, -0.00021771341562271118, -0.0002085864543914795, -0.0001994594931602478, -0.0001903325319290161, -0.00018120557069778442, -0.00017207860946655273, -0.00016295164823532104, -0.00015382468700408936, -0.00014469772577285767, -0.00013557076454162598, -0.0001264438033103943, -0.0001173168420791626, -0.00010818988084793091, -9.906291961669922e-05, -8.993595838546753e-05, -8.080899715423584e-05, -7.168203592300415e-05, -6.255507469177246e-05, -5.342811346054077e-05, -4.430115222930908e-05, 
-3.517419099807739e-05, -2.6047229766845703e-05, -1.6920268535614014e-05, -7.793307304382324e-06, 1.3336539268493652e-06, 1.0460615158081055e-05, 1.9587576389312744e-05, 2.8714537620544434e-05, 3.784149885177612e-05, 4.696846008300781e-05, 5.60954213142395e-05, 6.522238254547119e-05, 7.434934377670288e-05, 8.347630500793457e-05, 9.260326623916626e-05, 0.00010173022747039795, 0.00011085718870162964, 0.00011998414993286133, 0.00012911111116409302, 0.0001382380723953247, 0.0001473650336265564, 0.00015649199485778809, 0.00016561895608901978, 0.00017474591732025146, 0.00018387287855148315, 0.00019299983978271484, 0.00020212680101394653, 0.00021125376224517822, 0.0002203807234764099, 0.0002295076847076416, 0.0002386346459388733, 0.000247761607170105, 0.00025688856840133667, 0.00026601552963256836, 0.00027514249086380005, 0.00028426945209503174, 0.00029339641332626343, 0.0003025233745574951, 0.0003116503357887268, 0.0003207772970199585, 0.0003299042582511902, 0.0003390312194824219]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 6.0, 6.0, 7.0, 7.0, 15.0, 30.0, 28.0, 27.0, 40.0, 51.0, 68.0, 58.0, 62.0, 71.0, 59.0, 65.0, 64.0, 45.0, 60.0, 38.0, 48.0, 33.0, 30.0, 19.0, 17.0, 12.0, 8.0, 7.0, 6.0, 5.0, 2.0, 1.0, 2.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9848346710205078e-05, -1.905113458633423e-05, -1.825392246246338e-05, -1.745671033859253e-05, -1.665949821472168e-05, -1.586228609085083e-05, -1.506507396697998e-05, -1.4267861843109131e-05, -1.3470649719238281e-05, -1.2673437595367432e-05, -1.1876225471496582e-05, -1.1079013347625732e-05, -1.0281801223754883e-05, -9.484589099884033e-06, -8.687376976013184e-06, -7.890164852142334e-06, -7.092952728271484e-06, -6.295740604400635e-06, -5.498528480529785e-06, -4.7013163566589355e-06, -3.904104232788086e-06, -3.1068921089172363e-06, -2.3096799850463867e-06, -1.5124678611755371e-06, -7.152557373046875e-07, 8.195638656616211e-08, 8.791685104370117e-07, 1.6763806343078613e-06, 2.473592758178711e-06, 3.2708048820495605e-06, 4.06801700592041e-06, 4.86522912979126e-06, 5.662441253662109e-06, 6.459653377532959e-06, 7.256865501403809e-06, 8.054077625274658e-06, 8.851289749145508e-06, 9.648501873016357e-06, 1.0445713996887207e-05, 1.1242926120758057e-05, 1.2040138244628906e-05, 1.2837350368499756e-05, 1.3634562492370605e-05, 1.4431774616241455e-05, 1.5228986740112305e-05, 1.6026198863983154e-05, 1.6823410987854004e-05, 1.7620623111724854e-05, 1.8417835235595703e-05, 1.9215047359466553e-05, 2.0012259483337402e-05, 2.0809471607208252e-05, 2.16066837310791e-05, 2.240389585494995e-05, 2.32011079788208e-05, 2.399832010269165e-05, 2.47955322265625e-05, 2.559274435043335e-05, 2.63899564743042e-05, 2.718716859817505e-05, 2.79843807220459e-05, 2.8781592845916748e-05, 2.9578804969787598e-05, 3.0376017093658447e-05, 3.11732292175293e-05]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 5.0, 3.0, 3.0, 5.0, 3.0, 12.0, 10.0, 13.0, 25.0, 22.0, 36.0, 33.0, 46.0, 53.0, 43.0, 25.0, 33.0, 26.0, 21.0, 22.0, 12.0, 6.0, 3.0, 7.0, 6.0, 4.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.060785744921304e-05, -5.86001988267526e-05, -5.659254020429216e-05, -5.4584881581831723e-05, -5.2577222959371284e-05, 
-5.0569564336910844e-05, -4.85619020764716e-05, -4.6554247091989964e-05, -4.454658483155072e-05, -4.253892620909028e-05, -4.053126758662984e-05, -3.85236089641694e-05, -3.651595034170896e-05, -3.450829171924852e-05, -3.250063309678808e-05, -3.0492972655338235e-05, -2.84853158518672e-05, -2.647765722940676e-05, -2.446999860694632e-05, -2.246233998448588e-05, -2.045468136202544e-05, -1.8447022739565e-05, -1.6439362298115157e-05, -1.4431703675654717e-05, -1.2424045053194277e-05, -1.0416386430733837e-05, -8.408727808273397e-06, -6.401068276318256e-06, -4.393409653857816e-06, -2.385751031397376e-06, -3.7809149944223464e-07, 1.6295671230182052e-06, 3.637229383457452e-06, 5.644888005917892e-06, 7.652546628378332e-06, 9.660206160333473e-06, 1.1667864782793913e-05, 1.3675523405254353e-05, 1.5683182937209494e-05, 1.7690841559669934e-05, 1.9698500182130374e-05, 2.1706158804590814e-05, 2.3713817427051254e-05, 2.5721477868501097e-05, 2.7729136490961537e-05, 2.9736795113421977e-05, 3.1744453735882416e-05, 3.3752112358342856e-05, 3.5759770980803296e-05, 3.7767429603263736e-05, 3.9775088225724176e-05, 4.1782746848184615e-05, 4.3790405470645055e-05, 4.5798064093105495e-05, 4.780572635354474e-05, 4.9813381338026375e-05, 5.182104359846562e-05, 5.382870222092606e-05, 5.58363608433865e-05, 5.784401946584694e-05, 5.985167808830738e-05, 6.185934034874663e-05, 6.386699533322826e-05, 6.587465759366751e-05, 6.788231257814914e-05]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 4.0, 2.0, 3.0, 5.0, 8.0, 2.0, 8.0, 8.0, 6.0, 8.0, 8.0, 23.0, 30.0, 42.0, 53.0, 61.0, 48.0, 33.0, 30.0, 16.0, 11.0, 15.0, 8.0, 11.0, 9.0, 5.0, 6.0, 4.0, 6.0, 4.0, 3.0, 4.0, 2.0, 3.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-7.68899917602539e-05, -7.455423474311829e-05, -7.221847772598267e-05, -6.988272070884705e-05, -6.754696369171143e-05, -6.52112066745758e-05, -6.287544965744019e-05, -6.0539692640304565e-05, -5.8203935623168945e-05, -5.5868178606033325e-05, -5.3532421588897705e-05, -5.1196664571762085e-05, -4.8860907554626465e-05, -4.6525150537490845e-05, -4.4189393520355225e-05, -4.1853636503219604e-05, -3.9517879486083984e-05, -3.7182122468948364e-05, -3.4846365451812744e-05, -3.2510608434677124e-05, -3.0174851417541504e-05, -2.7839094400405884e-05, -2.5503337383270264e-05, -2.3167580366134644e-05, -2.0831823348999023e-05, -1.8496066331863403e-05, -1.6160309314727783e-05, -1.3824552297592163e-05, -1.1488795280456543e-05, -9.153038263320923e-06, -6.817281246185303e-06, -4.481524229049683e-06, -2.1457672119140625e-06, 1.8998980522155762e-07, 2.5257468223571777e-06, 4.861503839492798e-06, 7.197260856628418e-06, 9.533017873764038e-06, 1.1868774890899658e-05, 1.4204531908035278e-05, 1.65402889251709e-05, 1.887604594230652e-05, 2.121180295944214e-05, 2.354755997657776e-05, 2.588331699371338e-05, 2.8219074010849e-05, 3.055483102798462e-05, 3.289058804512024e-05, 3.522634506225586e-05, 3.756210207939148e-05, 3.98978590965271e-05, 4.223361611366272e-05, 4.456937313079834e-05, 4.690513014793396e-05, 4.924088716506958e-05, 5.15766441822052e-05, 5.391240119934082e-05, 5.624815821647644e-05, 5.858391523361206e-05, 6.091967225074768e-05, 6.32554292678833e-05, 6.559118628501892e-05, 6.792694330215454e-05, 7.026270031929016e-05, 7.259845733642578e-05]}, "eval/loss": 20.161333084106445, "eval/bleu": 0.0, "eval/runtime": 3697.3282, "eval/samples_per_second": 3.992, "eval/steps_per_second": 0.998} 
\ No newline at end of file diff --git a/wandb/run-20220503_172048-zotxt8wa/logs/debug-internal.log b/wandb/run-20220503_172048-zotxt8wa/logs/debug-internal.log index 09d6115a104c91f42532719d649c9367e58c38d8..758268134aa6d48f43d0d20700731711d11c9b79 100644 --- a/wandb/run-20220503_172048-zotxt8wa/logs/debug-internal.log +++ b/wandb/run-20220503_172048-zotxt8wa/logs/debug-internal.log @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4cf34289ced88806aa5004597ab168c26fd5feb8d7b6ee2eb21cdc5d07a28980 -size 11853234 +oid sha256:c0a2e7a710b37576ae024cc239b7addbc280d08321415351e2da4805f24f33cb +size 12884226 diff --git a/wandb/run-20220503_172048-zotxt8wa/logs/debug.log b/wandb/run-20220503_172048-zotxt8wa/logs/debug.log index f53bf234059880528b78950632a7d060d1d0d267..3ac6cf54842c7b14c280dccfad2f077aaa177c49 100644 --- a/wandb/run-20220503_172048-zotxt8wa/logs/debug.log +++ b/wandb/run-20220503_172048-zotxt8wa/logs/debug.log @@ -26,3 +26,7 @@ config: {} 2022-05-03 17:20:52,207 INFO MainThread:42221 [wandb_init.py:init():651] run started, returning control to user process 2022-05-03 17:20:52,210 INFO MainThread:42221 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 
'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, '_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.1742341660721257, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 
'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May03_17-16-03_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 4, 'eval_batch_size': 4} 2022-05-03 17:20:52,212 INFO MainThread:42221 [wandb_watch.py:watch():43] Watching +2022-05-04 09:50:07,025 INFO MainThread:42221 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-05-04 09:50:07,028 INFO MainThread:42221 [wandb_run.py:_restore():1769] restore +2022-05-04 09:50:14,824 INFO MainThread:42221 [wandb_run.py:_wait_for_finish():1929] got exit ret: None +2022-05-04 09:50:14,925 INFO MainThread:42221 [wandb_run.py:_restore():1769] restore diff --git a/wandb/run-20220503_172048-zotxt8wa/run-zotxt8wa.wandb 
b/wandb/run-20220503_172048-zotxt8wa/run-zotxt8wa.wandb index a8cba33255b22f7454bfe6f436ed06edfe3fe902..583d153ee4e0ccf00b19d5ac08d6a9ddcca05632 100644 --- a/wandb/run-20220503_172048-zotxt8wa/run-zotxt8wa.wandb +++ b/wandb/run-20220503_172048-zotxt8wa/run-zotxt8wa.wandb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deca75fd7f7abe31d20bd34d64d807146971a38e8f71a77f19628dbc4ecc623f -size 560787813 +oid sha256:57663d51ea5e075a70e89c188110952839cfcc26c48b6c8cb2575360ac7877e9 +size 618047695 diff --git a/wandb/run-20220504_095140-cwhobv6l/files/config.yaml b/wandb/run-20220504_095140-cwhobv6l/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25f6c79536dcca6fb94ea877e42a210ae123226b --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/files/config.yaml @@ -0,0 +1,9368 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.19.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: 
[... wandb gradient-histogram metric registrations continue for decoder layers 9 down to 0: for each layer, the weight and bias of self_attn.{q,k,v,out}_proj, encoder_attn.{q,k,v,out}_proj, self_attn_layer_norm, encoder_attn_layer_norm, final_layer_norm, fc1, and fc2, each with `._type`, `.values`, and `.bins` entries ...]
1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 
1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.0\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layernorm_embedding\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_positions\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_positions\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_positions\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_tokens\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_tokens\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.embed_tokens\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.2\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.1\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.adapter\.layers\.0\.conv\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.adapter\.layers\.0\.conv\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 
1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 
1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + 
- 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 
1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + 
- 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 
[... elided: the remainder of the wandb metric-definition list in the `_wandb` config block, which registers the same three histogram keys (`._type`, `.values`, `.bins`) for the gradient of every remaining encoder parameter: encoder.encoder.layers.14 down to layers.0 (attention q_proj/k_proj/v_proj/out_proj, feed_forward intermediate_dense/output_dense, layer_norm, final_layer_norm, weights and biases), encoder.encoder.pos_conv_embed.conv (bias, weight_v, weight_g), encoder.masked_spec_embed, and encoder.feature_projection (projection and layer_norm); each entry carries the same `5: 1` / `6: - 1` block ...]
+    python_version: 3.9.5
+    start_time: 1651657900
+    t:
+      1:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      3:
+      - 13
+      4: 3.9.5
+      5: 0.12.10
+      6: 4.19.0.dev0
+      8:
+      - 5
+adafactor:
+  desc: null
+  value: false
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+add_cross_attention:
+  desc: null
+  value: false
+architectures:
+  desc: null
+  value:
+  - SpeechEncoderDecoderModel
+bad_words_ids:
+  desc: null
+  value: null
+bf16:
+  desc: null
+  value: false
+bf16_full_eval:
+  desc: null
+  value: false
+bos_token_id:
+  desc: null
+  value: null
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+cross_attention_hidden_size:
+  desc: null
+  value: null
+data_seed:
+  desc: null
+  value: None
+dataloader_drop_last:
+  desc: null
+  value: false
+dataloader_num_workers:
+  desc: null
+  value: 0
+dataloader_pin_memory:
desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: facebook/bart-large + activation_dropout: 0.1 + activation_function: gelu + add_bias_logits: false + add_cross_attention: true + add_final_layer_norm: false + architectures: + - BartModel + attention_dropout: 0.1 + bad_words_ids: null + bos_token_id: 0 + chunk_size_feed_forward: 0 + classif_dropout: 0.1 + classifier_dropout: 0.0 + cross_attention_hidden_size: null + d_model: 1024 + decoder_attention_heads: 16 + decoder_ffn_dim: 4096 + decoder_layerdrop: 0.0 + decoder_layers: 12 + decoder_start_token_id: 2 + diversity_penalty: 0.0 + do_sample: false + dropout: 0.1 + early_stopping: true + encoder_attention_heads: 16 + encoder_ffn_dim: 4096 + encoder_layerdrop: 0.0 + encoder_layers: 12 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + exponential_decay_length_penalty: null + finetuning_task: null + forced_bos_token_id: 0 + forced_eos_token_id: 2 + gradient_checkpointing: false + id2label: + '0': LABEL_0 + '1': LABEL_1 + '2': LABEL_2 + init_std: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + LABEL_2: 2 + length_penalty: 1.0 + max_length: 20 + max_position_embeddings: 1024 + min_length: 0 + model_type: bart + no_repeat_ngram_size: 3 + normalize_before: false + num_beam_groups: 1 + num_beams: 4 + num_hidden_layers: 12 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: 1 + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + scale_embedding: false + sep_token_id: null + task_specific_params: + summarization: + length_penalty: 1.0 + max_length: 128 + min_length: 12 + num_beams: 4 + summarization_cnn: + length_penalty: 2.0 + max_length: 142 + min_length: 56 + num_beams: 4 + summarization_xsum: + length_penalty: 1.0 + max_length: 62 + min_length: 11 + num_beams: 6 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.19.0.dev0 + typical_p: 1.0 + use_bfloat16: false + use_cache: true + vocab_size: 50265 +decoder_start_token_id: + desc: null + value: 0 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-xls-r-300m + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: true + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.1 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: 
null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + exponential_decay_length_penalty: null + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.043267782095468554 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.1 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: float32 + torchscript: false + transformers_version: 4.19.0.dev0 + typical_p: 1.0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 2 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 4 +eval_delay: + desc: null + value: 0 +eval_split_name: + desc: null + value: test +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +exponential_decay_length_penalty: + desc: null + value: null +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: true +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hidden_dropout: + desc: null + value: 0.043267782095468554 +hub_model_id: + desc: null + value: None +hub_private_repo: + desc: null + value: false +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + 
'1': LABEL_1 +ignore_data_skip: + desc: null + value: false +include_inputs_for_metrics: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +language: + desc: null + value: fr.en +learning_rate: + desc: null + value: 0.0009027256702272704 +length_column_name: + desc: null + value: length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: true +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/May04_09-50-47_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_duration_in_seconds: + desc: null + value: 20 +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 40 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: bleu +min_length: + desc: null + value: 0 +model_name_or_path: + desc: null + value: ./ +model_type: + desc: null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 3 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 1 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 4 +per_device_train_batch_size: + desc: null + value: 4 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''tensorboard'', ''wandb'', ''codecarbon'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: None +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: 
false +task: + desc: null + value: covost2 +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 4 +transformers_version: + desc: null + value: null +typical_p: + desc: null + value: 1.0 +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220504_095140-cwhobv6l/files/output.log b/wandb/run-20220504_095140-cwhobv6l/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e6978a78d4b52d564d58373f78cad4ba9c44dad4 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/files/output.log @@ -0,0 +1,2987 @@ +wandb: WARNING Config item 'output_dir' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'evaluation_strategy' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'per_device_train_batch_size' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'per_device_eval_batch_size' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'gradient_accumulation_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'learning_rate' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'num_train_epochs' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'warmup_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'logging_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'save_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'eval_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'metric_for_best_model' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'greater_is_better' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'generation_max_length' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'generation_num_beams' was locked by 'sweep' (ignored update). 
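The repeated "Config item ... was locked by 'sweep'" warnings above come from launching the run under a wandb sweep agent: the agent fixes the swept hyperparameters (the unusually precise learning_rate and hidden_dropout values recorded in config.yaml are sampled values) and ignores any attempt by the training script to overwrite them. The sweep definition itself is not stored in this repository; the snippet below is only a hedged sketch of what such a definition might look like, with the project name, search method, parameter ranges, metric name, and the train_fn wrapper all assumed rather than taken from the source.

```python
import wandb

# Illustrative only: the actual sweep definition is not part of this repository.
# Parameter names mirror the swept CLI arguments recorded in wandb-metadata.json;
# ranges, method, and metric name are assumptions.
sweep_config = {
    "method": "random",                                 # assumed search strategy
    "metric": {"name": "eval/bleu", "goal": "maximize"},  # config.yaml: metric_for_best_model: bleu
    "parameters": {
        "learning_rate": {"distribution": "uniform", "min": 1e-4, "max": 1e-3},
        "hidden_dropout": {"distribution": "uniform", "min": 0.0, "max": 0.3},
        "warmup_steps": {"values": [500]},
    },
}

def train_fn():
    # Hypothetical wrapper: each agent run would invoke run_xtreme_s.py with the
    # sampled hyperparameters injected through the wandb config.
    pass

sweep_id = wandb.sweep(sweep_config, project="xtreme_s_xlsr_2_bart_covost2_fr_en")
wandb.agent(sweep_id, function=train_fn)
```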
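The nested encoder and decoder entries in config.yaml describe a SpeechEncoderDecoderModel pairing a facebook/wav2vec2-xls-r-300m encoder (with a convolutional adapter, add_adapter: true) with a facebook/bart-large decoder. The repository's actual model-creation script is not included in this commit; the following is a rough sketch, assuming the standard transformers API, of how a checkpoint with this configuration could be assembled.

```python
from transformers import SpeechEncoderDecoderModel

# Hypothetical reconstruction, not the repo's actual creation script: an XLS-R
# encoder with a convolutional adapter feeding a BART-large decoder, matching the
# nested encoder/decoder configs recorded in config.yaml.
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
    "facebook/wav2vec2-xls-r-300m",   # encoder._name_or_path in config.yaml
    "facebook/bart-large",            # decoder._name_or_path in config.yaml
    encoder_add_adapter=True,         # config.yaml records add_adapter: true
)

# Token ids and generation settings mirrored from the top-level values in config.yaml.
model.config.decoder_start_token_id = 0
model.config.pad_token_id = 1
model.config.eos_token_id = 2
model.config.max_length = 40
model.config.use_cache = False
```

These overrides correspond to the top-level config.yaml entries (decoder_start_token_id: 0, pad_token_id: 1, eos_token_id: 2, max_length: 40, use_cache: false); everything else in the sketch is the library default.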
+ 0%| | 0/19440 [00:00 + main() + File "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/run_xtreme_s.py", line 874, in main + train_result = trainer.train(resume_from_checkpoint=checkpoint) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1524, in train + self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1647, in _maybe_log_save_evaluate + self.log(logs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 1960, in log + self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer_callback.py", line 381, in on_log + return self.call_event("on_log", args, state, control, logs=logs) + File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer_callback.py", line 388, in call_event + result = getattr(callback, event)( + File "/home/sanchit_huggingface_co/transformers/src/transformers/integrations.py", line 658, in on_log + self._wandb.log({**logs, "train/global_step": state.global_step}) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 1349, in log + self.history._row_add(data) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/wandb_history.py", line 44, in _row_add + self._flush() + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/wandb_history.py", line 59, in _flush + self._callback(row=self._data, step=self._step) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 1027, in _history_callback + self._backend.interface.publish_history( + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/interface/interface.py", line 506, in publish_history + self._publish_history(history) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/interface/interface_shared.py", line 59, in _publish_history + self._publish(rec) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/interface/interface_queue.py", line 49, in _publish + raise Exception("The wandb backend process has shutdown") +Exception: The wandb backend process has shutdown \ No newline at end of file diff --git a/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt b/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b14e59ff199212bab0602eefc99e053cb713312 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt @@ -0,0 +1,287 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +alembic==1.7.7 +anyio==3.5.0 +appdirs==1.4.4 +apscheduler==3.9.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==21.3.0 +arrow==1.2.2 +asttokens==2.0.5 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +autopage==0.5.0 +babel==2.9.1 +backcall==0.2.0 +backoff==1.11.1 +binaryornot==0.4.4 +bitsandbytes-cuda113==0.26.0 +black==22.1.0 +bleach==4.1.0 +boto3==1.16.34 +botocore==1.19.63 +brotli==1.0.9 +cachetools==5.0.0 +certifi==2021.10.8 +cffi==1.15.0 +chardet==4.0.0 +charset-normalizer==2.0.11 +chex==0.1.0 +click==8.0.3 +cliff==3.10.1 +clldutils==3.10.1 +cmaes==0.8.2 +cmd2==2.4.0 +codecarbon==1.2.0 +colorlog==6.6.0 +cookiecutter==1.7.3 +cryptography==36.0.2 +csvw==1.11.0 +cycler==0.11.0 
+dash-bootstrap-components==1.1.0 +dash-core-components==2.0.0 +dash-html-components==2.0.0 +dash-table==5.0.0 +dash==2.3.1 +datasets==2.1.1.dev0 +debugpy==1.5.1 +decorator==5.1.1 +defusedxml==0.7.1 +deprecated==1.2.13 +dill==0.3.4 +dlinfo==1.2.1 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +docker==4.4.4 +entrypoints==0.4 +execnet==1.9.0 +executing==0.8.2 +faiss-cpu==1.7.2 +filelock==3.4.2 +fire==0.4.0 +flake8==4.0.1 +flask-compress==1.11 +flask==2.1.1 +flatbuffers==1.12 +flax==0.4.0 +fonttools==4.29.1 +frozenlist==1.3.0 +fsspec==2022.1.0 +fugashi==1.1.2 +gast==0.5.3 +gitdb==4.0.9 +gitpython==3.1.18 +google-auth-oauthlib==0.4.6 +google-auth==2.6.0 +google-pasta==0.2.0 +greenlet==1.1.2 +grpcio==1.43.0 +h5py==3.6.0 +hf-doc-builder==0.2.0 +huggingface-hub==0.4.0 +hypothesis==6.36.1 +idna==3.3 +importlib-metadata==4.10.1 +iniconfig==1.1.1 +ipadic==1.0.0 +ipdb==0.13.9 +ipykernel==6.8.0 +ipython-genutils==0.2.0 +ipython==8.0.1 +ipywidgets==7.6.5 +isodate==0.6.1 +isort==5.10.1 +itsdangerous==2.1.2 +jax==0.2.28 +jaxlib==0.1.76+cuda11.cudnn82 +jedi==0.18.1 +jinja2-time==0.2.0 +jinja2==3.0.3 +jiwer==2.3.0 +jmespath==0.10.0 +joblib==1.1.0 +json5==0.9.6 +jsonschema==4.4.0 +jupyter-client==7.1.2 +jupyter-console==6.4.0 +jupyter-core==4.9.1 +jupyter-server==1.13.5 +jupyter==1.0.0 +jupyterlab-pygments==0.1.2 +jupyterlab-server==2.10.3 +jupyterlab-widgets==1.0.2 +jupyterlab==3.2.9 +keras-preprocessing==1.1.2 +keras==2.8.0 +kiwisolver==1.3.2 +kubernetes==12.0.1 +libclang==13.0.0 +librosa==0.8.1 +llvmlite==0.38.0 +mako==1.2.0 +markdown==3.3.6 +markupsafe==2.0.1 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +mccabe==0.6.1 +mistune==0.8.4 +msgpack==1.0.3 +multidict==6.0.2 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclassic==0.3.5 +nbclient==0.5.10 +nbconvert==6.4.1 +nbformat==5.1.3 +nest-asyncio==1.5.4 +nltk==3.7 +notebook==6.4.8 +numba==0.55.1 +numpy==1.21.5 +oauthlib==3.2.0 +onnx==1.11.0 +onnxconverter-common==1.9.0 +opt-einsum==3.3.0 +optax==0.1.0 +optuna==2.10.0 +packaging==21.3 +pandas==1.4.0 +pandocfilters==1.5.0 +parameterized==0.8.1 +parso==0.8.3 +pathspec==0.9.0 +pathtools==0.1.2 +pbr==5.8.1 +pexpect==4.8.0 +phonemizer==3.0.1 +pickleshare==0.7.5 +pillow==9.0.0 +pint==0.16.1 +pip==22.0.2 +pkg-resources==0.0.0 +plac==1.3.5 +platformdirs==2.4.1 +plotly==5.6.0 +pluggy==1.0.0 +pooch==1.6.0 +portalocker==2.0.0 +poyo==0.5.0 +prettytable==3.2.0 +prometheus-client==0.13.1 +promise==2.3 +prompt-toolkit==3.0.26 +protobuf==3.19.4 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +py-cpuinfo==8.0.0 +py==1.11.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycodestyle==2.8.0 +pycparser==2.21 +pyctcdecode==0.3.0 +pyflakes==2.4.0 +pygments==2.11.2 +pygtrie==2.4.2 +pynvml==11.4.1 +pyopenssl==22.0.0 +pyparsing==3.0.7 +pyperclip==1.8.2 +pypng==0.0.21 +pyrsistent==0.18.1 +pytest-forked==1.4.0 +pytest-timeout==2.1.0 +pytest-xdist==2.5.0 +pytest==7.1.1 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +python-slugify==6.1.1 +pytz-deprecation-shim==0.1.0.post0 +pytz==2021.3 +pyyaml==5.4.1 +pyzmq==22.3.0 +qtconsole==5.2.2 +qtpy==2.0.1 +ray==1.11.0 +redis==4.2.2 +regex==2022.1.18 +requests-oauthlib==1.3.1 +requests==2.27.1 +resampy==0.2.2 +responses==0.18.0 +rfc3986==2.0.0 +rouge-score==0.0.4 +rsa==4.8 +s3transfer==0.3.7 +sacrebleu==1.5.1 +sacremoses==0.0.47 +scikit-learn==1.0.2 +scipy==1.7.3 +segments==2.2.0 +send2trash==1.8.0 +sentencepiece==0.1.96 +sentry-sdk==1.5.6 +setuptools==44.1.1 +shortuuid==1.0.8 +sigopt==8.3.0 +six==1.16.0 +smmap==5.0.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 
+soundfile==0.10.3.post1 +sqlalchemy==1.4.34 +stack-data==0.1.4 +stevedore==3.5.0 +tabulate==0.8.9 +tenacity==8.0.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.8.0 +tensorboardx==2.5 +tensorflow-io-gcs-filesystem==0.24.0 +tensorflow==2.8.0 +termcolor==1.1.0 +terminado==0.13.1 +testpath==0.5.0 +text-unidecode==1.3 +tf-estimator-nightly==2.8.0.dev2021122109 +tf2onnx==1.9.3 +threadpoolctl==3.1.0 +timeout-decorator==0.5.0 +timm==0.5.4 +tokenizers==0.11.4 +toml==0.10.2 +tomli==2.0.0 +toolz==0.11.2 +torch==1.10.2+cu113 +torchaudio==0.10.2+cu113 +torchvision==0.11.3 +tornado==6.1 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.18.0.dev0 +typing-extensions==3.10.0.2 +tzdata==2022.1 +tzlocal==4.2 +unidic-lite==1.0.8 +unidic==1.1.0 +uritemplate==4.1.1 +urllib3==1.26.8 +wandb==0.12.10 +wasabi==0.9.1 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +werkzeug==2.0.2 +wheel==0.37.1 +widgetsnbextension==3.5.2 +wrapt==1.14.0 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220504_095140-cwhobv6l/files/wandb-metadata.json b/wandb/run-20220504_095140-cwhobv6l/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0ad62b419fddfba2724202ed07d1c4f0fb1186e6 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/files/wandb-metadata.json @@ -0,0 +1,57 @@ +{ + "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33", + "python": "3.9.5", + "heartbeatAt": "2022-05-04T09:51:43.711561", + "startedAt": "2022-05-04T09:51:40.114661", + "docker": null, + "gpu": "Tesla V100-SXM2-16GB", + "gpu_count": 2, + "cpu_count": 16, + "cuda": null, + "args": [ + "--overwrite_output_dir", + "--freeze_feature_encoder", + "--gradient_checkpointing", + "--predict_with_generate", + "--fp16", + "--group_by_length", + "--do_train", + "--do_eval", + "--load_best_model_at_end", + "--push_to_hub", + "--use_auth_token", + "--eval_split_name=test", + "--eval_steps=500", + "--evaluation_strategy=steps", + "--generation_max_length=40", + "--generation_num_beams=1", + "--gradient_accumulation_steps=8", + "--greater_is_better=True", + "--hidden_dropout=0.043267782095468554", + "--language=fr.en", + "--learning_rate=0.0009027256702272704", + "--logging_steps=1", + "--max_duration_in_seconds=20", + "--metric_for_best_model=bleu", + "--model_name_or_path=./", + "--num_train_epochs=3", + "--output_dir=./", + "--per_device_eval_batch_size=4", + "--per_device_train_batch_size=4", + "--save_steps=500", + "--task=covost2", + "--warmup_steps=500" + ], + "state": "running", + "program": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/run_xtreme_s.py", + "codePath": "run_xtreme_s.py", + "git": { + "remote": "https://huggingface.co/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en_2", + "commit": "5015bdbffa2c4f0d03807220a8ea2f3b7dbcf7ed" + }, + "email": "sanchit@huggingface.co", + "root": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2", + "host": "sanchit--v100", + "username": "sanchit_huggingface_co", + "executable": "/home/sanchit_huggingface_co/gcp/bin/python3" +} diff --git a/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json b/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..cf9fa1f2dca47fce3048e0183996e5abf6dae6a0 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/loss": 5.2025, "train/learning_rate": 0.0002960940198345447, 
"train/epoch": 0.03, "train/global_step": 170, "_runtime": 549, "_timestamp": 1651658449, "_step": 169, "gradients/decoder.model.decoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 22.0, 393.0, 558.0, 38.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-13.791566848754883, -12.933463096618652, -12.075359344482422, -11.217254638671875, -10.359150886535645, -9.501047134399414, -8.642942428588867, -7.784838676452637, -6.926734924316406, -6.068631172180176, -5.210526943206787, -4.352422714233398, -3.494318962097168, -2.6362152099609375, -1.7781109809875488, -0.9200067520141602, -0.06190299987792969, 0.7962009906768799, 1.6543049812316895, 2.512408971786499, 3.3705129623413086, 4.228616714477539, 5.086720943450928, 5.944825172424316, 6.802928924560547, 7.661032676696777, 8.519136428833008, 9.377241134643555, 10.235344886779785, 11.093448638916016, 11.951553344726562, 12.809657096862793, 13.66775894165039, 14.525862693786621, 15.383966445922852, 16.2420711517334, 17.100173950195312, 17.95827865600586, 18.816383361816406, 19.674488067626953, 20.532590866088867, 21.390695571899414, 22.248798370361328, 23.106903076171875, 23.965007781982422, 24.823110580444336, 25.681215286254883, 26.539318084716797, 27.397422790527344, 28.25552749633789, 29.113630294799805, 29.97173500061035, 30.829837799072266, 31.687942504882812, 32.54604721069336, 33.404151916503906, 34.26225280761719, 35.120357513427734, 35.97846221923828, 36.83656311035156, 37.69466781616211, 38.552772521972656, 39.4108772277832, 40.26898193359375, 41.1270866394043]}, "gradients/decoder.model.decoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 5.0, 4.0, 5.0, 9.0, 21.0, 24.0, 29.0, 50.0, 66.0, 70.0, 102.0, 104.0, 90.0, 88.0, 87.0, 70.0, 53.0, 39.0, 36.0, 23.0, 19.0, 9.0, 7.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.638748168945312, -10.282394409179688, -9.926039695739746, -9.569684982299805, -9.21333122253418, -8.856977462768555, -8.500622749328613, -8.144268035888672, -7.787914276123047, -7.431560039520264, -7.0752058029174805, -6.718851566314697, -6.362497329711914, -6.006143093109131, -5.649788856506348, -5.2934346199035645, -4.937080383300781, -4.580726146697998, -4.224371910095215, -3.8680176734924316, -3.5116634368896484, -3.1553092002868652, -2.798954963684082, -2.442600727081299, -2.0862464904785156, -1.7298922538757324, -1.3735380172729492, -1.017183780670166, -0.6608295440673828, -0.3044753074645996, 0.051878929138183594, 0.4082331657409668, 0.76458740234375, 1.1209416389465332, 1.4772958755493164, 1.8336501121520996, 2.190004348754883, 2.546358585357666, 2.902712821960449, 3.2590670585632324, 3.6154212951660156, 3.971775531768799, 4.328129768371582, 4.684484004974365, 5.040838241577148, 5.397192478179932, 5.753546714782715, 6.109900951385498, 6.466255187988281, 6.8226094245910645, 7.178963661193848, 7.535317897796631, 7.891672134399414, 8.248025894165039, 8.60438060760498, 8.960735321044922, 9.317089080810547, 9.673442840576172, 10.029797554016113, 10.386152267456055, 10.74250602722168, 11.098859786987305, 11.455214500427246, 11.811569213867188, 
12.167922973632812]}, "gradients/decoder.model.decoder.layers.11.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 2.0, 6.0, 4.0, 10.0, 23.0, 43.0, 69.0, 351.0, 1606.0, 10019.0, 236859.0, 3344025.0, 585453.0, 13161.0, 1998.0, 441.0, 118.0, 37.0, 24.0, 10.0, 5.0, 3.0, 9.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-12.78125, -12.318359375, -11.85546875, -11.392578125, -10.9296875, -10.466796875, -10.00390625, -9.541015625, -9.078125, -8.615234375, -8.15234375, -7.689453125, -7.2265625, -6.763671875, -6.30078125, -5.837890625, -5.375, -4.912109375, -4.44921875, -3.986328125, -3.5234375, -3.060546875, -2.59765625, -2.134765625, -1.671875, -1.208984375, -0.74609375, -0.283203125, 0.1796875, 0.642578125, 1.10546875, 1.568359375, 2.03125, 2.494140625, 2.95703125, 3.419921875, 3.8828125, 4.345703125, 4.80859375, 5.271484375, 5.734375, 6.197265625, 6.66015625, 7.123046875, 7.5859375, 8.048828125, 8.51171875, 8.974609375, 9.4375, 9.900390625, 10.36328125, 10.826171875, 11.2890625, 11.751953125, 12.21484375, 12.677734375, 13.140625, 13.603515625, 14.06640625, 14.529296875, 14.9921875, 15.455078125, 15.91796875, 16.380859375, 16.84375]}, "gradients/decoder.model.decoder.layers.11.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 5.0, 6.0, 12.0, 10.0, 20.0, 27.0, 26.0, 33.0, 48.0, 46.0, 49.0, 61.0, 76.0, 73.0, 53.0, 71.0, 66.0, 60.0, 57.0, 46.0, 41.0, 30.0, 27.0, 20.0, 11.0, 11.0, 8.0, 8.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.0078125, -7.76068115234375, -7.5135498046875, -7.26641845703125, -7.019287109375, -6.77215576171875, -6.5250244140625, -6.27789306640625, -6.03076171875, -5.78363037109375, -5.5364990234375, -5.28936767578125, -5.042236328125, -4.79510498046875, -4.5479736328125, -4.30084228515625, -4.0537109375, -3.80657958984375, -3.5594482421875, -3.31231689453125, -3.065185546875, -2.81805419921875, -2.5709228515625, -2.32379150390625, -2.07666015625, -1.82952880859375, -1.5823974609375, -1.33526611328125, -1.088134765625, -0.84100341796875, -0.5938720703125, -0.34674072265625, -0.099609375, 0.14752197265625, 0.3946533203125, 0.64178466796875, 0.888916015625, 1.13604736328125, 1.3831787109375, 1.63031005859375, 1.87744140625, 2.12457275390625, 2.3717041015625, 2.61883544921875, 2.865966796875, 3.11309814453125, 3.3602294921875, 3.60736083984375, 3.8544921875, 4.10162353515625, 4.3487548828125, 4.59588623046875, 4.843017578125, 5.09014892578125, 5.3372802734375, 5.58441162109375, 5.83154296875, 6.07867431640625, 6.3258056640625, 6.57293701171875, 6.820068359375, 7.06719970703125, 7.3143310546875, 7.56146240234375, 7.80859375]}, "gradients/decoder.model.decoder.layers.11.fc1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 5.0, 17.0, 24.0, 60.0, 127.0, 389.0, 342557.0, 3850353.0, 486.0, 139.0, 52.0, 28.0, 15.0, 9.0, 7.0, 5.0, 4.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-139.875, -133.796875, -127.71875, -121.640625, -115.5625, -109.484375, -103.40625, -97.328125, -91.25, -85.171875, -79.09375, -73.015625, -66.9375, -60.859375, -54.78125, -48.703125, -42.625, -36.546875, -30.46875, 
-24.390625, -18.3125, -12.234375, -6.15625, -0.078125, 6.0, 12.078125, 18.15625, 24.234375, 30.3125, 36.390625, 42.46875, 48.546875, 54.625, 60.703125, 66.78125, 72.859375, 78.9375, 85.015625, 91.09375, 97.171875, 103.25, 109.328125, 115.40625, 121.484375, 127.5625, 133.640625, 139.71875, 145.796875, 151.875, 157.953125, 164.03125, 170.109375, 176.1875, 182.265625, 188.34375, 194.421875, 200.5, 206.578125, 212.65625, 218.734375, 224.8125, 230.890625, 236.96875, 243.046875, 249.125]}, "gradients/decoder.model.decoder.layers.11.fc1.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0, 13.0, 16.0, 31.0, 79.0, 167.0, 461.0, 1477.0, 1221.0, 351.0, 125.0, 56.0, 26.0, 19.0, 7.0, 10.0, 5.0, 2.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.970703125, -2.840545654296875, -2.71038818359375, -2.580230712890625, -2.4500732421875, -2.319915771484375, -2.18975830078125, -2.059600830078125, -1.929443359375, -1.799285888671875, -1.66912841796875, -1.538970947265625, -1.4088134765625, -1.278656005859375, -1.14849853515625, -1.018341064453125, -0.88818359375, -0.758026123046875, -0.62786865234375, -0.497711181640625, -0.3675537109375, -0.237396240234375, -0.10723876953125, 0.022918701171875, 0.153076171875, 0.283233642578125, 0.41339111328125, 0.543548583984375, 0.6737060546875, 0.803863525390625, 0.93402099609375, 1.064178466796875, 1.1943359375, 1.324493408203125, 1.45465087890625, 1.584808349609375, 1.7149658203125, 1.845123291015625, 1.97528076171875, 2.105438232421875, 2.235595703125, 2.365753173828125, 2.49591064453125, 2.626068115234375, 2.7562255859375, 2.886383056640625, 3.01654052734375, 3.146697998046875, 3.27685546875, 3.407012939453125, 3.53717041015625, 3.667327880859375, 3.7974853515625, 3.927642822265625, 4.05780029296875, 4.187957763671875, 4.318115234375, 4.448272705078125, 4.57843017578125, 4.708587646484375, 4.8387451171875, 4.968902587890625, 5.09906005859375, 5.229217529296875, 5.359375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 9.0, 9.0, 24.0, 50.0, 94.0, 181.0, 257.0, 172.0, 119.0, 51.0, 26.0, 9.0, 8.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-20.08782386779785, -19.582401275634766, -19.07697868347168, -18.571556091308594, -18.066133499145508, -17.560710906982422, -17.055288314819336, -16.54986572265625, -16.044443130493164, -15.539020538330078, -15.033597946166992, -14.528175354003906, -14.02275276184082, -13.517330169677734, -13.011907577514648, -12.506484985351562, -12.001062393188477, -11.49563980102539, -10.990217208862305, -10.484794616699219, -9.979372024536133, -9.473949432373047, -8.968526840209961, -8.463104248046875, -7.957681655883789, -7.452259063720703, -6.946836471557617, -6.441413879394531, -5.935991287231445, -5.430568695068359, -4.925146102905273, -4.4197235107421875, -3.9142990112304688, -3.408876419067383, -2.903453826904297, -2.398031234741211, -1.892608642578125, -1.387186050415039, -0.8817634582519531, -0.3763408660888672, 0.12908172607421875, 0.6345043182373047, 1.1399269104003906, 1.6453495025634766, 2.1507720947265625, 2.6561946868896484, 3.1616172790527344, 3.6670398712158203, 
4.172462463378906, 4.677885055541992, 5.183307647705078, 5.688730239868164, 6.19415283203125, 6.699575424194336, 7.204998016357422, 7.710420608520508, 8.215843200683594, 8.72126579284668, 9.226688385009766, 9.732110977172852, 10.237533569335938, 10.742956161499023, 11.24837875366211, 11.753801345825195, 12.259223937988281]}, "gradients/decoder.model.decoder.layers.11.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 7.0, 6.0, 7.0, 20.0, 24.0, 26.0, 34.0, 37.0, 62.0, 63.0, 68.0, 63.0, 74.0, 85.0, 81.0, 66.0, 54.0, 53.0, 44.0, 27.0, 27.0, 32.0, 13.0, 12.0, 5.0, 7.0, 3.0, 2.0, 4.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.967407703399658, -6.700961112976074, -6.434514045715332, -6.168067455291748, -5.901620864868164, -5.635173797607422, -5.368727207183838, -5.102280616760254, -4.835833549499512, -4.569386959075928, -4.3029398918151855, -4.036493301391602, -3.7700467109680176, -3.5035998821258545, -3.2371530532836914, -2.9707064628601074, -2.7042598724365234, -2.4378130435943604, -2.1713664531707764, -1.9049196243286133, -1.6384729146957397, -1.3720262050628662, -1.1055793762207031, -0.8391326665878296, -0.572685956954956, -0.30623921751976013, -0.03979247808456421, 0.2266542911529541, 0.49310100078582764, 0.7595477104187012, 1.0259945392608643, 1.2924412488937378, 1.5588884353637695, 1.825335144996643, 2.0917818546295166, 2.3582286834716797, 2.6246752738952637, 2.8911221027374268, 3.15756893157959, 3.424015522003174, 3.690462350845337, 3.9569091796875, 4.223355770111084, 4.489802360534668, 4.75624942779541, 5.022696018218994, 5.289142608642578, 5.55558967590332, 5.822036266326904, 6.088482856750488, 6.3549299240112305, 6.6213765144348145, 6.887823104858398, 7.154270172119141, 7.420716762542725, 7.687163352966309, 7.953610420227051, 8.220057487487793, 8.486503601074219, 8.752950668334961, 9.019397735595703, 9.285843849182129, 9.552290916442871, 9.818737983703613, 10.085184097290039]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 6.0, 8.0, 9.0, 12.0, 17.0, 28.0, 25.0, 59.0, 100.0, 177.0, 251.0, 504.0, 1074.0, 2441.0, 7769.0, 48887.0, 721345.0, 236647.0, 21258.0, 4438.0, 1685.0, 814.0, 425.0, 206.0, 144.0, 82.0, 45.0, 20.0, 19.0, 7.0, 11.0, 3.0, 5.0, 4.0, 8.0, 6.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0], "bins": [-2.9296875, -2.845489501953125, -2.76129150390625, -2.677093505859375, -2.5928955078125, -2.508697509765625, -2.42449951171875, -2.340301513671875, -2.256103515625, -2.171905517578125, -2.08770751953125, -2.003509521484375, -1.9193115234375, -1.835113525390625, -1.75091552734375, -1.666717529296875, -1.58251953125, -1.498321533203125, -1.41412353515625, -1.329925537109375, -1.2457275390625, -1.161529541015625, -1.07733154296875, -0.993133544921875, -0.908935546875, -0.824737548828125, -0.74053955078125, -0.656341552734375, -0.5721435546875, -0.487945556640625, -0.40374755859375, -0.319549560546875, -0.2353515625, -0.151153564453125, -0.06695556640625, 0.017242431640625, 0.1014404296875, 0.185638427734375, 0.26983642578125, 0.354034423828125, 0.438232421875, 0.522430419921875, 0.60662841796875, 0.690826416015625, 0.7750244140625, 0.859222412109375, 0.94342041015625, 1.027618408203125, 1.11181640625, 1.196014404296875, 1.28021240234375, 1.364410400390625, 
1.4486083984375, 1.532806396484375, 1.61700439453125, 1.701202392578125, 1.785400390625, 1.869598388671875, 1.95379638671875, 2.037994384765625, 2.1221923828125, 2.206390380859375, 2.29058837890625, 2.374786376953125, 2.458984375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 11.0, 13.0, 16.0, 26.0, 47.0, 59.0, 69.0, 91.0, 86.0, 86.0, 94.0, 105.0, 79.0, 55.0, 49.0, 36.0, 29.0, 17.0, 15.0, 13.0, 6.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.1484375, -7.868408203125, -7.58837890625, -7.308349609375, -7.0283203125, -6.748291015625, -6.46826171875, -6.188232421875, -5.908203125, -5.628173828125, -5.34814453125, -5.068115234375, -4.7880859375, -4.508056640625, -4.22802734375, -3.947998046875, -3.66796875, -3.387939453125, -3.10791015625, -2.827880859375, -2.5478515625, -2.267822265625, -1.98779296875, -1.707763671875, -1.427734375, -1.147705078125, -0.86767578125, -0.587646484375, -0.3076171875, -0.027587890625, 0.25244140625, 0.532470703125, 0.8125, 1.092529296875, 1.37255859375, 1.652587890625, 1.9326171875, 2.212646484375, 2.49267578125, 2.772705078125, 3.052734375, 3.332763671875, 3.61279296875, 3.892822265625, 4.1728515625, 4.452880859375, 4.73291015625, 5.012939453125, 5.29296875, 5.572998046875, 5.85302734375, 6.133056640625, 6.4130859375, 6.693115234375, 6.97314453125, 7.253173828125, 7.533203125, 7.813232421875, 8.09326171875, 8.373291015625, 8.6533203125, 8.933349609375, 9.21337890625, 9.493408203125, 9.7734375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 3.0, 8.0, 3.0, 5.0, 14.0, 7.0, 22.0, 39.0, 37.0, 82.0, 116.0, 152.0, 227.0, 347.0, 539.0, 877.0, 1483.0, 2270.0, 3902.0, 6496.0, 11087.0, 19407.0, 35271.0, 68247.0, 142150.0, 286583.0, 236345.0, 108908.0, 54435.0, 29310.0, 16485.0, 9265.0, 5630.0, 3363.0, 2004.0, 1295.0, 804.0, 463.0, 296.0, 170.0, 142.0, 73.0, 60.0, 39.0, 25.0, 28.0, 11.0, 11.0, 9.0, 7.0, 4.0, 4.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.48291015625, -0.46863555908203125, -0.4543609619140625, -0.44008636474609375, -0.425811767578125, -0.41153717041015625, -0.3972625732421875, -0.38298797607421875, -0.36871337890625, -0.35443878173828125, -0.3401641845703125, -0.32588958740234375, -0.311614990234375, -0.29734039306640625, -0.2830657958984375, -0.26879119873046875, -0.2545166015625, -0.24024200439453125, -0.2259674072265625, -0.21169281005859375, -0.197418212890625, -0.18314361572265625, -0.1688690185546875, -0.15459442138671875, -0.14031982421875, -0.12604522705078125, -0.1117706298828125, -0.09749603271484375, -0.083221435546875, -0.06894683837890625, -0.0546722412109375, -0.04039764404296875, -0.026123046875, -0.01184844970703125, 0.0024261474609375, 0.01670074462890625, 0.030975341796875, 0.04524993896484375, 0.0595245361328125, 0.07379913330078125, 0.08807373046875, 0.10234832763671875, 0.1166229248046875, 0.13089752197265625, 0.145172119140625, 0.15944671630859375, 0.1737213134765625, 0.18799591064453125, 0.2022705078125, 0.21654510498046875, 0.2308197021484375, 0.24509429931640625, 0.259368896484375, 0.27364349365234375, 0.2879180908203125, 0.30219268798828125, 0.31646728515625, 0.33074188232421875, 0.3450164794921875, 0.35929107666015625, 0.373565673828125, 0.38784027099609375, 0.4021148681640625, 0.41638946533203125, 
0.4306640625]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 3.0, 0.0, 9.0, 5.0, 5.0, 4.0, 8.0, 14.0, 12.0, 11.0, 24.0, 21.0, 18.0, 29.0, 21.0, 29.0, 27.0, 19.0, 34.0, 31.0, 41.0, 30.0, 33.0, 41.0, 39.0, 34.0, 32.0, 40.0, 32.0, 46.0, 24.0, 29.0, 28.0, 36.0, 31.0, 22.0, 17.0, 19.0, 18.0, 17.0, 13.0, 14.0, 9.0, 11.0, 6.0, 6.0, 4.0, 4.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-7.54296875, -7.31463623046875, -7.0863037109375, -6.85797119140625, -6.629638671875, -6.40130615234375, -6.1729736328125, -5.94464111328125, -5.71630859375, -5.48797607421875, -5.2596435546875, -5.03131103515625, -4.802978515625, -4.57464599609375, -4.3463134765625, -4.11798095703125, -3.8896484375, -3.66131591796875, -3.4329833984375, -3.20465087890625, -2.976318359375, -2.74798583984375, -2.5196533203125, -2.29132080078125, -2.06298828125, -1.83465576171875, -1.6063232421875, -1.37799072265625, -1.149658203125, -0.92132568359375, -0.6929931640625, -0.46466064453125, -0.236328125, -0.00799560546875, 0.2203369140625, 0.44866943359375, 0.677001953125, 0.90533447265625, 1.1336669921875, 1.36199951171875, 1.59033203125, 1.81866455078125, 2.0469970703125, 2.27532958984375, 2.503662109375, 2.73199462890625, 2.9603271484375, 3.18865966796875, 3.4169921875, 3.64532470703125, 3.8736572265625, 4.10198974609375, 4.330322265625, 4.55865478515625, 4.7869873046875, 5.01531982421875, 5.24365234375, 5.47198486328125, 5.7003173828125, 5.92864990234375, 6.156982421875, 6.38531494140625, 6.6136474609375, 6.84197998046875, 7.0703125]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 2.0, 7.0, 5.0, 4.0, 8.0, 12.0, 16.0, 13.0, 31.0, 38.0, 62.0, 72.0, 102.0, 164.0, 271.0, 432.0, 797.0, 1365.0, 2631.0, 5701.0, 14216.0, 46953.0, 292614.0, 564945.0, 81464.0, 21250.0, 7918.0, 3582.0, 1659.0, 898.0, 521.0, 266.0, 169.0, 116.0, 77.0, 48.0, 33.0, 20.0, 23.0, 11.0, 9.0, 8.0, 5.0, 8.0, 7.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0], "bins": [-0.1029052734375, -0.09989452362060547, -0.09688377380371094, -0.0938730239868164, -0.09086227416992188, -0.08785152435302734, -0.08484077453613281, -0.08183002471923828, -0.07881927490234375, -0.07580852508544922, -0.07279777526855469, -0.06978702545166016, -0.06677627563476562, -0.0637655258178711, -0.06075477600097656, -0.05774402618408203, -0.0547332763671875, -0.05172252655029297, -0.04871177673339844, -0.045701026916503906, -0.042690277099609375, -0.039679527282714844, -0.03666877746582031, -0.03365802764892578, -0.03064727783203125, -0.02763652801513672, -0.024625778198242188, -0.021615028381347656, -0.018604278564453125, -0.015593528747558594, -0.012582778930664062, -0.009572029113769531, -0.006561279296875, -0.0035505294799804688, -0.0005397796630859375, 0.0024709701538085938, 0.005481719970703125, 0.008492469787597656, 0.011503219604492188, 0.014513969421386719, 0.01752471923828125, 0.02053546905517578, 0.023546218872070312, 0.026556968688964844, 0.029567718505859375, 0.032578468322753906, 0.03558921813964844, 0.03859996795654297, 0.0416107177734375, 0.04462146759033203, 0.04763221740722656, 0.050642967224121094, 0.053653717041015625, 0.056664466857910156, 0.05967521667480469, 0.06268596649169922, 0.06569671630859375, 0.06870746612548828, 0.07171821594238281, 0.07472896575927734, 0.07773971557617188, 0.0807504653930664, 0.08376121520996094, 0.08677196502685547, 0.08978271484375]}, 
"gradients/decoder.model.decoder.layers.11.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 0.0, 1.0, 3.0, 7.0, 8.0, 8.0, 12.0, 16.0, 27.0, 24.0, 43.0, 66.0, 66.0, 106.0, 89.0, 122.0, 79.0, 81.0, 68.0, 41.0, 35.0, 22.0, 14.0, 11.0, 12.0, 11.0, 8.0, 7.0, 5.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.124113082885742e-05, -7.878616452217102e-05, -7.633119821548462e-05, -7.387623190879822e-05, -7.142126560211182e-05, -6.896629929542542e-05, -6.651133298873901e-05, -6.405636668205261e-05, -6.160140037536621e-05, -5.914643406867981e-05, -5.669146776199341e-05, -5.423650145530701e-05, -5.1781535148620605e-05, -4.9326568841934204e-05, -4.68716025352478e-05, -4.44166362285614e-05, -4.1961669921875e-05, -3.95067036151886e-05, -3.70517373085022e-05, -3.4596771001815796e-05, -3.2141804695129395e-05, -2.9686838388442993e-05, -2.7231872081756592e-05, -2.477690577507019e-05, -2.232193946838379e-05, -1.9866973161697388e-05, -1.7412006855010986e-05, -1.4957040548324585e-05, -1.2502074241638184e-05, -1.0047107934951782e-05, -7.592141628265381e-06, -5.1371753215789795e-06, -2.682209014892578e-06, -2.2724270820617676e-07, 2.2277235984802246e-06, 4.682689905166626e-06, 7.137656211853027e-06, 9.592622518539429e-06, 1.204758882522583e-05, 1.4502555131912231e-05, 1.6957521438598633e-05, 1.9412487745285034e-05, 2.1867454051971436e-05, 2.4322420358657837e-05, 2.6777386665344238e-05, 2.923235297203064e-05, 3.168731927871704e-05, 3.414228558540344e-05, 3.6597251892089844e-05, 3.9052218198776245e-05, 4.1507184505462646e-05, 4.396215081214905e-05, 4.641711711883545e-05, 4.887208342552185e-05, 5.132704973220825e-05, 5.378201603889465e-05, 5.6236982345581055e-05, 5.8691948652267456e-05, 6.114691495895386e-05, 6.360188126564026e-05, 6.605684757232666e-05, 6.851181387901306e-05, 7.096678018569946e-05, 7.342174649238586e-05, 7.587671279907227e-05]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 1.0, 4.0, 2.0, 6.0, 8.0, 4.0, 11.0, 14.0, 12.0, 17.0, 20.0, 32.0, 43.0, 48.0, 76.0, 107.0, 218.0, 2458.0, 733686.0, 309461.0, 1683.0, 189.0, 111.0, 84.0, 55.0, 41.0, 35.0, 27.0, 17.0, 13.0, 13.0, 8.0, 8.0, 8.0, 7.0, 4.0, 5.0, 5.0, 5.0, 2.0, 2.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.50439453125, -0.48860931396484375, -0.4728240966796875, -0.45703887939453125, -0.441253662109375, -0.42546844482421875, -0.4096832275390625, -0.39389801025390625, -0.37811279296875, -0.36232757568359375, -0.3465423583984375, -0.33075714111328125, -0.314971923828125, -0.29918670654296875, -0.2834014892578125, -0.26761627197265625, -0.2518310546875, -0.23604583740234375, -0.2202606201171875, -0.20447540283203125, -0.188690185546875, -0.17290496826171875, -0.1571197509765625, -0.14133453369140625, -0.12554931640625, -0.10976409912109375, -0.0939788818359375, -0.07819366455078125, -0.062408447265625, -0.04662322998046875, -0.0308380126953125, -0.01505279541015625, 0.000732421875, 0.01651763916015625, 0.0323028564453125, 0.04808807373046875, 0.063873291015625, 0.07965850830078125, 0.0954437255859375, 0.11122894287109375, 0.12701416015625, 0.14279937744140625, 0.1585845947265625, 0.17436981201171875, 0.190155029296875, 0.20594024658203125, 0.2217254638671875, 0.23751068115234375, 0.2532958984375, 0.26908111572265625, 0.2848663330078125, 0.30065155029296875, 
0.316436767578125, 0.33222198486328125, 0.3480072021484375, 0.36379241943359375, 0.37957763671875, 0.39536285400390625, 0.4111480712890625, 0.42693328857421875, 0.442718505859375, 0.45850372314453125, 0.4742889404296875, 0.49007415771484375, 0.505859375]}, "gradients/decoder.model.decoder.layers.11.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 5.0, 2.0, 5.0, 1.0, 5.0, 5.0, 10.0, 9.0, 9.0, 12.0, 12.0, 19.0, 26.0, 37.0, 44.0, 42.0, 52.0, 73.0, 110.0, 107.0, 78.0, 58.0, 56.0, 36.0, 38.0, 33.0, 31.0, 21.0, 12.0, 7.0, 8.0, 4.0, 9.0, 5.0, 4.0, 3.0, 6.0, 3.0, 2.0, 3.0, 0.0, 2.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03619384765625, -0.034952640533447266, -0.03371143341064453, -0.0324702262878418, -0.031229019165039062, -0.029987812042236328, -0.028746604919433594, -0.02750539779663086, -0.026264190673828125, -0.02502298355102539, -0.023781776428222656, -0.022540569305419922, -0.021299362182617188, -0.020058155059814453, -0.01881694793701172, -0.017575740814208984, -0.01633453369140625, -0.015093326568603516, -0.013852119445800781, -0.012610912322998047, -0.011369705200195312, -0.010128498077392578, -0.008887290954589844, -0.007646083831787109, -0.006404876708984375, -0.005163669586181641, -0.003922462463378906, -0.002681255340576172, -0.0014400482177734375, -0.00019884109497070312, 0.0010423660278320312, 0.0022835731506347656, 0.0035247802734375, 0.004765987396240234, 0.006007194519042969, 0.007248401641845703, 0.008489608764648438, 0.009730815887451172, 0.010972023010253906, 0.01221323013305664, 0.013454437255859375, 0.01469564437866211, 0.015936851501464844, 0.017178058624267578, 0.018419265747070312, 0.019660472869873047, 0.02090167999267578, 0.022142887115478516, 0.02338409423828125, 0.024625301361083984, 0.02586650848388672, 0.027107715606689453, 0.028348922729492188, 0.029590129852294922, 0.030831336975097656, 0.03207254409790039, 0.033313751220703125, 0.03455495834350586, 0.035796165466308594, 0.03703737258911133, 0.03827857971191406, 0.0395197868347168, 0.04076099395751953, 0.042002201080322266, 0.043243408203125]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 7.0, 83.0, 650.0, 251.0, 20.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-21.49177360534668, -20.032306671142578, -18.572837829589844, -17.113370895385742, -15.65390396118164, -14.194437026977539, -12.734969139099121, -11.275501251220703, -9.816034317016602, -8.3565673828125, -6.897099494934082, -5.437632083892822, -3.9781646728515625, -2.5186972618103027, -1.059229850769043, 0.400238037109375, 1.8597049713134766, 3.3191723823547363, 4.778639793395996, 6.238107204437256, 7.697574615478516, 9.157041549682617, 10.616509437561035, 12.075977325439453, 13.535444259643555, 14.994911193847656, 16.45438003540039, 17.913846969604492, 19.373313903808594, 20.832780838012695, 22.292247772216797, 23.75171661376953, 25.211181640625, 26.6706485748291, 28.130115509033203, 29.589584350585938, 31.04905128479004, 32.50851821899414, 33.967987060546875, 35.427452087402344, 36.88692092895508, 38.34638977050781, 39.80585479736328, 41.265323638916016, 42.72479248046875, 44.18425750732422, 45.64372634887695, 47.10319519042969, 
48.562660217285156, 50.02212905883789, 51.48159408569336, 52.941062927246094, 54.40052795410156, 55.8599967956543, 57.31946563720703, 58.7789306640625, 60.238399505615234, 61.69786834716797, 63.15733337402344, 64.6167984008789, 66.0762710571289, 67.53573608398438, 68.99520111083984, 70.45467376708984, 71.91413879394531]}, "gradients/decoder.model.decoder.layers.11.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 4.0, 16.0, 15.0, 20.0, 27.0, 37.0, 57.0, 57.0, 83.0, 72.0, 81.0, 92.0, 98.0, 77.0, 54.0, 59.0, 35.0, 32.0, 26.0, 22.0, 16.0, 8.0, 3.0, 7.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.395238876342773, -7.159341335296631, -6.923443794250488, -6.687546730041504, -6.451649188995361, -6.215751647949219, -5.979854583740234, -5.743957042694092, -5.508059501647949, -5.272161960601807, -5.036264419555664, -4.80036735534668, -4.564469814300537, -4.3285722732543945, -4.09267520904541, -3.8567776679992676, -3.620880126953125, -3.3849825859069824, -3.149085283279419, -2.9131879806518555, -2.677290439605713, -2.4413928985595703, -2.205495595932007, -1.9695981740951538, -1.7337007522583008, -1.4978033304214478, -1.2619059085845947, -1.0260084867477417, -0.7901110649108887, -0.5542136430740356, -0.3183162212371826, -0.08241879940032959, 0.15347909927368164, 0.38937652111053467, 0.6252739429473877, 0.8611713647842407, 1.0970687866210938, 1.3329662084579468, 1.5688636302947998, 1.8047610521316528, 2.040658473968506, 2.2765560150146484, 2.512453317642212, 2.7483506202697754, 2.984248161315918, 3.2201457023620605, 3.456043004989624, 3.6919403076171875, 3.92783784866333, 4.163735389709473, 4.399632453918457, 4.6355299949646, 4.871427536010742, 5.107325077056885, 5.343222618103027, 5.579119682312012, 5.815017223358154, 6.050914764404297, 6.286811828613281, 6.522709369659424, 6.758606910705566, 6.994504451751709, 7.230401992797852, 7.466299057006836, 7.7021965980529785]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 2.0, 3.0, 3.0, 1.0, 3.0, 7.0, 9.0, 12.0, 10.0, 10.0, 20.0, 24.0, 39.0, 75.0, 102.0, 151.0, 247.0, 429.0, 958.0, 2526.0, 9509.0, 94334.0, 870493.0, 58399.0, 7244.0, 2076.0, 796.0, 406.0, 216.0, 150.0, 108.0, 54.0, 32.0, 34.0, 23.0, 11.0, 6.0, 10.0, 7.0, 6.0, 3.0, 2.0, 4.0, 1.0, 3.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-10.953125, -10.615234375, -10.27734375, -9.939453125, -9.6015625, -9.263671875, -8.92578125, -8.587890625, -8.25, -7.912109375, -7.57421875, -7.236328125, -6.8984375, -6.560546875, -6.22265625, -5.884765625, -5.546875, -5.208984375, -4.87109375, -4.533203125, -4.1953125, -3.857421875, -3.51953125, -3.181640625, -2.84375, -2.505859375, -2.16796875, -1.830078125, -1.4921875, -1.154296875, -0.81640625, -0.478515625, -0.140625, 0.197265625, 0.53515625, 0.873046875, 1.2109375, 1.548828125, 1.88671875, 2.224609375, 2.5625, 2.900390625, 3.23828125, 3.576171875, 3.9140625, 4.251953125, 4.58984375, 4.927734375, 5.265625, 5.603515625, 5.94140625, 6.279296875, 6.6171875, 6.955078125, 7.29296875, 7.630859375, 7.96875, 8.306640625, 8.64453125, 8.982421875, 9.3203125, 9.658203125, 9.99609375, 10.333984375, 10.671875]}, "gradients/decoder.model.decoder.layers.11.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 1.0, 0.0, 0.0, 7.0, 15.0, 26.0, 46.0, 78.0, 153.0, 156.0, 174.0, 134.0, 90.0, 62.0, 32.0, 17.0, 9.0, 4.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-143.25, -138.650390625, -134.05078125, -129.451171875, -124.8515625, -120.251953125, -115.65234375, -111.052734375, -106.453125, -101.853515625, -97.25390625, -92.654296875, -88.0546875, -83.455078125, -78.85546875, -74.255859375, -69.65625, -65.056640625, -60.45703125, -55.857421875, -51.2578125, -46.658203125, -42.05859375, -37.458984375, -32.859375, -28.259765625, -23.66015625, -19.060546875, -14.4609375, -9.861328125, -5.26171875, -0.662109375, 3.9375, 8.537109375, 13.13671875, 17.736328125, 22.3359375, 26.935546875, 31.53515625, 36.134765625, 40.734375, 45.333984375, 49.93359375, 54.533203125, 59.1328125, 63.732421875, 68.33203125, 72.931640625, 77.53125, 82.130859375, 86.73046875, 91.330078125, 95.9296875, 100.529296875, 105.12890625, 109.728515625, 114.328125, 118.927734375, 123.52734375, 128.126953125, 132.7265625, 137.326171875, 141.92578125, 146.525390625, 151.125]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 2.0, 5.0, 3.0, 3.0, 7.0, 10.0, 17.0, 22.0, 23.0, 49.0, 82.0, 124.0, 247.0, 528.0, 5342.0, 1038126.0, 2960.0, 456.0, 216.0, 123.0, 81.0, 52.0, 26.0, 16.0, 8.0, 4.0, 5.0, 2.0, 0.0, 0.0, 6.0, 1.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-50.3125, -48.65869140625, -47.0048828125, -45.35107421875, -43.697265625, -42.04345703125, -40.3896484375, -38.73583984375, -37.08203125, -35.42822265625, -33.7744140625, -32.12060546875, -30.466796875, -28.81298828125, -27.1591796875, -25.50537109375, -23.8515625, -22.19775390625, -20.5439453125, -18.89013671875, -17.236328125, -15.58251953125, -13.9287109375, -12.27490234375, -10.62109375, -8.96728515625, -7.3134765625, -5.65966796875, -4.005859375, -2.35205078125, -0.6982421875, 0.95556640625, 2.609375, 4.26318359375, 5.9169921875, 7.57080078125, 9.224609375, 10.87841796875, 12.5322265625, 14.18603515625, 15.83984375, 17.49365234375, 19.1474609375, 20.80126953125, 22.455078125, 24.10888671875, 25.7626953125, 27.41650390625, 29.0703125, 30.72412109375, 32.3779296875, 34.03173828125, 35.685546875, 37.33935546875, 38.9931640625, 40.64697265625, 42.30078125, 43.95458984375, 45.6083984375, 47.26220703125, 48.916015625, 50.56982421875, 52.2236328125, 53.87744140625, 55.53125]}, "gradients/decoder.model.decoder.layers.11.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0, 5.0, 6.0, 6.0, 9.0, 11.0, 27.0, 39.0, 57.0, 80.0, 107.0, 113.0, 138.0, 118.0, 84.0, 64.0, 45.0, 39.0, 21.0, 15.0, 8.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-121.25, -117.23828125, -113.2265625, -109.21484375, -105.203125, -101.19140625, -97.1796875, -93.16796875, -89.15625, -85.14453125, -81.1328125, -77.12109375, -73.109375, -69.09765625, -65.0859375, -61.07421875, -57.0625, -53.05078125, -49.0390625, -45.02734375, -41.015625, -37.00390625, -32.9921875, -28.98046875, -24.96875, -20.95703125, -16.9453125, -12.93359375, -8.921875, -4.91015625, -0.8984375, 3.11328125, 7.125, 11.13671875, 15.1484375, 19.16015625, 23.171875, 
27.18359375, 31.1953125, 35.20703125, 39.21875, 43.23046875, 47.2421875, 51.25390625, 55.265625, 59.27734375, 63.2890625, 67.30078125, 71.3125, 75.32421875, 79.3359375, 83.34765625, 87.359375, 91.37109375, 95.3828125, 99.39453125, 103.40625, 107.41796875, 111.4296875, 115.44140625, 119.453125, 123.46484375, 127.4765625, 131.48828125, 135.5]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 4.0, 1.0, 4.0, 4.0, 5.0, 8.0, 7.0, 10.0, 10.0, 17.0, 42.0, 65.0, 123.0, 294.0, 663.0, 1684.0, 4608.0, 24628.0, 998832.0, 12214.0, 3167.0, 1188.0, 504.0, 224.0, 107.0, 46.0, 24.0, 19.0, 14.0, 13.0, 6.0, 8.0, 0.0, 6.0, 6.0, 4.0, 0.0, 0.0, 2.0, 0.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.279296875, -1.2430877685546875, -1.206878662109375, -1.1706695556640625, -1.13446044921875, -1.0982513427734375, -1.062042236328125, -1.0258331298828125, -0.9896240234375, -0.9534149169921875, -0.917205810546875, -0.8809967041015625, -0.84478759765625, -0.8085784912109375, -0.772369384765625, -0.7361602783203125, -0.699951171875, -0.6637420654296875, -0.627532958984375, -0.5913238525390625, -0.55511474609375, -0.5189056396484375, -0.482696533203125, -0.4464874267578125, -0.4102783203125, -0.3740692138671875, -0.337860107421875, -0.3016510009765625, -0.26544189453125, -0.2292327880859375, -0.193023681640625, -0.1568145751953125, -0.12060546875, -0.0843963623046875, -0.048187255859375, -0.0119781494140625, 0.02423095703125, 0.0604400634765625, 0.096649169921875, 0.1328582763671875, 0.1690673828125, 0.2052764892578125, 0.241485595703125, 0.2776947021484375, 0.31390380859375, 0.3501129150390625, 0.386322021484375, 0.4225311279296875, 0.458740234375, 0.4949493408203125, 0.531158447265625, 0.5673675537109375, 0.60357666015625, 0.6397857666015625, 0.675994873046875, 0.7122039794921875, 0.7484130859375, 0.7846221923828125, 0.820831298828125, 0.8570404052734375, 0.89324951171875, 0.9294586181640625, 0.965667724609375, 1.0018768310546875, 1.0380859375]}, "gradients/decoder.model.decoder.layers.11.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.0, 4.0, 10.0, 13.0, 24.0, 29.0, 58.0, 221.0, 430.0, 123.0, 41.0, 24.0, 9.0, 6.0, 3.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012683868408203125, -0.00012218765914440155, -0.00011753663420677185, -0.00011288560926914215, -0.00010823458433151245, -0.00010358355939388275, -9.893253445625305e-05, -9.428150951862335e-05, -8.963048458099365e-05, -8.497945964336395e-05, -8.032843470573425e-05, -7.567740976810455e-05, -7.102638483047485e-05, -6.637535989284515e-05, -6.172433495521545e-05, -5.7073310017585754e-05, -5.2422285079956055e-05, -4.7771260142326355e-05, -4.3120235204696655e-05, -3.8469210267066956e-05, -3.3818185329437256e-05, -2.9167160391807556e-05, -2.4516135454177856e-05, -1.9865110516548157e-05, -1.5214085578918457e-05, -1.0563060641288757e-05, -5.912035703659058e-06, -1.261010766029358e-06, 3.390014171600342e-06, 8.041039109230042e-06, 1.2692064046859741e-05, 1.734308898448944e-05, 2.199411392211914e-05, 2.664513885974884e-05, 3.129616379737854e-05, 3.594718873500824e-05, 4.059821367263794e-05, 4.524923861026764e-05, 4.990026354789734e-05, 5.455128848552704e-05, 5.920231342315674e-05, 6.385333836078644e-05, 
6.850436329841614e-05, 7.315538823604584e-05, 7.780641317367554e-05, 8.245743811130524e-05, 8.710846304893494e-05, 9.175948798656464e-05, 9.641051292419434e-05, 0.00010106153786182404, 0.00010571256279945374, 0.00011036358773708344, 0.00011501461267471313, 0.00011966563761234283, 0.00012431666254997253, 0.00012896768748760223, 0.00013361871242523193, 0.00013826973736286163, 0.00014292076230049133, 0.00014757178723812103, 0.00015222281217575073, 0.00015687383711338043, 0.00016152486205101013, 0.00016617588698863983, 0.00017082691192626953]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 3.0, 2.0, 5.0, 7.0, 8.0, 23.0, 56.0, 173.0, 1071.0, 10882.0, 1028449.0, 6872.0, 769.0, 140.0, 47.0, 15.0, 8.0, 5.0, 1.0, 5.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9609375, -2.869598388671875, -2.77825927734375, -2.686920166015625, -2.5955810546875, -2.504241943359375, -2.41290283203125, -2.321563720703125, -2.230224609375, -2.138885498046875, -2.04754638671875, -1.956207275390625, -1.8648681640625, -1.773529052734375, -1.68218994140625, -1.590850830078125, -1.49951171875, -1.408172607421875, -1.31683349609375, -1.225494384765625, -1.1341552734375, -1.042816162109375, -0.95147705078125, -0.860137939453125, -0.768798828125, -0.677459716796875, -0.58612060546875, -0.494781494140625, -0.4034423828125, -0.312103271484375, -0.22076416015625, -0.129425048828125, -0.0380859375, 0.053253173828125, 0.14459228515625, 0.235931396484375, 0.3272705078125, 0.418609619140625, 0.50994873046875, 0.601287841796875, 0.692626953125, 0.783966064453125, 0.87530517578125, 0.966644287109375, 1.0579833984375, 1.149322509765625, 1.24066162109375, 1.332000732421875, 1.42333984375, 1.514678955078125, 1.60601806640625, 1.697357177734375, 1.7886962890625, 1.880035400390625, 1.97137451171875, 2.062713623046875, 2.154052734375, 2.245391845703125, 2.33673095703125, 2.428070068359375, 2.5194091796875, 2.610748291015625, 2.70208740234375, 2.793426513671875, 2.884765625]}, "gradients/decoder.model.decoder.layers.11.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 6.0, 4.0, 2.0, 5.0, 7.0, 9.0, 31.0, 461.0, 402.0, 40.0, 11.0, 4.0, 5.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 3.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.404296875, -0.3916587829589844, -0.37902069091796875, -0.3663825988769531, -0.3537445068359375, -0.3411064147949219, -0.32846832275390625, -0.3158302307128906, -0.303192138671875, -0.2905540466308594, -0.27791595458984375, -0.2652778625488281, -0.2526397705078125, -0.24000167846679688, -0.22736358642578125, -0.21472549438476562, -0.20208740234375, -0.18944931030273438, -0.17681121826171875, -0.16417312622070312, -0.1515350341796875, -0.13889694213867188, -0.12625885009765625, -0.11362075805664062, -0.100982666015625, -0.08834457397460938, -0.07570648193359375, -0.06306838989257812, -0.0504302978515625, -0.037792205810546875, -0.02515411376953125, -0.012516021728515625, 0.0001220703125, 0.012760162353515625, 0.02539825439453125, 0.038036346435546875, 0.0506744384765625, 0.06331253051757812, 0.07595062255859375, 0.08858871459960938, 0.101226806640625, 0.11386489868164062, 0.12650299072265625, 
0.13914108276367188, 0.1517791748046875, 0.16441726684570312, 0.17705535888671875, 0.18969345092773438, 0.20233154296875, 0.21496963500976562, 0.22760772705078125, 0.24024581909179688, 0.2528839111328125, 0.2655220031738281, 0.27816009521484375, 0.2907981872558594, 0.303436279296875, 0.3160743713378906, 0.32871246337890625, 0.3413505554199219, 0.3539886474609375, 0.3666267395019531, 0.37926483154296875, 0.3919029235839844, 0.404541015625]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1023.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.374441623687744, 15.287652969360352, 36.949745178222656, 58.611839294433594, 80.27393341064453, 101.93602752685547, 123.5981216430664, 145.26022338867188, 166.9223175048828, 188.58441162109375, 210.2465057373047, 231.90859985351562, 253.57069396972656, 275.2327880859375, 296.8948974609375, 318.5569763183594, 340.21905517578125, 361.88116455078125, 383.5432434082031, 405.205322265625, 426.867431640625, 448.529541015625, 470.1916198730469, 491.85369873046875, 513.5158081054688, 535.1779174804688, 556.8399658203125, 578.5020751953125, 600.1641845703125, 621.8262939453125, 643.4884033203125, 665.1504516601562, 686.8126220703125, 708.4747314453125, 730.1368408203125, 751.7988891601562, 773.4609985351562, 795.1231079101562, 816.78515625, 838.447265625, 860.109375, 881.771484375, 903.43359375, 925.0956420898438, 946.7577514648438, 968.4198608398438, 990.0819091796875, 1011.7440185546875, 1033.4061279296875, 1055.0682373046875, 1076.7303466796875, 1098.3924560546875, 1120.054443359375, 1141.716552734375, 1163.378662109375, 1185.040771484375, 1206.702880859375, 1228.364990234375, 1250.027099609375, 1271.689208984375, 1293.351318359375, 1315.0133056640625, 1336.6754150390625, 1358.3375244140625, 1379.9996337890625]}, "gradients/decoder.model.decoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 1.0, 4.0, 4.0, 7.0, 18.0, 32.0, 45.0, 47.0, 40.0, 50.0, 69.0, 96.0, 75.0, 81.0, 99.0, 77.0, 58.0, 70.0, 52.0, 22.0, 16.0, 14.0, 12.0, 12.0, 9.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-135.20059204101562, -131.21047973632812, -127.22036743164062, -123.23025512695312, -119.2401351928711, -115.2500228881836, -111.2599105834961, -107.2697982788086, -103.27967834472656, -99.28956604003906, -95.29945373535156, -91.30934143066406, -87.31922149658203, -83.32910919189453, -79.33899688720703, -75.34888458251953, -71.35877227783203, -67.36865997314453, -63.378543853759766, -59.388431549072266, -55.3983154296875, -51.408203125, -47.4180908203125, -43.427978515625, -39.437862396240234, -35.447750091552734, -31.45763397216797, -27.46752166748047, -23.477407455444336, -19.487293243408203, -15.497180938720703, -11.50706672668457, -7.5169525146484375, -3.526838779449463, 0.4632749557495117, 4.453388214111328, 8.443502426147461, 12.433616638183594, 16.423728942871094, 20.413843154907227, 24.40395736694336, 28.394071578979492, 32.384185791015625, 36.374298095703125, 40.364410400390625, 44.35452651977539, 48.34463882446289, 52.334754943847656, 56.324867248535156, 
60.314979553222656, 64.30509185791016, 68.29521179199219, 72.28532409667969, 76.27543640136719, 80.26554870605469, 84.25566101074219, 88.24577331542969, 92.23588562011719, 96.22599792480469, 100.21611022949219, 104.20623016357422, 108.19634246826172, 112.18645477294922, 116.17656707763672, 120.16668701171875]}, "gradients/decoder.model.decoder.layers.10.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 7.0, 27.0, 33.0, 60.0, 53.0, 80.0, 103.0, 2619.0, 4190490.0, 483.0, 104.0, 79.0, 64.0, 40.0, 13.0, 19.0, 10.0, 4.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-85.125, -82.7177734375, -80.310546875, -77.9033203125, -75.49609375, -73.0888671875, -70.681640625, -68.2744140625, -65.8671875, -63.4599609375, -61.052734375, -58.6455078125, -56.23828125, -53.8310546875, -51.423828125, -49.0166015625, -46.609375, -44.2021484375, -41.794921875, -39.3876953125, -36.98046875, -34.5732421875, -32.166015625, -29.7587890625, -27.3515625, -24.9443359375, -22.537109375, -20.1298828125, -17.72265625, -15.3154296875, -12.908203125, -10.5009765625, -8.09375, -5.6865234375, -3.279296875, -0.8720703125, 1.53515625, 3.9423828125, 6.349609375, 8.7568359375, 11.1640625, 13.5712890625, 15.978515625, 18.3857421875, 20.79296875, 23.2001953125, 25.607421875, 28.0146484375, 30.421875, 32.8291015625, 35.236328125, 37.6435546875, 40.05078125, 42.4580078125, 44.865234375, 47.2724609375, 49.6796875, 52.0869140625, 54.494140625, 56.9013671875, 59.30859375, 61.7158203125, 64.123046875, 66.5302734375, 68.9375]}, "gradients/decoder.model.decoder.layers.10.fc2.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 4.0, 0.0, 4.0, 8.0, 5.0, 6.0, 12.0, 5.0, 8.0, 11.0, 12.0, 18.0, 18.0, 20.0, 29.0, 19.0, 27.0, 29.0, 37.0, 39.0, 46.0, 41.0, 41.0, 38.0, 33.0, 36.0, 50.0, 40.0, 37.0, 39.0, 34.0, 47.0, 35.0, 30.0, 26.0, 26.0, 25.0, 8.0, 14.0, 15.0, 10.0, 3.0, 9.0, 2.0, 5.0, 2.0, 4.0, 2.0, 1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.0234375, -2.91851806640625, -2.8135986328125, -2.70867919921875, -2.603759765625, -2.49884033203125, -2.3939208984375, -2.28900146484375, -2.18408203125, -2.07916259765625, -1.9742431640625, -1.86932373046875, -1.764404296875, -1.65948486328125, -1.5545654296875, -1.44964599609375, -1.3447265625, -1.23980712890625, -1.1348876953125, -1.02996826171875, -0.925048828125, -0.82012939453125, -0.7152099609375, -0.61029052734375, -0.50537109375, -0.40045166015625, -0.2955322265625, -0.19061279296875, -0.085693359375, 0.01922607421875, 0.1241455078125, 0.22906494140625, 0.333984375, 0.43890380859375, 0.5438232421875, 0.64874267578125, 0.753662109375, 0.85858154296875, 0.9635009765625, 1.06842041015625, 1.17333984375, 1.27825927734375, 1.3831787109375, 1.48809814453125, 1.593017578125, 1.69793701171875, 1.8028564453125, 1.90777587890625, 2.0126953125, 2.11761474609375, 2.2225341796875, 2.32745361328125, 2.432373046875, 2.53729248046875, 2.6422119140625, 2.74713134765625, 2.85205078125, 2.95697021484375, 3.0618896484375, 3.16680908203125, 3.271728515625, 3.37664794921875, 3.4815673828125, 3.58648681640625, 3.69140625]}, "gradients/decoder.model.decoder.layers.10.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 
0.0, 3.0, 3.0, 2.0, 4.0, 14.0, 70.0, 4194054.0, 114.0, 18.0, 6.0, 6.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-946.0, -925.25, -904.5, -883.75, -863.0, -842.25, -821.5, -800.75, -780.0, -759.25, -738.5, -717.75, -697.0, -676.25, -655.5, -634.75, -614.0, -593.25, -572.5, -551.75, -531.0, -510.25, -489.5, -468.75, -448.0, -427.25, -406.5, -385.75, -365.0, -344.25, -323.5, -302.75, -282.0, -261.25, -240.5, -219.75, -199.0, -178.25, -157.5, -136.75, -116.0, -95.25, -74.5, -53.75, -33.0, -12.25, 8.5, 29.25, 50.0, 70.75, 91.5, 112.25, 133.0, 153.75, 174.5, 195.25, 216.0, 236.75, 257.5, 278.25, 299.0, 319.75, 340.5, 361.25, 382.0]}, "gradients/decoder.model.decoder.layers.10.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 4.0, 5.0, 7.0, 16.0, 22.0, 60.0, 162.0, 545.0, 1707.0, 1097.0, 303.0, 84.0, 33.0, 17.0, 10.0, 6.0, 4.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.57421875, -5.4437255859375, -5.313232421875, -5.1827392578125, -5.05224609375, -4.9217529296875, -4.791259765625, -4.6607666015625, -4.5302734375, -4.3997802734375, -4.269287109375, -4.1387939453125, -4.00830078125, -3.8778076171875, -3.747314453125, -3.6168212890625, -3.486328125, -3.3558349609375, -3.225341796875, -3.0948486328125, -2.96435546875, -2.8338623046875, -2.703369140625, -2.5728759765625, -2.4423828125, -2.3118896484375, -2.181396484375, -2.0509033203125, -1.92041015625, -1.7899169921875, -1.659423828125, -1.5289306640625, -1.3984375, -1.2679443359375, -1.137451171875, -1.0069580078125, -0.87646484375, -0.7459716796875, -0.615478515625, -0.4849853515625, -0.3544921875, -0.2239990234375, -0.093505859375, 0.0369873046875, 0.16748046875, 0.2979736328125, 0.428466796875, 0.5589599609375, 0.689453125, 0.8199462890625, 0.950439453125, 1.0809326171875, 1.21142578125, 1.3419189453125, 1.472412109375, 1.6029052734375, 1.7333984375, 1.8638916015625, 1.994384765625, 2.1248779296875, 2.25537109375, 2.3858642578125, 2.516357421875, 2.6468505859375, 2.77734375]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 4.0, 2.0, 4.0, 0.0, 3.0, 4.0, 2.0, 14.0, 27.0, 36.0, 47.0, 75.0, 131.0, 144.0, 130.0, 126.0, 103.0, 55.0, 38.0, 27.0, 15.0, 11.0, 4.0, 9.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.628470420837402, -4.360640048980713, -4.092809677124023, -3.824978828430176, -3.5571484565734863, -3.289318084716797, -3.0214874744415283, -2.7536568641662598, -2.4858264923095703, -2.217996120452881, -1.9501655101776123, -1.6823350191116333, -1.4145045280456543, -1.1466740369796753, -0.8788435459136963, -0.6110130548477173, -0.3431825637817383, -0.07535207271575928, 0.19247841835021973, 0.46030890941619873, 0.7281394004821777, 0.9959698915481567, 1.2638003826141357, 1.5316308736801147, 1.7994613647460938, 2.067291736602783, 2.3351223468780518, 2.6029529571533203, 2.8707833290100098, 3.138613700866699, 3.4064443111419678, 3.6742749214172363, 3.942105293273926, 4.209935665130615, 4.477766036987305, 4.745596885681152, 5.013427257537842, 5.281257629394531, 5.549088478088379, 5.816918849945068, 6.084749221801758, 6.352579593658447, 6.620409965515137, 6.888240814208984, 7.156071186065674, 
7.423901557922363, 7.691732406616211, 7.9595627784729, 8.22739315032959, 8.495223999023438, 8.763053894042969, 9.030884742736816, 9.298715591430664, 9.566545486450195, 9.834376335144043, 10.102206230163574, 10.370037078857422, 10.63786792755127, 10.9056978225708, 11.173528671264648, 11.44135856628418, 11.709189414978027, 11.977020263671875, 12.244850158691406, 12.512681007385254]}, "gradients/decoder.model.decoder.layers.10.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 1.0, 4.0, 2.0, 6.0, 4.0, 11.0, 8.0, 7.0, 8.0, 8.0, 17.0, 12.0, 20.0, 27.0, 28.0, 29.0, 38.0, 43.0, 29.0, 40.0, 32.0, 34.0, 43.0, 42.0, 43.0, 44.0, 47.0, 51.0, 30.0, 30.0, 37.0, 31.0, 28.0, 21.0, 22.0, 21.0, 19.0, 14.0, 13.0, 13.0, 6.0, 10.0, 8.0, 2.0, 4.0, 6.0, 3.0, 7.0, 3.0, 3.0, 0.0, 1.0, 2.0, 1.0], "bins": [-3.978426933288574, -3.869804620742798, -3.7611820697784424, -3.652559757232666, -3.5439372062683105, -3.435314893722534, -3.326692581176758, -3.2180700302124023, -3.109447717666626, -3.0008254051208496, -2.892202854156494, -2.7835805416107178, -2.6749582290649414, -2.566335678100586, -2.4577133655548096, -2.349091053009033, -2.2404685020446777, -2.1318461894989014, -2.023223638534546, -1.9146013259887695, -1.8059788942337036, -1.6973564624786377, -1.5887341499328613, -1.4801117181777954, -1.3714892864227295, -1.2628668546676636, -1.1542444229125977, -1.0456221103668213, -0.9369996786117554, -0.8283772468566895, -0.7197548747062683, -0.6111325025558472, -0.5025098323822021, -0.3938874304294586, -0.2852650284767151, -0.17664262652397156, -0.06802022457122803, 0.04060220718383789, 0.14922457933425903, 0.2578469514846802, 0.3664693832397461, 0.4750917851924896, 0.5837141871452332, 0.6923365592956543, 0.8009589910507202, 0.9095814228057861, 1.0182037353515625, 1.1268261671066284, 1.2354485988616943, 1.3440710306167603, 1.4526934623718262, 1.5613157749176025, 1.6699382066726685, 1.7785606384277344, 1.8871829509735107, 1.9958053827285767, 2.1044278144836426, 2.213050127029419, 2.3216726779937744, 2.430294990539551, 2.5389175415039062, 2.6475398540496826, 2.756162166595459, 2.8647847175598145, 2.973407030105591]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 5.0, 14.0, 7.0, 10.0, 21.0, 25.0, 25.0, 23.0, 64.0, 84.0, 101.0, 129.0, 211.0, 274.0, 372.0, 530.0, 721.0, 1111.0, 1752.0, 3131.0, 6897.0, 20440.0, 93225.0, 617696.0, 241023.0, 38645.0, 10897.0, 4448.0, 2246.0, 1348.0, 883.0, 584.0, 464.0, 306.0, 222.0, 180.0, 109.0, 100.0, 61.0, 48.0, 34.0, 28.0, 22.0, 12.0, 7.0, 7.0, 6.0, 8.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.90576171875, -0.8780899047851562, -0.8504180908203125, -0.8227462768554688, -0.795074462890625, -0.7674026489257812, -0.7397308349609375, -0.7120590209960938, -0.68438720703125, -0.6567153930664062, -0.6290435791015625, -0.6013717651367188, -0.573699951171875, -0.5460281372070312, -0.5183563232421875, -0.49068450927734375, -0.4630126953125, -0.43534088134765625, -0.4076690673828125, -0.37999725341796875, -0.352325439453125, -0.32465362548828125, -0.2969818115234375, -0.26930999755859375, -0.24163818359375, -0.21396636962890625, -0.1862945556640625, -0.15862274169921875, -0.130950927734375, -0.10327911376953125, -0.0756072998046875, -0.04793548583984375, -0.020263671875, 0.00740814208984375, 0.0350799560546875, 0.06275177001953125, 0.090423583984375, 0.11809539794921875, 0.1457672119140625, 0.17343902587890625, 
0.20111083984375, 0.22878265380859375, 0.2564544677734375, 0.28412628173828125, 0.311798095703125, 0.33946990966796875, 0.3671417236328125, 0.39481353759765625, 0.4224853515625, 0.45015716552734375, 0.4778289794921875, 0.5055007934570312, 0.533172607421875, 0.5608444213867188, 0.5885162353515625, 0.6161880493164062, 0.64385986328125, 0.6715316772460938, 0.6992034912109375, 0.7268753051757812, 0.754547119140625, 0.7822189331054688, 0.8098907470703125, 0.8375625610351562, 0.865234375]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 2.0, 1.0, 4.0, 7.0, 7.0, 10.0, 6.0, 16.0, 14.0, 22.0, 18.0, 26.0, 40.0, 42.0, 32.0, 51.0, 48.0, 59.0, 51.0, 48.0, 58.0, 62.0, 45.0, 55.0, 31.0, 39.0, 42.0, 36.0, 21.0, 28.0, 12.0, 14.0, 13.0, 15.0, 6.0, 5.0, 8.0, 5.0, 1.0, 4.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.15234375, -3.064544677734375, -2.97674560546875, -2.888946533203125, -2.8011474609375, -2.713348388671875, -2.62554931640625, -2.537750244140625, -2.449951171875, -2.362152099609375, -2.27435302734375, -2.186553955078125, -2.0987548828125, -2.010955810546875, -1.92315673828125, -1.835357666015625, -1.74755859375, -1.659759521484375, -1.57196044921875, -1.484161376953125, -1.3963623046875, -1.308563232421875, -1.22076416015625, -1.132965087890625, -1.045166015625, -0.957366943359375, -0.86956787109375, -0.781768798828125, -0.6939697265625, -0.606170654296875, -0.51837158203125, -0.430572509765625, -0.3427734375, -0.254974365234375, -0.16717529296875, -0.079376220703125, 0.0084228515625, 0.096221923828125, 0.18402099609375, 0.271820068359375, 0.359619140625, 0.447418212890625, 0.53521728515625, 0.623016357421875, 0.7108154296875, 0.798614501953125, 0.88641357421875, 0.974212646484375, 1.06201171875, 1.149810791015625, 1.23760986328125, 1.325408935546875, 1.4132080078125, 1.501007080078125, 1.58880615234375, 1.676605224609375, 1.764404296875, 1.852203369140625, 1.94000244140625, 2.027801513671875, 2.1156005859375, 2.203399658203125, 2.29119873046875, 2.378997802734375, 2.466796875]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 6.0, 6.0, 9.0, 16.0, 22.0, 21.0, 45.0, 75.0, 104.0, 160.0, 230.0, 398.0, 734.0, 1048.0, 1960.0, 3097.0, 5413.0, 9854.0, 18274.0, 35884.0, 77309.0, 188147.0, 350241.0, 195338.0, 80279.0, 37143.0, 18917.0, 10205.0, 5718.0, 3228.0, 1829.0, 1098.0, 642.0, 421.0, 265.0, 137.0, 100.0, 78.0, 37.0, 24.0, 16.0, 14.0, 6.0, 6.0, 6.0, 2.0, 4.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.25439453125, -0.24657249450683594, -0.23875045776367188, -0.2309284210205078, -0.22310638427734375, -0.2152843475341797, -0.20746231079101562, -0.19964027404785156, -0.1918182373046875, -0.18399620056152344, -0.17617416381835938, -0.1683521270751953, -0.16053009033203125, -0.1527080535888672, -0.14488601684570312, -0.13706398010253906, -0.129241943359375, -0.12141990661621094, -0.11359786987304688, -0.10577583312988281, -0.09795379638671875, -0.09013175964355469, -0.08230972290039062, -0.07448768615722656, -0.0666656494140625, -0.05884361267089844, -0.051021575927734375, -0.04319953918457031, -0.03537750244140625, -0.027555465698242188, -0.019733428955078125, -0.011911392211914062, -0.00408935546875, 0.0037326812744140625, 0.011554718017578125, 0.019376754760742188, 0.02719879150390625, 0.03502082824707031, 0.042842864990234375, 
0.05066490173339844, 0.0584869384765625, 0.06630897521972656, 0.07413101196289062, 0.08195304870605469, 0.08977508544921875, 0.09759712219238281, 0.10541915893554688, 0.11324119567871094, 0.121063232421875, 0.12888526916503906, 0.13670730590820312, 0.1445293426513672, 0.15235137939453125, 0.1601734161376953, 0.16799545288085938, 0.17581748962402344, 0.1836395263671875, 0.19146156311035156, 0.19928359985351562, 0.2071056365966797, 0.21492767333984375, 0.2227497100830078, 0.23057174682617188, 0.23839378356933594, 0.2462158203125]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 5.0, 0.0, 8.0, 9.0, 9.0, 9.0, 10.0, 12.0, 9.0, 14.0, 14.0, 16.0, 25.0, 37.0, 26.0, 31.0, 33.0, 45.0, 47.0, 38.0, 40.0, 46.0, 53.0, 45.0, 56.0, 42.0, 41.0, 31.0, 39.0, 31.0, 27.0, 22.0, 26.0, 20.0, 22.0, 8.0, 14.0, 18.0, 10.0, 4.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0], "bins": [-4.48828125, -4.3572998046875, -4.226318359375, -4.0953369140625, -3.96435546875, -3.8333740234375, -3.702392578125, -3.5714111328125, -3.4404296875, -3.3094482421875, -3.178466796875, -3.0474853515625, -2.91650390625, -2.7855224609375, -2.654541015625, -2.5235595703125, -2.392578125, -2.2615966796875, -2.130615234375, -1.9996337890625, -1.86865234375, -1.7376708984375, -1.606689453125, -1.4757080078125, -1.3447265625, -1.2137451171875, -1.082763671875, -0.9517822265625, -0.82080078125, -0.6898193359375, -0.558837890625, -0.4278564453125, -0.296875, -0.1658935546875, -0.034912109375, 0.0960693359375, 0.22705078125, 0.3580322265625, 0.489013671875, 0.6199951171875, 0.7509765625, 0.8819580078125, 1.012939453125, 1.1439208984375, 1.27490234375, 1.4058837890625, 1.536865234375, 1.6678466796875, 1.798828125, 1.9298095703125, 2.060791015625, 2.1917724609375, 2.32275390625, 2.4537353515625, 2.584716796875, 2.7156982421875, 2.8466796875, 2.9776611328125, 3.108642578125, 3.2396240234375, 3.37060546875, 3.5015869140625, 3.632568359375, 3.7635498046875, 3.89453125]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 4.0, 1.0, 5.0, 10.0, 10.0, 17.0, 25.0, 38.0, 56.0, 97.0, 157.0, 231.0, 381.0, 701.0, 1168.0, 2275.0, 4949.0, 12325.0, 38067.0, 147523.0, 576154.0, 195972.0, 44775.0, 13912.0, 4941.0, 2128.0, 1058.0, 615.0, 347.0, 199.0, 128.0, 82.0, 68.0, 30.0, 27.0, 24.0, 16.0, 10.0, 12.0, 6.0, 2.0, 6.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 2.0], "bins": [-0.04827880859375, -0.04692220687866211, -0.04556560516357422, -0.04420900344848633, -0.04285240173339844, -0.04149580001831055, -0.040139198303222656, -0.038782596588134766, -0.037425994873046875, -0.036069393157958984, -0.034712791442871094, -0.0333561897277832, -0.03199958801269531, -0.030642986297607422, -0.02928638458251953, -0.02792978286743164, -0.02657318115234375, -0.02521657943725586, -0.02385997772216797, -0.022503376007080078, -0.021146774291992188, -0.019790172576904297, -0.018433570861816406, -0.017076969146728516, -0.015720367431640625, -0.014363765716552734, -0.013007164001464844, -0.011650562286376953, -0.010293960571289062, -0.008937358856201172, -0.007580757141113281, -0.006224155426025391, -0.0048675537109375, -0.0035109519958496094, -0.0021543502807617188, -0.0007977485656738281, 0.0005588531494140625, 0.0019154548645019531, 0.0032720565795898438, 0.004628658294677734, 0.005985260009765625, 0.007341861724853516, 
0.008698463439941406, 0.010055065155029297, 0.011411666870117188, 0.012768268585205078, 0.014124870300292969, 0.01548147201538086, 0.01683807373046875, 0.01819467544555664, 0.01955127716064453, 0.020907878875732422, 0.022264480590820312, 0.023621082305908203, 0.024977684020996094, 0.026334285736083984, 0.027690887451171875, 0.029047489166259766, 0.030404090881347656, 0.03176069259643555, 0.03311729431152344, 0.03447389602661133, 0.03583049774169922, 0.03718709945678711, 0.038543701171875]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 3.0, 5.0, 9.0, 18.0, 18.0, 23.0, 25.0, 61.0, 62.0, 79.0, 102.0, 134.0, 120.0, 84.0, 70.0, 52.0, 46.0, 30.0, 17.0, 14.0, 13.0, 5.0, 5.0, 6.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.786252975463867e-05, -4.618149250745773e-05, -4.4500455260276794e-05, -4.2819418013095856e-05, -4.113838076591492e-05, -3.945734351873398e-05, -3.777630627155304e-05, -3.60952690243721e-05, -3.441423177719116e-05, -3.2733194530010223e-05, -3.1052157282829285e-05, -2.9371120035648346e-05, -2.7690082788467407e-05, -2.600904554128647e-05, -2.432800829410553e-05, -2.264697104692459e-05, -2.0965933799743652e-05, -1.9284896552562714e-05, -1.7603859305381775e-05, -1.5922822058200836e-05, -1.4241784811019897e-05, -1.2560747563838959e-05, -1.087971031665802e-05, -9.198673069477081e-06, -7.517635822296143e-06, -5.836598575115204e-06, -4.155561327934265e-06, -2.4745240807533264e-06, -7.934868335723877e-07, 8.87550413608551e-07, 2.5685876607894897e-06, 4.2496249079704285e-06, 5.930662155151367e-06, 7.611699402332306e-06, 9.292736649513245e-06, 1.0973773896694183e-05, 1.2654811143875122e-05, 1.433584839105606e-05, 1.6016885638237e-05, 1.7697922885417938e-05, 1.9378960132598877e-05, 2.1059997379779816e-05, 2.2741034626960754e-05, 2.4422071874141693e-05, 2.6103109121322632e-05, 2.778414636850357e-05, 2.946518361568451e-05, 3.114622086286545e-05, 3.282725811004639e-05, 3.4508295357227325e-05, 3.6189332604408264e-05, 3.78703698515892e-05, 3.955140709877014e-05, 4.123244434595108e-05, 4.291348159313202e-05, 4.459451884031296e-05, 4.6275556087493896e-05, 4.7956593334674835e-05, 4.9637630581855774e-05, 5.131866782903671e-05, 5.299970507621765e-05, 5.468074232339859e-05, 5.636177957057953e-05, 5.804281681776047e-05, 5.9723854064941406e-05]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 0.0, 2.0, 3.0, 0.0, 2.0, 4.0, 1.0, 1.0, 7.0, 4.0, 10.0, 3.0, 19.0, 18.0, 23.0, 18.0, 24.0, 53.0, 63.0, 95.0, 184.0, 417.0, 1850.0, 15640.0, 552790.0, 459717.0, 14968.0, 1834.0, 410.0, 143.0, 74.0, 44.0, 45.0, 27.0, 21.0, 16.0, 9.0, 5.0, 4.0, 1.0, 1.0, 3.0, 1.0, 2.0, 1.0, 4.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.1478271484375, -0.14334487915039062, -0.13886260986328125, -0.13438034057617188, -0.1298980712890625, -0.12541580200195312, -0.12093353271484375, -0.11645126342773438, -0.111968994140625, -0.10748672485351562, -0.10300445556640625, -0.09852218627929688, -0.0940399169921875, -0.08955764770507812, -0.08507537841796875, -0.08059310913085938, -0.07611083984375, -0.07162857055664062, -0.06714630126953125, -0.06266403198242188, -0.0581817626953125, -0.053699493408203125, -0.04921722412109375, -0.044734954833984375, -0.040252685546875, -0.035770416259765625, 
-0.03128814697265625, -0.026805877685546875, -0.0223236083984375, -0.017841339111328125, -0.01335906982421875, -0.008876800537109375, -0.00439453125, 8.7738037109375e-05, 0.00457000732421875, 0.009052276611328125, 0.0135345458984375, 0.018016815185546875, 0.02249908447265625, 0.026981353759765625, 0.031463623046875, 0.035945892333984375, 0.04042816162109375, 0.044910430908203125, 0.0493927001953125, 0.053874969482421875, 0.05835723876953125, 0.06283950805664062, 0.06732177734375, 0.07180404663085938, 0.07628631591796875, 0.08076858520507812, 0.0852508544921875, 0.08973312377929688, 0.09421539306640625, 0.09869766235351562, 0.103179931640625, 0.10766220092773438, 0.11214447021484375, 0.11662673950195312, 0.1211090087890625, 0.12559127807617188, 0.13007354736328125, 0.13455581665039062, 0.1390380859375]}, "gradients/decoder.model.decoder.layers.10.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 0.0, 2.0, 3.0, 3.0, 7.0, 8.0, 7.0, 10.0, 10.0, 14.0, 14.0, 35.0, 25.0, 33.0, 45.0, 52.0, 56.0, 77.0, 97.0, 104.0, 84.0, 74.0, 53.0, 46.0, 32.0, 29.0, 21.0, 18.0, 9.0, 3.0, 6.0, 8.0, 4.0, 6.0, 3.0, 0.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01367950439453125, -0.013254523277282715, -0.01282954216003418, -0.012404561042785645, -0.01197957992553711, -0.011554598808288574, -0.011129617691040039, -0.010704636573791504, -0.010279655456542969, -0.009854674339294434, -0.009429693222045898, -0.009004712104797363, -0.008579730987548828, -0.008154749870300293, -0.007729768753051758, -0.007304787635803223, -0.0068798065185546875, -0.006454825401306152, -0.006029844284057617, -0.005604863166809082, -0.005179882049560547, -0.004754900932312012, -0.0043299198150634766, -0.0039049386978149414, -0.0034799575805664062, -0.003054976463317871, -0.002629995346069336, -0.0022050142288208008, -0.0017800331115722656, -0.0013550519943237305, -0.0009300708770751953, -0.0005050897598266602, -8.0108642578125e-05, 0.00034487247467041016, 0.0007698535919189453, 0.0011948347091674805, 0.0016198158264160156, 0.0020447969436645508, 0.002469778060913086, 0.002894759178161621, 0.0033197402954101562, 0.0037447214126586914, 0.0041697025299072266, 0.004594683647155762, 0.005019664764404297, 0.005444645881652832, 0.005869626998901367, 0.006294608116149902, 0.0067195892333984375, 0.007144570350646973, 0.007569551467895508, 0.007994532585144043, 0.008419513702392578, 0.008844494819641113, 0.009269475936889648, 0.009694457054138184, 0.010119438171386719, 0.010544419288635254, 0.010969400405883789, 0.011394381523132324, 0.01181936264038086, 0.012244343757629395, 0.01266932487487793, 0.013094305992126465, 0.013519287109375]}, "gradients/decoder.model.decoder.layers.10.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 4.0, 1.0, 4.0, 2.0, 3.0, 3.0, 12.0, 26.0, 31.0, 57.0, 80.0, 134.0, 158.0, 140.0, 132.0, 73.0, 48.0, 39.0, 19.0, 14.0, 9.0, 6.0, 4.0, 6.0, 2.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-6.478943347930908, -6.303932189941406, -6.128920555114746, -5.953909397125244, -5.778898239135742, -5.603886604309082, -5.42887544631958, -5.253864288330078, -5.078852653503418, -4.903841495513916, -4.728829860687256, -4.553818702697754, -4.378807067871094, -4.203795909881592, -4.02878475189209, -3.853773355484009, -3.6787619590759277, 
-3.5037505626678467, -3.3287391662597656, -3.1537280082702637, -2.9787166118621826, -2.8037052154541016, -2.6286940574645996, -2.4536826610565186, -2.2786712646484375, -2.1036598682403564, -1.928648591041565, -1.7536373138427734, -1.5786259174346924, -1.4036145210266113, -1.2286032438278198, -1.0535919666290283, -0.8785805702209473, -0.703569233417511, -0.5285578966140747, -0.3535465598106384, -0.17853522300720215, -0.003523886203765869, 0.1714874505996704, 0.3464987277984619, 0.521510124206543, 0.6965214610099792, 0.8715327978134155, 1.046544075012207, 1.221555471420288, 1.3965668678283691, 1.5715781450271606, 1.7465894222259521, 1.9216008186340332, 2.0966122150421143, 2.2716236114501953, 2.4466347694396973, 2.6216461658477783, 2.7966575622558594, 2.9716687202453613, 3.1466801166534424, 3.3216915130615234, 3.4967029094696045, 3.6717143058776855, 3.8467254638671875, 4.021737098693848, 4.19674825668335, 4.371759414672852, 4.546771049499512, 4.721782207489014]}, "gradients/decoder.model.decoder.layers.10.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 9.0, 7.0, 13.0, 8.0, 6.0, 11.0, 11.0, 26.0, 20.0, 27.0, 43.0, 40.0, 35.0, 46.0, 34.0, 43.0, 50.0, 47.0, 47.0, 46.0, 52.0, 51.0, 47.0, 37.0, 34.0, 27.0, 28.0, 24.0, 17.0, 23.0, 11.0, 16.0, 12.0, 10.0, 8.0, 6.0, 8.0, 5.0, 7.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-2.6686439514160156, -2.5930778980255127, -2.5175118446350098, -2.4419455528259277, -2.366379499435425, -2.290813446044922, -2.215247392654419, -2.139681339263916, -2.064115047454834, -1.988548994064331, -1.9129828214645386, -1.8374167680740356, -1.7618505954742432, -1.6862845420837402, -1.6107184886932373, -1.5351524353027344, -1.4595863819122314, -1.3840203285217285, -1.308454155921936, -1.232888102531433, -1.1573219299316406, -1.0817558765411377, -1.0061898231506348, -0.9306237101554871, -0.8550575971603394, -0.7794914841651917, -0.703925371170044, -0.628359317779541, -0.5527932047843933, -0.4772270917892456, -0.4016610085964203, -0.32609492540359497, -0.25052881240844727, -0.17496271431446075, -0.09939661622047424, -0.023830518126487732, 0.05173557996749878, 0.12730169296264648, 0.2028677761554718, 0.2784338593482971, 0.3539999723434448, 0.42956608533859253, 0.5051321983337402, 0.5806982517242432, 0.6562643647193909, 0.7318304777145386, 0.8073965311050415, 0.8829626441001892, 0.9585287570953369, 1.0340948104858398, 1.1096609830856323, 1.1852270364761353, 1.2607932090759277, 1.3363592624664307, 1.4119253158569336, 1.4874913692474365, 1.563057541847229, 1.638623595237732, 1.7141897678375244, 1.7897558212280273, 1.8653218746185303, 1.9408880472183228, 2.0164542198181152, 2.092020273208618, 2.167586326599121]}, "gradients/decoder.model.decoder.layers.10.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 4.0, 3.0, 2.0, 9.0, 5.0, 11.0, 11.0, 21.0, 21.0, 32.0, 41.0, 76.0, 117.0, 185.0, 296.0, 528.0, 934.0, 1729.0, 3102.0, 6249.0, 13166.0, 32359.0, 93957.0, 370963.0, 371612.0, 94059.0, 32302.0, 13300.0, 6336.0, 3076.0, 1730.0, 914.0, 553.0, 304.0, 180.0, 136.0, 84.0, 47.0, 25.0, 25.0, 12.0, 9.0, 9.0, 9.0, 5.0, 4.0, 3.0, 3.0, 3.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8681640625, -1.8058929443359375, -1.743621826171875, -1.6813507080078125, -1.61907958984375, -1.5568084716796875, -1.494537353515625, -1.4322662353515625, -1.3699951171875, -1.3077239990234375, -1.245452880859375, -1.1831817626953125, -1.12091064453125, 
-1.0586395263671875, -0.996368408203125, -0.9340972900390625, -0.871826171875, -0.8095550537109375, -0.747283935546875, -0.6850128173828125, -0.62274169921875, -0.5604705810546875, -0.498199462890625, -0.4359283447265625, -0.3736572265625, -0.3113861083984375, -0.249114990234375, -0.1868438720703125, -0.12457275390625, -0.0623016357421875, -3.0517578125e-05, 0.0622406005859375, 0.12451171875, 0.1867828369140625, 0.249053955078125, 0.3113250732421875, 0.37359619140625, 0.4358673095703125, 0.498138427734375, 0.5604095458984375, 0.6226806640625, 0.6849517822265625, 0.747222900390625, 0.8094940185546875, 0.87176513671875, 0.9340362548828125, 0.996307373046875, 1.0585784912109375, 1.120849609375, 1.1831207275390625, 1.245391845703125, 1.3076629638671875, 1.36993408203125, 1.4322052001953125, 1.494476318359375, 1.5567474365234375, 1.6190185546875, 1.6812896728515625, 1.743560791015625, 1.8058319091796875, 1.86810302734375, 1.9303741455078125, 1.992645263671875, 2.0549163818359375, 2.1171875]}, "gradients/decoder.model.decoder.layers.10.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 9.0, 3.0, 8.0, 19.0, 16.0, 25.0, 33.0, 45.0, 48.0, 57.0, 61.0, 78.0, 77.0, 78.0, 79.0, 60.0, 80.0, 53.0, 36.0, 39.0, 35.0, 32.0, 15.0, 5.0, 5.0, 8.0, 2.0, 3.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-16.484375, -15.989501953125, -15.49462890625, -14.999755859375, -14.5048828125, -14.010009765625, -13.51513671875, -13.020263671875, -12.525390625, -12.030517578125, -11.53564453125, -11.040771484375, -10.5458984375, -10.051025390625, -9.55615234375, -9.061279296875, -8.56640625, -8.071533203125, -7.57666015625, -7.081787109375, -6.5869140625, -6.092041015625, -5.59716796875, -5.102294921875, -4.607421875, -4.112548828125, -3.61767578125, -3.122802734375, -2.6279296875, -2.133056640625, -1.63818359375, -1.143310546875, -0.6484375, -0.153564453125, 0.34130859375, 0.836181640625, 1.3310546875, 1.825927734375, 2.32080078125, 2.815673828125, 3.310546875, 3.805419921875, 4.30029296875, 4.795166015625, 5.2900390625, 5.784912109375, 6.27978515625, 6.774658203125, 7.26953125, 7.764404296875, 8.25927734375, 8.754150390625, 9.2490234375, 9.743896484375, 10.23876953125, 10.733642578125, 11.228515625, 11.723388671875, 12.21826171875, 12.713134765625, 13.2080078125, 13.702880859375, 14.19775390625, 14.692626953125, 15.1875]}, "gradients/decoder.model.decoder.layers.10.self_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 5.0, 5.0, 7.0, 11.0, 18.0, 29.0, 49.0, 73.0, 155.0, 4055.0, 1043664.0, 208.0, 110.0, 59.0, 41.0, 21.0, 13.0, 6.0, 11.0, 5.0, 1.0, 6.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-64.9375, -62.455078125, -59.97265625, -57.490234375, -55.0078125, -52.525390625, -50.04296875, -47.560546875, -45.078125, -42.595703125, -40.11328125, -37.630859375, -35.1484375, -32.666015625, -30.18359375, -27.701171875, -25.21875, -22.736328125, -20.25390625, -17.771484375, -15.2890625, -12.806640625, -10.32421875, -7.841796875, -5.359375, -2.876953125, -0.39453125, 2.087890625, 4.5703125, 7.052734375, 9.53515625, 12.017578125, 14.5, 16.982421875, 19.46484375, 21.947265625, 24.4296875, 26.912109375, 29.39453125, 31.876953125, 34.359375, 36.841796875, 39.32421875, 41.806640625, 44.2890625, 
46.771484375, 49.25390625, 51.736328125, 54.21875, 56.701171875, 59.18359375, 61.666015625, 64.1484375, 66.630859375, 69.11328125, 71.595703125, 74.078125, 76.560546875, 79.04296875, 81.525390625, 84.0078125, 86.490234375, 88.97265625, 91.455078125, 93.9375]}, "gradients/decoder.model.decoder.layers.10.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 1.0, 5.0, 5.0, 7.0, 11.0, 18.0, 28.0, 51.0, 68.0, 120.0, 146.0, 156.0, 125.0, 97.0, 61.0, 39.0, 22.0, 13.0, 5.0, 11.0, 5.0, 1.0, 6.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-26.171875, -25.171630859375, -24.17138671875, -23.171142578125, -22.1708984375, -21.170654296875, -20.17041015625, -19.170166015625, -18.169921875, -17.169677734375, -16.16943359375, -15.169189453125, -14.1689453125, -13.168701171875, -12.16845703125, -11.168212890625, -10.16796875, -9.167724609375, -8.16748046875, -7.167236328125, -6.1669921875, -5.166748046875, -4.16650390625, -3.166259765625, -2.166015625, -1.165771484375, -0.16552734375, 0.834716796875, 1.8349609375, 2.835205078125, 3.83544921875, 4.835693359375, 5.8359375, 6.836181640625, 7.83642578125, 8.836669921875, 9.8369140625, 10.837158203125, 11.83740234375, 12.837646484375, 13.837890625, 14.838134765625, 15.83837890625, 16.838623046875, 17.8388671875, 18.839111328125, 19.83935546875, 20.839599609375, 21.83984375, 22.840087890625, 23.84033203125, 24.840576171875, 25.8408203125, 26.841064453125, 27.84130859375, 28.841552734375, 29.841796875, 30.842041015625, 31.84228515625, 32.842529296875, 33.8427734375, 34.843017578125, 35.84326171875, 36.843505859375, 37.84375]}, "gradients/decoder.model.decoder.layers.10.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 6.0, 5.0, 7.0, 9.0, 20.0, 24.0, 24.0, 31.0, 71.0, 117.0, 205.0, 470.0, 1214.0, 3299.0, 11105.0, 120995.0, 885967.0, 17524.0, 4542.0, 1644.0, 636.0, 283.0, 144.0, 74.0, 43.0, 28.0, 22.0, 16.0, 9.0, 6.0, 3.0, 6.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.90576171875, -0.8735885620117188, -0.8414154052734375, -0.8092422485351562, -0.777069091796875, -0.7448959350585938, -0.7127227783203125, -0.6805496215820312, -0.64837646484375, -0.6162033081054688, -0.5840301513671875, -0.5518569946289062, -0.519683837890625, -0.48751068115234375, -0.4553375244140625, -0.42316436767578125, -0.3909912109375, -0.35881805419921875, -0.3266448974609375, -0.29447174072265625, -0.262298583984375, -0.23012542724609375, -0.1979522705078125, -0.16577911376953125, -0.13360595703125, -0.10143280029296875, -0.0692596435546875, -0.03708648681640625, -0.004913330078125, 0.02725982666015625, 0.0594329833984375, 0.09160614013671875, 0.123779296875, 0.15595245361328125, 0.1881256103515625, 0.22029876708984375, 0.252471923828125, 0.28464508056640625, 0.3168182373046875, 0.34899139404296875, 0.38116455078125, 0.41333770751953125, 0.4455108642578125, 0.47768402099609375, 0.509857177734375, 0.5420303344726562, 0.5742034912109375, 0.6063766479492188, 0.6385498046875, 0.6707229614257812, 0.7028961181640625, 0.7350692749023438, 0.767242431640625, 0.7994155883789062, 0.8315887451171875, 0.8637619018554688, 0.89593505859375, 0.9281082153320312, 0.9602813720703125, 0.9924545288085938, 1.024627685546875, 1.0568008422851562, 1.0889739990234375, 1.1211471557617188, 1.1533203125]}, 
"gradients/decoder.model.decoder.layers.10.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 4.0, 5.0, 2.0, 6.0, 4.0, 5.0, 11.0, 18.0, 36.0, 49.0, 116.0, 265.0, 251.0, 105.0, 44.0, 27.0, 11.0, 5.0, 11.0, 5.0, 9.0, 3.0, 4.0, 4.0, 3.0, 1.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00014281272888183594, -0.0001384727656841278, -0.00013413280248641968, -0.00012979283928871155, -0.00012545287609100342, -0.00012111291289329529, -0.00011677294969558716, -0.00011243298649787903, -0.0001080930233001709, -0.00010375306010246277, -9.941309690475464e-05, -9.507313370704651e-05, -9.073317050933838e-05, -8.639320731163025e-05, -8.205324411392212e-05, -7.771328091621399e-05, -7.337331771850586e-05, -6.903335452079773e-05, -6.46933913230896e-05, -6.035342812538147e-05, -5.601346492767334e-05, -5.167350172996521e-05, -4.733353853225708e-05, -4.299357533454895e-05, -3.865361213684082e-05, -3.431364893913269e-05, -2.997368574142456e-05, -2.563372254371643e-05, -2.12937593460083e-05, -1.695379614830017e-05, -1.2613832950592041e-05, -8.273869752883911e-06, -3.933906555175781e-06, 4.0605664253234863e-07, 4.7460198402404785e-06, 9.085983037948608e-06, 1.3425946235656738e-05, 1.7765909433364868e-05, 2.2105872631072998e-05, 2.6445835828781128e-05, 3.078579902648926e-05, 3.512576222419739e-05, 3.946572542190552e-05, 4.380568861961365e-05, 4.814565181732178e-05, 5.248561501502991e-05, 5.682557821273804e-05, 6.116554141044617e-05, 6.55055046081543e-05, 6.984546780586243e-05, 7.418543100357056e-05, 7.852539420127869e-05, 8.286535739898682e-05, 8.720532059669495e-05, 9.154528379440308e-05, 9.58852469921112e-05, 0.00010022521018981934, 0.00010456517338752747, 0.0001089051365852356, 0.00011324509978294373, 0.00011758506298065186, 0.00012192502617835999, 0.00012626498937606812, 0.00013060495257377625, 0.00013494491577148438]}, "gradients/decoder.model.decoder.layers.10.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 6.0, 8.0, 8.0, 22.0, 32.0, 35.0, 76.0, 113.0, 215.0, 479.0, 1393.0, 4266.0, 18004.0, 916114.0, 94034.0, 9335.0, 2650.0, 941.0, 372.0, 176.0, 94.0, 48.0, 36.0, 30.0, 17.0, 11.0, 7.0, 8.0, 5.0, 5.0, 2.0, 1.0, 1.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.005859375, -0.9709320068359375, -0.936004638671875, -0.9010772705078125, -0.86614990234375, -0.8312225341796875, -0.796295166015625, -0.7613677978515625, -0.7264404296875, -0.6915130615234375, -0.656585693359375, -0.6216583251953125, -0.58673095703125, -0.5518035888671875, -0.516876220703125, -0.4819488525390625, -0.447021484375, -0.4120941162109375, -0.377166748046875, -0.3422393798828125, -0.30731201171875, -0.2723846435546875, -0.237457275390625, -0.2025299072265625, -0.1676025390625, -0.1326751708984375, -0.097747802734375, -0.0628204345703125, -0.02789306640625, 0.0070343017578125, 0.041961669921875, 0.0768890380859375, 0.11181640625, 0.1467437744140625, 0.181671142578125, 0.2165985107421875, 0.25152587890625, 0.2864532470703125, 0.321380615234375, 0.3563079833984375, 0.3912353515625, 0.4261627197265625, 0.461090087890625, 0.4960174560546875, 0.53094482421875, 0.5658721923828125, 0.600799560546875, 0.6357269287109375, 0.670654296875, 0.7055816650390625, 0.740509033203125, 0.7754364013671875, 0.81036376953125, 0.8452911376953125, 0.880218505859375, 
0.9151458740234375, 0.9500732421875, 0.9850006103515625, 1.019927978515625, 1.0548553466796875, 1.08978271484375, 1.1247100830078125, 1.159637451171875, 1.1945648193359375, 1.2294921875]}, "gradients/decoder.model.decoder.layers.10.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 8.0, 4.0, 10.0, 11.0, 21.0, 40.0, 69.0, 394.0, 296.0, 57.0, 25.0, 20.0, 13.0, 8.0, 7.0, 6.0, 3.0, 3.0, 0.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.322265625, -0.31072998046875, -0.2991943359375, -0.28765869140625, -0.276123046875, -0.26458740234375, -0.2530517578125, -0.24151611328125, -0.22998046875, -0.21844482421875, -0.2069091796875, -0.19537353515625, -0.183837890625, -0.17230224609375, -0.1607666015625, -0.14923095703125, -0.1376953125, -0.12615966796875, -0.1146240234375, -0.10308837890625, -0.091552734375, -0.08001708984375, -0.0684814453125, -0.05694580078125, -0.04541015625, -0.03387451171875, -0.0223388671875, -0.01080322265625, 0.000732421875, 0.01226806640625, 0.0238037109375, 0.03533935546875, 0.046875, 0.05841064453125, 0.0699462890625, 0.08148193359375, 0.093017578125, 0.10455322265625, 0.1160888671875, 0.12762451171875, 0.13916015625, 0.15069580078125, 0.1622314453125, 0.17376708984375, 0.185302734375, 0.19683837890625, 0.2083740234375, 0.21990966796875, 0.2314453125, 0.24298095703125, 0.2545166015625, 0.26605224609375, 0.277587890625, 0.28912353515625, 0.3006591796875, 0.31219482421875, 0.32373046875, 0.33526611328125, 0.3468017578125, 0.35833740234375, 0.369873046875, 0.38140869140625, 0.3929443359375, 0.40447998046875, 0.416015625]}, "gradients/decoder.model.decoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 5.0, 3.0, 2.0, 5.0, 8.0, 20.0, 31.0, 64.0, 96.0, 110.0, 173.0, 157.0, 130.0, 75.0, 53.0, 30.0, 14.0, 15.0, 8.0, 6.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-12.82423210144043, -12.486954689025879, -12.149678230285645, -11.812400817871094, -11.475123405456543, -11.137845993041992, -10.800569534301758, -10.463292121887207, -10.126014709472656, -9.788737297058105, -9.451460838317871, -9.11418342590332, -8.77690601348877, -8.439628601074219, -8.102352142333984, -7.765074729919434, -7.427798271179199, -7.090521335601807, -6.753243923187256, -6.415966987609863, -6.0786895751953125, -5.74141263961792, -5.404135704040527, -5.066858291625977, -4.729581356048584, -4.392304420471191, -4.055027008056641, -3.717750072479248, -3.3804728984832764, -3.0431957244873047, -2.705918788909912, -2.3686416149139404, -2.0313644409179688, -1.694087266921997, -1.356810212135315, -1.0195331573486328, -0.6822559833526611, -0.34497880935668945, -0.007701873779296875, 0.3295753002166748, 0.6668524742126465, 1.0041296482086182, 1.3414067029953003, 1.6786837577819824, 2.015960931777954, 2.353238105773926, 2.6905150413513184, 3.02779221534729, 3.3650693893432617, 3.7023465633392334, 4.039623737335205, 4.376900672912598, 4.714178085327148, 5.051455020904541, 5.388731956481934, 5.726009368896484, 6.063286304473877, 6.4005632400512695, 6.73784065246582, 7.075117588043213, 7.4123945236206055, 7.749671936035156, 8.08694839477539, 8.424225807189941, 8.761503219604492]}, 
"gradients/decoder.model.decoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 3.0, 6.0, 3.0, 7.0, 3.0, 3.0, 8.0, 15.0, 15.0, 17.0, 15.0, 18.0, 19.0, 43.0, 19.0, 26.0, 36.0, 37.0, 45.0, 29.0, 36.0, 39.0, 33.0, 41.0, 34.0, 32.0, 28.0, 37.0, 39.0, 38.0, 27.0, 37.0, 22.0, 28.0, 21.0, 18.0, 16.0, 12.0, 18.0, 15.0, 12.0, 10.0, 14.0, 4.0, 7.0, 3.0, 3.0, 3.0, 6.0, 3.0, 2.0, 2.0, 3.0, 0.0, 2.0], "bins": [-12.192366600036621, -11.805703163146973, -11.41904067993164, -11.032377243041992, -10.645713806152344, -10.259051322937012, -9.872387886047363, -9.485725402832031, -9.099061965942383, -8.712398529052734, -8.325736045837402, -7.939072608947754, -7.552409648895264, -7.165746688842773, -6.779083251953125, -6.392420291900635, -6.0057573318481445, -5.619094371795654, -5.232431411743164, -4.845767974853516, -4.459105014801025, -4.072442054748535, -3.685778856277466, -3.2991156578063965, -2.9124526977539062, -2.525789737701416, -2.1391265392303467, -1.752463459968567, -1.365800380706787, -0.9791373014450073, -0.5924742221832275, -0.2058110237121582, 0.18085289001464844, 0.5675159692764282, 0.954179048538208, 1.3408421277999878, 1.7275052070617676, 2.114168167114258, 2.500831365585327, 2.8874945640563965, 3.2741575241088867, 3.660820484161377, 4.047483444213867, 4.434146881103516, 4.820809841156006, 5.207472801208496, 5.5941362380981445, 5.980799198150635, 6.367462158203125, 6.754125118255615, 7.1407880783081055, 7.527451515197754, 7.914114475250244, 8.300777435302734, 8.687440872192383, 9.074104309082031, 9.460766792297363, 9.847430229187012, 10.234092712402344, 10.620756149291992, 11.00741958618164, 11.394082069396973, 11.780745506286621, 12.167407989501953, 12.554071426391602]}, "gradients/decoder.model.decoder.layers.9.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 5.0, 3.0, 3.0, 2.0, 15.0, 10.0, 21.0, 27.0, 32.0, 54.0, 67.0, 115.0, 310.0, 1112.0, 5704.0, 87145.0, 3159705.0, 912572.0, 23566.0, 2829.0, 541.0, 175.0, 90.0, 42.0, 33.0, 30.0, 26.0, 18.0, 13.0, 6.0, 7.0, 5.0, 5.0, 4.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-10.21875, -9.897216796875, -9.57568359375, -9.254150390625, -8.9326171875, -8.611083984375, -8.28955078125, -7.968017578125, -7.646484375, -7.324951171875, -7.00341796875, -6.681884765625, -6.3603515625, -6.038818359375, -5.71728515625, -5.395751953125, -5.07421875, -4.752685546875, -4.43115234375, -4.109619140625, -3.7880859375, -3.466552734375, -3.14501953125, -2.823486328125, -2.501953125, -2.180419921875, -1.85888671875, -1.537353515625, -1.2158203125, -0.894287109375, -0.57275390625, -0.251220703125, 0.0703125, 0.391845703125, 0.71337890625, 1.034912109375, 1.3564453125, 1.677978515625, 1.99951171875, 2.321044921875, 2.642578125, 2.964111328125, 3.28564453125, 3.607177734375, 3.9287109375, 4.250244140625, 4.57177734375, 4.893310546875, 5.21484375, 5.536376953125, 5.85791015625, 6.179443359375, 6.5009765625, 6.822509765625, 7.14404296875, 7.465576171875, 7.787109375, 8.108642578125, 8.43017578125, 8.751708984375, 9.0732421875, 9.394775390625, 9.71630859375, 10.037841796875, 10.359375]}, "gradients/decoder.model.decoder.layers.9.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 3.0, 6.0, 9.0, 12.0, 13.0, 19.0, 17.0, 28.0, 27.0, 40.0, 58.0, 46.0, 65.0, 65.0, 52.0, 68.0, 67.0, 68.0, 
66.0, 53.0, 44.0, 39.0, 27.0, 27.0, 22.0, 24.0, 9.0, 12.0, 6.0, 6.0, 2.0, 5.0, 4.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-5.02734375, -4.9033203125, -4.779296875, -4.6552734375, -4.53125, -4.4072265625, -4.283203125, -4.1591796875, -4.03515625, -3.9111328125, -3.787109375, -3.6630859375, -3.5390625, -3.4150390625, -3.291015625, -3.1669921875, -3.04296875, -2.9189453125, -2.794921875, -2.6708984375, -2.546875, -2.4228515625, -2.298828125, -2.1748046875, -2.05078125, -1.9267578125, -1.802734375, -1.6787109375, -1.5546875, -1.4306640625, -1.306640625, -1.1826171875, -1.05859375, -0.9345703125, -0.810546875, -0.6865234375, -0.5625, -0.4384765625, -0.314453125, -0.1904296875, -0.06640625, 0.0576171875, 0.181640625, 0.3056640625, 0.4296875, 0.5537109375, 0.677734375, 0.8017578125, 0.92578125, 1.0498046875, 1.173828125, 1.2978515625, 1.421875, 1.5458984375, 1.669921875, 1.7939453125, 1.91796875, 2.0419921875, 2.166015625, 2.2900390625, 2.4140625, 2.5380859375, 2.662109375, 2.7861328125, 2.91015625]}, "gradients/decoder.model.decoder.layers.9.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 4.0, 4.0, 3.0, 6.0, 8.0, 13.0, 25.0, 44.0, 82.0, 127.0, 296.0, 788.0, 3465329.0, 726226.0, 743.0, 270.0, 156.0, 63.0, 37.0, 18.0, 7.0, 14.0, 4.0, 5.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0], "bins": [-46.4375, -45.0703125, -43.703125, -42.3359375, -40.96875, -39.6015625, -38.234375, -36.8671875, -35.5, -34.1328125, -32.765625, -31.3984375, -30.03125, -28.6640625, -27.296875, -25.9296875, -24.5625, -23.1953125, -21.828125, -20.4609375, -19.09375, -17.7265625, -16.359375, -14.9921875, -13.625, -12.2578125, -10.890625, -9.5234375, -8.15625, -6.7890625, -5.421875, -4.0546875, -2.6875, -1.3203125, 0.046875, 1.4140625, 2.78125, 4.1484375, 5.515625, 6.8828125, 8.25, 9.6171875, 10.984375, 12.3515625, 13.71875, 15.0859375, 16.453125, 17.8203125, 19.1875, 20.5546875, 21.921875, 23.2890625, 24.65625, 26.0234375, 27.390625, 28.7578125, 30.125, 31.4921875, 32.859375, 34.2265625, 35.59375, 36.9609375, 38.328125, 39.6953125, 41.0625]}, "gradients/decoder.model.decoder.layers.9.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 2.0, 6.0, 3.0, 7.0, 6.0, 7.0, 11.0, 11.0, 20.0, 29.0, 41.0, 64.0, 69.0, 85.0, 158.0, 235.0, 355.0, 548.0, 619.0, 536.0, 401.0, 249.0, 182.0, 133.0, 90.0, 57.0, 44.0, 37.0, 22.0, 13.0, 10.0, 9.0, 7.0, 4.0, 4.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5625, -1.5185699462890625, -1.474639892578125, -1.4307098388671875, -1.38677978515625, -1.3428497314453125, -1.298919677734375, -1.2549896240234375, -1.2110595703125, -1.1671295166015625, -1.123199462890625, -1.0792694091796875, -1.03533935546875, -0.9914093017578125, -0.947479248046875, -0.9035491943359375, -0.859619140625, -0.8156890869140625, -0.771759033203125, -0.7278289794921875, -0.68389892578125, -0.6399688720703125, -0.596038818359375, -0.5521087646484375, -0.5081787109375, -0.4642486572265625, -0.420318603515625, -0.3763885498046875, -0.33245849609375, -0.2885284423828125, -0.244598388671875, -0.2006683349609375, -0.15673828125, -0.1128082275390625, -0.068878173828125, -0.0249481201171875, 0.01898193359375, 0.0629119873046875, 0.106842041015625, 0.1507720947265625, 0.1947021484375, 0.2386322021484375, 0.282562255859375, 0.3264923095703125, 0.37042236328125, 0.4143524169921875, 
0.458282470703125, 0.5022125244140625, 0.546142578125, 0.5900726318359375, 0.634002685546875, 0.6779327392578125, 0.72186279296875, 0.7657928466796875, 0.809722900390625, 0.8536529541015625, 0.8975830078125, 0.9415130615234375, 0.985443115234375, 1.0293731689453125, 1.07330322265625, 1.1172332763671875, 1.161163330078125, 1.2050933837890625, 1.2490234375]}, "gradients/decoder.model.decoder.layers.9.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 7.0, 1.0, 3.0, 11.0, 15.0, 27.0, 64.0, 102.0, 163.0, 189.0, 189.0, 114.0, 66.0, 35.0, 14.0, 3.0, 5.0, 4.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.7007694244384766, -3.357156276702881, -3.013542890548706, -2.6699295043945312, -2.3263163566589355, -1.9827030897140503, -1.639089822769165, -1.2954764366149902, -0.9518632888793945, -0.6082500219345093, -0.264636754989624, 0.07897651195526123, 0.4225897789001465, 0.7662030458450317, 1.109816312789917, 1.4534296989440918, 1.7970428466796875, 2.140655994415283, 2.484269380569458, 2.827882766723633, 3.1714959144592285, 3.515109062194824, 3.858722448348999, 4.202335834503174, 4.5459489822387695, 4.889562129974365, 5.233175277709961, 5.576788902282715, 5.9204020500183105, 6.264015197753906, 6.60762882232666, 6.951241970062256, 7.294855117797852, 7.638468265533447, 7.982081413269043, 8.325695037841797, 8.669307708740234, 9.012921333312988, 9.356534957885742, 9.70014762878418, 10.043761253356934, 10.387374877929688, 10.730987548828125, 11.074601173400879, 11.418214797973633, 11.76182746887207, 12.105441093444824, 12.449054718017578, 12.792667388916016, 13.13628101348877, 13.479893684387207, 13.823507308959961, 14.167119979858398, 14.510733604431152, 14.854347229003906, 15.197959899902344, 15.541573524475098, 15.885187149047852, 16.22879981994629, 16.572412490844727, 16.916027069091797, 17.259639739990234, 17.603252410888672, 17.946866989135742, 18.29047966003418]}, "gradients/decoder.model.decoder.layers.9.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 1.0, 2.0, 2.0, 6.0, 4.0, 9.0, 11.0, 14.0, 18.0, 22.0, 20.0, 26.0, 30.0, 32.0, 51.0, 55.0, 57.0, 47.0, 59.0, 53.0, 57.0, 50.0, 55.0, 50.0, 37.0, 48.0, 37.0, 36.0, 30.0, 15.0, 16.0, 16.0, 8.0, 8.0, 2.0, 10.0, 3.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-4.379016399383545, -4.25968074798584, -4.140345096588135, -4.02100944519043, -3.9016737937927246, -3.7823381423950195, -3.6630024909973145, -3.5436666011810303, -3.424330949783325, -3.30499529838562, -3.185659646987915, -3.06632399559021, -2.946988344192505, -2.8276524543762207, -2.7083168029785156, -2.5889811515808105, -2.4696455001831055, -2.3503098487854004, -2.2309741973876953, -2.1116385459899902, -1.9923027753829956, -1.8729671239852905, -1.7536314725875854, -1.6342957019805908, -1.5149602890014648, -1.3956246376037598, -1.2762889862060547, -1.1569533348083496, -1.037617564201355, -0.9182819128036499, -0.7989462614059448, -0.679610550403595, -0.5602748394012451, -0.44093915820121765, -0.3216034770011902, -0.2022678256034851, -0.08293214440345764, 0.036403536796569824, 0.1557391881942749, 0.27507489919662476, 0.39441055059432983, 0.5137462019920349, 0.6330819129943848, 0.7524175643920898, 0.8717532157897949, 0.9910889267921448, 1.110424518585205, 1.2297602891921997, 
1.3490959405899048, 1.4684315919876099, 1.587767243385315, 1.7071030139923096, 1.8264386653900146, 1.9457743167877197, 2.065109968185425, 2.18444561958313, 2.303781270980835, 2.42311692237854, 2.542452573776245, 2.66178822517395, 2.7811238765716553, 2.9004597663879395, 3.0197954177856445, 3.1391310691833496, 3.2584667205810547]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 2.0, 2.0, 3.0, 9.0, 11.0, 11.0, 15.0, 16.0, 28.0, 46.0, 48.0, 84.0, 111.0, 155.0, 328.0, 406.0, 748.0, 1292.0, 2368.0, 5003.0, 14236.0, 93268.0, 794623.0, 109129.0, 15405.0, 5282.0, 2506.0, 1335.0, 754.0, 462.0, 294.0, 179.0, 109.0, 92.0, 44.0, 48.0, 27.0, 33.0, 15.0, 7.0, 7.0, 8.0, 1.0, 6.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.9755859375, -0.9413909912109375, -0.907196044921875, -0.8730010986328125, -0.83880615234375, -0.8046112060546875, -0.770416259765625, -0.7362213134765625, -0.7020263671875, -0.6678314208984375, -0.633636474609375, -0.5994415283203125, -0.56524658203125, -0.5310516357421875, -0.496856689453125, -0.4626617431640625, -0.428466796875, -0.3942718505859375, -0.360076904296875, -0.3258819580078125, -0.29168701171875, -0.2574920654296875, -0.223297119140625, -0.1891021728515625, -0.1549072265625, -0.1207122802734375, -0.086517333984375, -0.0523223876953125, -0.01812744140625, 0.0160675048828125, 0.050262451171875, 0.0844573974609375, 0.11865234375, 0.1528472900390625, 0.187042236328125, 0.2212371826171875, 0.25543212890625, 0.2896270751953125, 0.323822021484375, 0.3580169677734375, 0.3922119140625, 0.4264068603515625, 0.460601806640625, 0.4947967529296875, 0.52899169921875, 0.5631866455078125, 0.597381591796875, 0.6315765380859375, 0.665771484375, 0.6999664306640625, 0.734161376953125, 0.7683563232421875, 0.80255126953125, 0.8367462158203125, 0.870941162109375, 0.9051361083984375, 0.9393310546875, 0.9735260009765625, 1.007720947265625, 1.0419158935546875, 1.07611083984375, 1.1103057861328125, 1.144500732421875, 1.1786956787109375, 1.212890625]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 5.0, 1.0, 4.0, 1.0, 7.0, 8.0, 6.0, 14.0, 17.0, 17.0, 25.0, 30.0, 28.0, 44.0, 46.0, 56.0, 56.0, 57.0, 61.0, 75.0, 58.0, 62.0, 48.0, 51.0, 46.0, 41.0, 32.0, 31.0, 18.0, 10.0, 12.0, 13.0, 5.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-2.869140625, -2.789794921875, -2.71044921875, -2.631103515625, -2.5517578125, -2.472412109375, -2.39306640625, -2.313720703125, -2.234375, -2.155029296875, -2.07568359375, -1.996337890625, -1.9169921875, -1.837646484375, -1.75830078125, -1.678955078125, -1.599609375, -1.520263671875, -1.44091796875, -1.361572265625, -1.2822265625, -1.202880859375, -1.12353515625, -1.044189453125, -0.96484375, -0.885498046875, -0.80615234375, -0.726806640625, -0.6474609375, -0.568115234375, -0.48876953125, -0.409423828125, -0.330078125, -0.250732421875, -0.17138671875, -0.092041015625, -0.0126953125, 0.066650390625, 0.14599609375, 0.225341796875, 0.3046875, 0.384033203125, 0.46337890625, 0.542724609375, 0.6220703125, 0.701416015625, 0.78076171875, 0.860107421875, 0.939453125, 1.018798828125, 1.09814453125, 1.177490234375, 1.2568359375, 1.336181640625, 1.41552734375, 1.494873046875, 1.57421875, 1.653564453125, 1.73291015625, 1.812255859375, 1.8916015625, 1.970947265625, 2.05029296875, 
2.129638671875, 2.208984375]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 1.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 12.0, 15.0, 25.0, 48.0, 41.0, 77.0, 97.0, 161.0, 212.0, 354.0, 525.0, 781.0, 1390.0, 2466.0, 4278.0, 8627.0, 17287.0, 39715.0, 107243.0, 344757.0, 340881.0, 105188.0, 39227.0, 17096.0, 8088.0, 4104.0, 2195.0, 1274.0, 746.0, 518.0, 363.0, 219.0, 141.0, 110.0, 78.0, 57.0, 30.0, 30.0, 16.0, 15.0, 7.0, 10.0, 5.0, 8.0, 10.0, 5.0, 4.0, 0.0, 4.0], "bins": [-0.28369140625, -0.2755851745605469, -0.26747894287109375, -0.2593727111816406, -0.2512664794921875, -0.24316024780273438, -0.23505401611328125, -0.22694778442382812, -0.218841552734375, -0.21073532104492188, -0.20262908935546875, -0.19452285766601562, -0.1864166259765625, -0.17831039428710938, -0.17020416259765625, -0.16209793090820312, -0.15399169921875, -0.14588546752929688, -0.13777923583984375, -0.12967300415039062, -0.1215667724609375, -0.11346054077148438, -0.10535430908203125, -0.09724807739257812, -0.089141845703125, -0.08103561401367188, -0.07292938232421875, -0.06482315063476562, -0.0567169189453125, -0.048610687255859375, -0.04050445556640625, -0.032398223876953125, -0.0242919921875, -0.016185760498046875, -0.00807952880859375, 2.6702880859375e-05, 0.0081329345703125, 0.016239166259765625, 0.02434539794921875, 0.032451629638671875, 0.040557861328125, 0.048664093017578125, 0.05677032470703125, 0.06487655639648438, 0.0729827880859375, 0.08108901977539062, 0.08919525146484375, 0.09730148315429688, 0.10540771484375, 0.11351394653320312, 0.12162017822265625, 0.12972640991210938, 0.1378326416015625, 0.14593887329101562, 0.15404510498046875, 0.16215133666992188, 0.170257568359375, 0.17836380004882812, 0.18647003173828125, 0.19457626342773438, 0.2026824951171875, 0.21078872680664062, 0.21889495849609375, 0.22700119018554688, 0.235107421875]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 4.0, 6.0, 4.0, 9.0, 14.0, 14.0, 27.0, 34.0, 28.0, 42.0, 41.0, 41.0, 61.0, 55.0, 66.0, 63.0, 67.0, 72.0, 58.0, 56.0, 37.0, 43.0, 32.0, 30.0, 20.0, 17.0, 16.0, 13.0, 6.0, 8.0, 7.0, 5.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.875, -4.733642578125, -4.59228515625, -4.450927734375, -4.3095703125, -4.168212890625, -4.02685546875, -3.885498046875, -3.744140625, -3.602783203125, -3.46142578125, -3.320068359375, -3.1787109375, -3.037353515625, -2.89599609375, -2.754638671875, -2.61328125, -2.471923828125, -2.33056640625, -2.189208984375, -2.0478515625, -1.906494140625, -1.76513671875, -1.623779296875, -1.482421875, -1.341064453125, -1.19970703125, -1.058349609375, -0.9169921875, -0.775634765625, -0.63427734375, -0.492919921875, -0.3515625, -0.210205078125, -0.06884765625, 0.072509765625, 0.2138671875, 0.355224609375, 0.49658203125, 0.637939453125, 0.779296875, 0.920654296875, 1.06201171875, 1.203369140625, 1.3447265625, 1.486083984375, 1.62744140625, 1.768798828125, 1.91015625, 2.051513671875, 2.19287109375, 2.334228515625, 2.4755859375, 2.616943359375, 2.75830078125, 2.899658203125, 3.041015625, 3.182373046875, 3.32373046875, 3.465087890625, 3.6064453125, 3.747802734375, 3.88916015625, 4.030517578125, 4.171875]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 4.0, 3.0, 2.0, 10.0, 18.0, 22.0, 26.0, 43.0, 72.0, 77.0, 136.0, 228.0, 415.0, 1284.0, 8291.0, 911089.0, 121711.0, 3807.0, 713.0, 260.0, 122.0, 82.0, 48.0, 36.0, 23.0, 10.0, 6.0, 9.0, 3.0, 4.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1649169921875, -0.15989112854003906, -0.15486526489257812, -0.1498394012451172, -0.14481353759765625, -0.1397876739501953, -0.13476181030273438, -0.12973594665527344, -0.1247100830078125, -0.11968421936035156, -0.11465835571289062, -0.10963249206542969, -0.10460662841796875, -0.09958076477050781, -0.09455490112304688, -0.08952903747558594, -0.084503173828125, -0.07947731018066406, -0.07445144653320312, -0.06942558288574219, -0.06439971923828125, -0.05937385559082031, -0.054347991943359375, -0.04932212829589844, -0.0442962646484375, -0.03927040100097656, -0.034244537353515625, -0.029218673706054688, -0.02419281005859375, -0.019166946411132812, -0.014141082763671875, -0.009115219116210938, -0.00408935546875, 0.0009365081787109375, 0.005962371826171875, 0.010988235473632812, 0.01601409912109375, 0.021039962768554688, 0.026065826416015625, 0.031091690063476562, 0.0361175537109375, 0.04114341735839844, 0.046169281005859375, 0.05119514465332031, 0.05622100830078125, 0.06124687194824219, 0.06627273559570312, 0.07129859924316406, 0.076324462890625, 0.08135032653808594, 0.08637619018554688, 0.09140205383300781, 0.09642791748046875, 0.10145378112792969, 0.10647964477539062, 0.11150550842285156, 0.1165313720703125, 0.12155723571777344, 0.12658309936523438, 0.1316089630126953, 0.13663482666015625, 0.1416606903076172, 0.14668655395507812, 0.15171241760253906, 0.15673828125]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 5.0, 4.0, 2.0, 2.0, 6.0, 4.0, 12.0, 20.0, 33.0, 38.0, 56.0, 69.0, 119.0, 109.0, 118.0, 126.0, 75.0, 61.0, 43.0, 48.0, 21.0, 11.0, 11.0, 6.0, 5.0, 1.0, 5.0, 2.0], "bins": [-7.551908493041992e-05, -7.401406764984131e-05, -7.25090503692627e-05, -7.100403308868408e-05, -6.949901580810547e-05, -6.799399852752686e-05, -6.648898124694824e-05, -6.498396396636963e-05, -6.347894668579102e-05, -6.19739294052124e-05, -6.046891212463379e-05, -5.8963894844055176e-05, -5.745887756347656e-05, -5.595386028289795e-05, -5.4448843002319336e-05, -5.294382572174072e-05, -5.143880844116211e-05, -4.9933791160583496e-05, -4.842877388000488e-05, -4.692375659942627e-05, -4.5418739318847656e-05, -4.391372203826904e-05, -4.240870475769043e-05, -4.0903687477111816e-05, -3.93986701965332e-05, -3.789365291595459e-05, -3.6388635635375977e-05, -3.488361835479736e-05, -3.337860107421875e-05, -3.187358379364014e-05, -3.0368566513061523e-05, -2.886354923248291e-05, -2.7358531951904297e-05, -2.5853514671325684e-05, -2.434849739074707e-05, -2.2843480110168457e-05, -2.1338462829589844e-05, -1.983344554901123e-05, -1.8328428268432617e-05, -1.6823410987854004e-05, -1.531839370727539e-05, -1.3813376426696777e-05, -1.2308359146118164e-05, -1.080334186553955e-05, -9.298324584960938e-06, -7.793307304382324e-06, -6.288290023803711e-06, -4.783272743225098e-06, -3.2782554626464844e-06, -1.773238182067871e-06, -2.682209014892578e-07, 1.2367963790893555e-06, 2.7418136596679688e-06, 4.246830940246582e-06, 5.751848220825195e-06, 7.256865501403809e-06, 8.761882781982422e-06, 
1.0266900062561035e-05, 1.1771917343139648e-05, 1.3276934623718262e-05, 1.4781951904296875e-05, 1.6286969184875488e-05, 1.77919864654541e-05, 1.9297003746032715e-05, 2.0802021026611328e-05]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 8.0, 11.0, 19.0, 43.0, 122.0, 406.0, 1499.0, 19359.0, 1018712.0, 6876.0, 1030.0, 284.0, 106.0, 40.0, 16.0, 12.0, 5.0, 4.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.372802734375, -0.3619537353515625, -0.351104736328125, -0.3402557373046875, -0.32940673828125, -0.3185577392578125, -0.307708740234375, -0.2968597412109375, -0.2860107421875, -0.2751617431640625, -0.264312744140625, -0.2534637451171875, -0.24261474609375, -0.2317657470703125, -0.220916748046875, -0.2100677490234375, -0.19921875, -0.1883697509765625, -0.177520751953125, -0.1666717529296875, -0.15582275390625, -0.1449737548828125, -0.134124755859375, -0.1232757568359375, -0.1124267578125, -0.1015777587890625, -0.090728759765625, -0.0798797607421875, -0.06903076171875, -0.0581817626953125, -0.047332763671875, -0.0364837646484375, -0.025634765625, -0.0147857666015625, -0.003936767578125, 0.0069122314453125, 0.01776123046875, 0.0286102294921875, 0.039459228515625, 0.0503082275390625, 0.0611572265625, 0.0720062255859375, 0.082855224609375, 0.0937042236328125, 0.10455322265625, 0.1154022216796875, 0.126251220703125, 0.1371002197265625, 0.14794921875, 0.1587982177734375, 0.169647216796875, 0.1804962158203125, 0.19134521484375, 0.2021942138671875, 0.213043212890625, 0.2238922119140625, 0.2347412109375, 0.2455902099609375, 0.256439208984375, 0.2672882080078125, 0.27813720703125, 0.2889862060546875, 0.299835205078125, 0.3106842041015625, 0.321533203125]}, "gradients/decoder.model.decoder.layers.9.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 1.0, 8.0, 8.0, 24.0, 65.0, 380.0, 406.0, 58.0, 26.0, 10.0, 3.0, 3.0, 5.0, 3.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.047637939453125, -0.046236515045166016, -0.04483509063720703, -0.04343366622924805, -0.04203224182128906, -0.04063081741333008, -0.039229393005371094, -0.03782796859741211, -0.036426544189453125, -0.03502511978149414, -0.033623695373535156, -0.03222227096557617, -0.030820846557617188, -0.029419422149658203, -0.02801799774169922, -0.026616573333740234, -0.02521514892578125, -0.023813724517822266, -0.02241230010986328, -0.021010875701904297, -0.019609451293945312, -0.018208026885986328, -0.016806602478027344, -0.01540517807006836, -0.014003753662109375, -0.01260232925415039, -0.011200904846191406, -0.009799480438232422, -0.008398056030273438, -0.006996631622314453, -0.005595207214355469, -0.004193782806396484, -0.0027923583984375, -0.0013909339904785156, 1.049041748046875e-05, 0.0014119148254394531, 0.0028133392333984375, 0.004214763641357422, 0.005616188049316406, 0.007017612457275391, 0.008419036865234375, 0.00982046127319336, 0.011221885681152344, 0.012623310089111328, 0.014024734497070312, 0.015426158905029297, 0.01682758331298828, 0.018229007720947266, 0.01963043212890625, 0.021031856536865234, 0.02243328094482422, 0.023834705352783203, 0.025236129760742188, 
0.026637554168701172, 0.028038978576660156, 0.02944040298461914, 0.030841827392578125, 0.03224325180053711, 0.033644676208496094, 0.03504610061645508, 0.03644752502441406, 0.03784894943237305, 0.03925037384033203, 0.040651798248291016, 0.04205322265625]}, "gradients/decoder.model.decoder.layers.9.self_attn_layer_norm.weight": {"_type": "histogram", "values": [3.0, 5.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 6.0, 10.0, 10.0, 9.0, 13.0, 34.0, 55.0, 55.0, 81.0, 98.0, 103.0, 127.0, 109.0, 100.0, 52.0, 44.0, 25.0, 27.0, 16.0, 4.0, 4.0, 9.0, 2.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.230142593383789, -2.11362886428833, -1.9971152544021606, -1.8806015253067017, -1.7640879154205322, -1.6475741863250732, -1.5310604572296143, -1.4145468473434448, -1.2980332374572754, -1.1815195083618164, -1.065005898475647, -0.948492169380188, -0.8319785594940186, -0.7154648303985596, -0.5989511609077454, -0.48243749141693115, -0.36592376232147217, -0.24941009283065796, -0.13289640843868256, -0.016382724046707153, 0.10013094544410706, 0.21664464473724365, 0.33315831422805786, 0.44967198371887207, 0.5661856532096863, 0.6826993227005005, 0.7992129921913147, 0.9157266616821289, 1.032240390777588, 1.1487541198730469, 1.2652677297592163, 1.3817813396453857, 1.4982950687408447, 1.6148087978363037, 1.7313224077224731, 1.8478361368179321, 1.9643497467041016, 2.0808634757995605, 2.1973772048950195, 2.3138909339904785, 2.4304044246673584, 2.5469181537628174, 2.6634318828582764, 2.7799453735351562, 2.8964591026306152, 3.012972831726074, 3.129486560821533, 3.246000289916992, 3.362514019012451, 3.47902774810791, 3.595541477203369, 3.712054967880249, 3.828568696975708, 3.945082426071167, 4.061595916748047, 4.178109645843506, 4.294623374938965, 4.411137104034424, 4.527650833129883, 4.644164562225342, 4.760678291320801, 4.877191543579102, 4.9937052726745605, 5.1102190017700195, 5.2267327308654785]}, "gradients/decoder.model.decoder.layers.9.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 6.0, 1.0, 3.0, 5.0, 8.0, 6.0, 14.0, 16.0, 20.0, 21.0, 31.0, 32.0, 36.0, 58.0, 71.0, 60.0, 53.0, 62.0, 67.0, 59.0, 69.0, 47.0, 54.0, 42.0, 39.0, 36.0, 20.0, 13.0, 17.0, 12.0, 7.0, 7.0, 8.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6596763134002686, -2.5811033248901367, -2.502530097961426, -2.423957109451294, -2.345384120941162, -2.266810894012451, -2.1882379055023193, -2.1096649169921875, -2.0310916900634766, -1.9525185823440552, -1.8739455938339233, -1.795372486114502, -1.7167994976043701, -1.6382263898849487, -1.5596532821655273, -1.4810802936553955, -1.4025073051452637, -1.3239341974258423, -1.2453612089157104, -1.166788101196289, -1.0882151126861572, -1.0096420049667358, -0.9310688972473145, -0.8524958491325378, -0.7739228010177612, -0.6953497529029846, -0.616776704788208, -0.5382035970687866, -0.45963054895401, -0.3810575008392334, -0.3024844229221344, -0.2239113450050354, -0.1453385353088379, -0.06676547229290009, 0.01180759072303772, 0.09038065373897552, 0.16895371675491333, 0.24752676486968994, 0.32609984278678894, 0.40467292070388794, 0.48324596881866455, 0.5618190169334412, 0.6403920650482178, 0.7189651727676392, 0.7975382208824158, 0.8761112689971924, 0.9546843767166138, 1.0332574844360352, 1.111830472946167, 1.1904035806655884, 1.2689765691757202, 1.3475496768951416, 
1.4261226654052734, 1.5046957731246948, 1.5832688808441162, 1.661841869354248, 1.7404149770736694, 1.8189880847930908, 1.8975610733032227, 1.976134181022644, 2.0547072887420654, 2.1332802772521973, 2.211853265762329, 2.29042649269104, 2.368999481201172]}, "gradients/decoder.model.decoder.layers.9.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 4.0, 0.0, 3.0, 2.0, 3.0, 5.0, 14.0, 15.0, 22.0, 28.0, 60.0, 86.0, 156.0, 285.0, 521.0, 1010.0, 1753.0, 3744.0, 8670.0, 27132.0, 151662.0, 699085.0, 116084.0, 23284.0, 7855.0, 3451.0, 1696.0, 837.0, 474.0, 240.0, 135.0, 88.0, 58.0, 30.0, 26.0, 11.0, 6.0, 11.0, 5.0, 5.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0], "bins": [-1.7109375, -1.66265869140625, -1.6143798828125, -1.56610107421875, -1.517822265625, -1.46954345703125, -1.4212646484375, -1.37298583984375, -1.32470703125, -1.27642822265625, -1.2281494140625, -1.17987060546875, -1.131591796875, -1.08331298828125, -1.0350341796875, -0.98675537109375, -0.9384765625, -0.89019775390625, -0.8419189453125, -0.79364013671875, -0.745361328125, -0.69708251953125, -0.6488037109375, -0.60052490234375, -0.55224609375, -0.50396728515625, -0.4556884765625, -0.40740966796875, -0.359130859375, -0.31085205078125, -0.2625732421875, -0.21429443359375, -0.166015625, -0.11773681640625, -0.0694580078125, -0.02117919921875, 0.027099609375, 0.07537841796875, 0.1236572265625, 0.17193603515625, 0.22021484375, 0.26849365234375, 0.3167724609375, 0.36505126953125, 0.413330078125, 0.46160888671875, 0.5098876953125, 0.55816650390625, 0.6064453125, 0.65472412109375, 0.7030029296875, 0.75128173828125, 0.799560546875, 0.84783935546875, 0.8961181640625, 0.94439697265625, 0.99267578125, 1.04095458984375, 1.0892333984375, 1.13751220703125, 1.185791015625, 1.23406982421875, 1.2823486328125, 1.33062744140625, 1.37890625]}, "gradients/decoder.model.decoder.layers.9.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 2.0, 6.0, 7.0, 12.0, 11.0, 19.0, 21.0, 36.0, 26.0, 35.0, 33.0, 40.0, 54.0, 65.0, 62.0, 68.0, 53.0, 59.0, 67.0, 48.0, 50.0, 40.0, 39.0, 41.0, 26.0, 15.0, 17.0, 15.0, 8.0, 5.0, 4.0, 7.0, 1.0, 7.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.95703125, -5.7877197265625, -5.618408203125, -5.4490966796875, -5.27978515625, -5.1104736328125, -4.941162109375, -4.7718505859375, -4.6025390625, -4.4332275390625, -4.263916015625, -4.0946044921875, -3.92529296875, -3.7559814453125, -3.586669921875, -3.4173583984375, -3.248046875, -3.0787353515625, -2.909423828125, -2.7401123046875, -2.57080078125, -2.4014892578125, -2.232177734375, -2.0628662109375, -1.8935546875, -1.7242431640625, -1.554931640625, -1.3856201171875, -1.21630859375, -1.0469970703125, -0.877685546875, -0.7083740234375, -0.5390625, -0.3697509765625, -0.200439453125, -0.0311279296875, 0.13818359375, 0.3074951171875, 0.476806640625, 0.6461181640625, 0.8154296875, 0.9847412109375, 1.154052734375, 1.3233642578125, 1.49267578125, 1.6619873046875, 1.831298828125, 2.0006103515625, 2.169921875, 2.3392333984375, 2.508544921875, 2.6778564453125, 2.84716796875, 3.0164794921875, 3.185791015625, 3.3551025390625, 3.5244140625, 3.6937255859375, 3.863037109375, 4.0323486328125, 4.20166015625, 4.3709716796875, 4.540283203125, 4.7095947265625, 4.87890625]}, "gradients/decoder.model.decoder.layers.9.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 
1.0, 2.0, 3.0, 0.0, 3.0, 3.0, 5.0, 5.0, 6.0, 7.0, 6.0, 8.0, 15.0, 18.0, 15.0, 19.0, 16.0, 22.0, 19.0, 33.0, 30.0, 37.0, 42.0, 46.0, 34.0, 90.0, 967.0, 1039687.0, 6839.0, 149.0, 45.0, 49.0, 36.0, 33.0, 26.0, 36.0, 30.0, 31.0, 28.0, 19.0, 15.0, 11.0, 18.0, 9.0, 8.0, 16.0, 9.0, 5.0, 4.0, 3.0, 5.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-17.875, -17.310791015625, -16.74658203125, -16.182373046875, -15.6181640625, -15.053955078125, -14.48974609375, -13.925537109375, -13.361328125, -12.797119140625, -12.23291015625, -11.668701171875, -11.1044921875, -10.540283203125, -9.97607421875, -9.411865234375, -8.84765625, -8.283447265625, -7.71923828125, -7.155029296875, -6.5908203125, -6.026611328125, -5.46240234375, -4.898193359375, -4.333984375, -3.769775390625, -3.20556640625, -2.641357421875, -2.0771484375, -1.512939453125, -0.94873046875, -0.384521484375, 0.1796875, 0.743896484375, 1.30810546875, 1.872314453125, 2.4365234375, 3.000732421875, 3.56494140625, 4.129150390625, 4.693359375, 5.257568359375, 5.82177734375, 6.385986328125, 6.9501953125, 7.514404296875, 8.07861328125, 8.642822265625, 9.20703125, 9.771240234375, 10.33544921875, 10.899658203125, 11.4638671875, 12.028076171875, 12.59228515625, 13.156494140625, 13.720703125, 14.284912109375, 14.84912109375, 15.413330078125, 15.9775390625, 16.541748046875, 17.10595703125, 17.670166015625, 18.234375]}, "gradients/decoder.model.decoder.layers.9.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 4.0, 3.0, 4.0, 9.0, 8.0, 6.0, 6.0, 14.0, 15.0, 20.0, 23.0, 15.0, 23.0, 24.0, 28.0, 39.0, 36.0, 46.0, 38.0, 42.0, 40.0, 53.0, 36.0, 40.0, 44.0, 43.0, 45.0, 33.0, 27.0, 34.0, 30.0, 38.0, 22.0, 20.0, 14.0, 13.0, 13.0, 13.0, 12.0, 11.0, 5.0, 6.0, 4.0, 5.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-4.515625, -4.38262939453125, -4.2496337890625, -4.11663818359375, -3.983642578125, -3.85064697265625, -3.7176513671875, -3.58465576171875, -3.45166015625, -3.31866455078125, -3.1856689453125, -3.05267333984375, -2.919677734375, -2.78668212890625, -2.6536865234375, -2.52069091796875, -2.3876953125, -2.25469970703125, -2.1217041015625, -1.98870849609375, -1.855712890625, -1.72271728515625, -1.5897216796875, -1.45672607421875, -1.32373046875, -1.19073486328125, -1.0577392578125, -0.92474365234375, -0.791748046875, -0.65875244140625, -0.5257568359375, -0.39276123046875, -0.259765625, -0.12677001953125, 0.0062255859375, 0.13922119140625, 0.272216796875, 0.40521240234375, 0.5382080078125, 0.67120361328125, 0.80419921875, 0.93719482421875, 1.0701904296875, 1.20318603515625, 1.336181640625, 1.46917724609375, 1.6021728515625, 1.73516845703125, 1.8681640625, 2.00115966796875, 2.1341552734375, 2.26715087890625, 2.400146484375, 2.53314208984375, 2.6661376953125, 2.79913330078125, 2.93212890625, 3.06512451171875, 3.1981201171875, 3.33111572265625, 3.464111328125, 3.59710693359375, 3.7301025390625, 3.86309814453125, 3.99609375]}, "gradients/decoder.model.decoder.layers.9.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 9.0, 12.0, 21.0, 28.0, 33.0, 53.0, 76.0, 138.0, 312.0, 749.0, 2866.0, 14531.0, 157909.0, 820575.0, 42611.0, 6207.0, 1441.0, 491.0, 192.0, 110.0, 53.0, 35.0, 19.0, 18.0, 16.0, 7.0, 13.0, 5.0, 7.0, 3.0, 5.0, 1.0, 4.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.9658203125, -0.9405746459960938, -0.9153289794921875, -0.8900833129882812, 
-0.864837646484375, -0.8395919799804688, -0.8143463134765625, -0.7891006469726562, -0.76385498046875, -0.7386093139648438, -0.7133636474609375, -0.6881179809570312, -0.662872314453125, -0.6376266479492188, -0.6123809814453125, -0.5871353149414062, -0.5618896484375, -0.5366439819335938, -0.5113983154296875, -0.48615264892578125, -0.460906982421875, -0.43566131591796875, -0.4104156494140625, -0.38516998291015625, -0.35992431640625, -0.33467864990234375, -0.3094329833984375, -0.28418731689453125, -0.258941650390625, -0.23369598388671875, -0.2084503173828125, -0.18320465087890625, -0.157958984375, -0.13271331787109375, -0.1074676513671875, -0.08222198486328125, -0.056976318359375, -0.03173065185546875, -0.0064849853515625, 0.01876068115234375, 0.04400634765625, 0.06925201416015625, 0.0944976806640625, 0.11974334716796875, 0.144989013671875, 0.17023468017578125, 0.1954803466796875, 0.22072601318359375, 0.2459716796875, 0.27121734619140625, 0.2964630126953125, 0.32170867919921875, 0.346954345703125, 0.37220001220703125, 0.3974456787109375, 0.42269134521484375, 0.44793701171875, 0.47318267822265625, 0.4984283447265625, 0.5236740112304688, 0.548919677734375, 0.5741653442382812, 0.5994110107421875, 0.6246566772460938, 0.64990234375]}, "gradients/decoder.model.decoder.layers.9.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 5.0, 4.0, 2.0, 4.0, 5.0, 6.0, 8.0, 11.0, 12.0, 15.0, 11.0, 24.0, 36.0, 51.0, 86.0, 130.0, 164.0, 122.0, 107.0, 47.0, 32.0, 22.0, 22.0, 18.0, 16.0, 9.0, 3.0, 9.0, 6.0, 6.0, 3.0, 1.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0], "bins": [-6.985664367675781e-05, -6.778072565793991e-05, -6.570480763912201e-05, -6.362888962030411e-05, -6.15529716014862e-05, -5.9477053582668304e-05, -5.74011355638504e-05, -5.53252175450325e-05, -5.32492995262146e-05, -5.11733815073967e-05, -4.9097463488578796e-05, -4.7021545469760895e-05, -4.494562745094299e-05, -4.286970943212509e-05, -4.079379141330719e-05, -3.871787339448929e-05, -3.664195537567139e-05, -3.4566037356853485e-05, -3.2490119338035583e-05, -3.0414201319217682e-05, -2.833828330039978e-05, -2.626236528158188e-05, -2.4186447262763977e-05, -2.2110529243946075e-05, -2.0034611225128174e-05, -1.7958693206310272e-05, -1.588277518749237e-05, -1.3806857168674469e-05, -1.1730939149856567e-05, -9.655021131038666e-06, -7.579103112220764e-06, -5.5031850934028625e-06, -3.427267074584961e-06, -1.3513490557670593e-06, 7.245689630508423e-07, 2.800486981868744e-06, 4.8764050006866455e-06, 6.952323019504547e-06, 9.028241038322449e-06, 1.110415905714035e-05, 1.3180077075958252e-05, 1.5255995094776154e-05, 1.7331913113594055e-05, 1.9407831132411957e-05, 2.148374915122986e-05, 2.355966717004776e-05, 2.563558518886566e-05, 2.7711503207683563e-05, 2.9787421226501465e-05, 3.1863339245319366e-05, 3.393925726413727e-05, 3.601517528295517e-05, 3.809109330177307e-05, 4.016701132059097e-05, 4.2242929339408875e-05, 4.4318847358226776e-05, 4.639476537704468e-05, 4.847068339586258e-05, 5.054660141468048e-05, 5.262251943349838e-05, 5.4698437452316284e-05, 5.6774355471134186e-05, 5.885027348995209e-05, 6.092619150876999e-05, 6.300210952758789e-05]}, "gradients/decoder.model.decoder.layers.9.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 2.0, 5.0, 4.0, 5.0, 2.0, 8.0, 9.0, 13.0, 19.0, 28.0, 35.0, 69.0, 107.0, 204.0, 446.0, 1024.0, 2831.0, 8911.0, 43163.0, 665999.0, 
287035.0, 28238.0, 6546.0, 2168.0, 867.0, 377.0, 183.0, 102.0, 53.0, 26.0, 22.0, 11.0, 10.0, 11.0, 6.0, 8.0, 0.0, 5.0, 3.0, 2.0, 3.0, 3.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6728515625, -0.6523895263671875, -0.631927490234375, -0.6114654541015625, -0.59100341796875, -0.5705413818359375, -0.550079345703125, -0.5296173095703125, -0.5091552734375, -0.4886932373046875, -0.468231201171875, -0.4477691650390625, -0.42730712890625, -0.4068450927734375, -0.386383056640625, -0.3659210205078125, -0.345458984375, -0.3249969482421875, -0.304534912109375, -0.2840728759765625, -0.26361083984375, -0.2431488037109375, -0.222686767578125, -0.2022247314453125, -0.1817626953125, -0.1613006591796875, -0.140838623046875, -0.1203765869140625, -0.09991455078125, -0.0794525146484375, -0.058990478515625, -0.0385284423828125, -0.01806640625, 0.0023956298828125, 0.022857666015625, 0.0433197021484375, 0.06378173828125, 0.0842437744140625, 0.104705810546875, 0.1251678466796875, 0.1456298828125, 0.1660919189453125, 0.186553955078125, 0.2070159912109375, 0.22747802734375, 0.2479400634765625, 0.268402099609375, 0.2888641357421875, 0.309326171875, 0.3297882080078125, 0.350250244140625, 0.3707122802734375, 0.39117431640625, 0.4116363525390625, 0.432098388671875, 0.4525604248046875, 0.4730224609375, 0.4934844970703125, 0.513946533203125, 0.5344085693359375, 0.55487060546875, 0.5753326416015625, 0.595794677734375, 0.6162567138671875, 0.63671875]}, "gradients/decoder.model.decoder.layers.9.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 2.0, 6.0, 0.0, 7.0, 8.0, 16.0, 14.0, 20.0, 22.0, 34.0, 38.0, 62.0, 84.0, 139.0, 180.0, 99.0, 80.0, 60.0, 33.0, 23.0, 14.0, 16.0, 11.0, 4.0, 7.0, 4.0, 5.0, 5.0, 3.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1904296875, -0.18390464782714844, -0.17737960815429688, -0.1708545684814453, -0.16432952880859375, -0.1578044891357422, -0.15127944946289062, -0.14475440979003906, -0.1382293701171875, -0.13170433044433594, -0.12517929077148438, -0.11865425109863281, -0.11212921142578125, -0.10560417175292969, -0.09907913208007812, -0.09255409240722656, -0.086029052734375, -0.07950401306152344, -0.07297897338867188, -0.06645393371582031, -0.05992889404296875, -0.05340385437011719, -0.046878814697265625, -0.04035377502441406, -0.0338287353515625, -0.027303695678710938, -0.020778656005859375, -0.014253616333007812, -0.00772857666015625, -0.0012035369873046875, 0.005321502685546875, 0.011846542358398438, 0.01837158203125, 0.024896621704101562, 0.031421661376953125, 0.03794670104980469, 0.04447174072265625, 0.05099678039550781, 0.057521820068359375, 0.06404685974121094, 0.0705718994140625, 0.07709693908691406, 0.08362197875976562, 0.09014701843261719, 0.09667205810546875, 0.10319709777832031, 0.10972213745117188, 0.11624717712402344, 0.122772216796875, 0.12929725646972656, 0.13582229614257812, 0.1423473358154297, 0.14887237548828125, 0.1553974151611328, 0.16192245483398438, 0.16844749450683594, 0.1749725341796875, 0.18149757385253906, 0.18802261352539062, 0.1945476531982422, 0.20107269287109375, 0.2075977325439453, 0.21412277221679688, 0.22064781188964844, 0.2271728515625]}, "gradients/decoder.model.decoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 4.0, 2.0, 0.0, 6.0, 7.0, 14.0, 18.0, 40.0, 66.0, 98.0, 129.0, 153.0, 143.0, 131.0, 77.0, 40.0, 45.0, 15.0, 7.0, 5.0, 3.0, 1.0, 3.0, 5.0, 1.0, 1.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.5602850914001465, -4.252932071685791, -3.9455790519714355, -3.63822603225708, -3.3308730125427246, -3.023519992828369, -2.7161669731140137, -2.408813953399658, -2.1014609336853027, -1.7941079139709473, -1.4867548942565918, -1.1794018745422363, -0.8720488548278809, -0.5646958351135254, -0.2573428153991699, 0.05001020431518555, 0.357363224029541, 0.6647162437438965, 0.972069263458252, 1.2794222831726074, 1.586775302886963, 1.8941283226013184, 2.201481342315674, 2.5088343620300293, 2.8161873817443848, 3.1235404014587402, 3.4308934211730957, 3.738246440887451, 4.045599460601807, 4.352952480316162, 4.660305500030518, 4.967658519744873, 5.27501106262207, 5.582364082336426, 5.889717102050781, 6.197070121765137, 6.504423141479492, 6.811776161193848, 7.119129180908203, 7.426482200622559, 7.733835220336914, 8.04118824005127, 8.348541259765625, 8.65589427947998, 8.963247299194336, 9.270600318908691, 9.577953338623047, 9.885306358337402, 10.192659378051758, 10.500012397766113, 10.807365417480469, 11.114718437194824, 11.42207145690918, 11.729424476623535, 12.03677749633789, 12.344130516052246, 12.651483535766602, 12.958836555480957, 13.266189575195312, 13.573542594909668, 13.880895614624023, 14.188248634338379, 14.495601654052734, 14.80295467376709, 15.110307693481445]}, "gradients/decoder.model.decoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 4.0, 3.0, 5.0, 1.0, 8.0, 7.0, 13.0, 15.0, 8.0, 13.0, 20.0, 20.0, 26.0, 29.0, 21.0, 40.0, 41.0, 57.0, 38.0, 50.0, 50.0, 58.0, 42.0, 43.0, 42.0, 56.0, 44.0, 50.0, 31.0, 34.0, 25.0, 17.0, 22.0, 18.0, 13.0, 16.0, 9.0, 9.0, 0.0, 4.0, 4.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0], "bins": [-7.838198184967041, -7.618335723876953, -7.398473262786865, -7.178610801696777, -6.958748817443848, -6.73888635635376, -6.519023895263672, -6.299161434173584, -6.079298973083496, -5.859436511993408, -5.63957405090332, -5.419712066650391, -5.199849605560303, -4.979987144470215, -4.760124683380127, -4.540262222290039, -4.320400238037109, -4.1005377769470215, -3.8806755542755127, -3.660813093185425, -3.440950870513916, -3.221088409423828, -3.0012259483337402, -2.7813634872436523, -2.5615012645721436, -2.3416388034820557, -2.121776580810547, -1.901914119720459, -1.6820517778396606, -1.4621894359588623, -1.2423269748687744, -1.022464632987976, -0.8026022911071777, -0.5827399492263794, -0.3628775477409363, -0.14301514625549316, 0.07684719562530518, 0.2967095375061035, 0.5165719985961914, 0.7364343404769897, 0.9562966823577881, 1.1761590242385864, 1.3960213661193848, 1.6158838272094727, 1.835746169090271, 2.0556085109710693, 2.2754709720611572, 2.495333194732666, 2.715195655822754, 2.935058116912842, 3.1549203395843506, 3.3747828006744385, 3.5946450233459473, 3.814507484436035, 4.034369945526123, 4.254232406616211, 4.474094390869141, 4.6939568519592285, 4.913819313049316, 5.133681297302246, 5.353543758392334, 5.573406219482422, 5.79326868057251, 6.013131141662598, 6.2329936027526855]}, "gradients/decoder.model.decoder.layers.8.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 5.0, 3.0, 5.0, 5.0, 8.0, 6.0, 11.0, 22.0, 33.0, 57.0, 47.0, 90.0, 166.0, 289.0, 517.0, 903.0, 1690.0, 3439.0, 8099.0, 21770.0, 71741.0, 324001.0, 1357085.0, 1761382.0, 487000.0, 
104612.0, 30140.0, 11475.0, 4950.0, 2212.0, 1030.0, 598.0, 356.0, 218.0, 126.0, 70.0, 39.0, 34.0, 24.0, 9.0, 11.0, 5.0, 5.0, 1.0, 2.0, 1.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-3.576171875, -3.474395751953125, -3.37261962890625, -3.270843505859375, -3.1690673828125, -3.067291259765625, -2.96551513671875, -2.863739013671875, -2.761962890625, -2.660186767578125, -2.55841064453125, -2.456634521484375, -2.3548583984375, -2.253082275390625, -2.15130615234375, -2.049530029296875, -1.94775390625, -1.845977783203125, -1.74420166015625, -1.642425537109375, -1.5406494140625, -1.438873291015625, -1.33709716796875, -1.235321044921875, -1.133544921875, -1.031768798828125, -0.92999267578125, -0.828216552734375, -0.7264404296875, -0.624664306640625, -0.52288818359375, -0.421112060546875, -0.3193359375, -0.217559814453125, -0.11578369140625, -0.014007568359375, 0.0877685546875, 0.189544677734375, 0.29132080078125, 0.393096923828125, 0.494873046875, 0.596649169921875, 0.69842529296875, 0.800201416015625, 0.9019775390625, 1.003753662109375, 1.10552978515625, 1.207305908203125, 1.30908203125, 1.410858154296875, 1.51263427734375, 1.614410400390625, 1.7161865234375, 1.817962646484375, 1.91973876953125, 2.021514892578125, 2.123291015625, 2.225067138671875, 2.32684326171875, 2.428619384765625, 2.5303955078125, 2.632171630859375, 2.73394775390625, 2.835723876953125, 2.9375]}, "gradients/decoder.model.decoder.layers.8.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 7.0, 4.0, 7.0, 4.0, 13.0, 18.0, 19.0, 18.0, 33.0, 36.0, 48.0, 50.0, 73.0, 70.0, 76.0, 57.0, 74.0, 71.0, 50.0, 53.0, 57.0, 38.0, 39.0, 25.0, 18.0, 14.0, 9.0, 12.0, 4.0, 4.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.23046875, -4.1077880859375, -3.985107421875, -3.8624267578125, -3.73974609375, -3.6170654296875, -3.494384765625, -3.3717041015625, -3.2490234375, -3.1263427734375, -3.003662109375, -2.8809814453125, -2.75830078125, -2.6356201171875, -2.512939453125, -2.3902587890625, -2.267578125, -2.1448974609375, -2.022216796875, -1.8995361328125, -1.77685546875, -1.6541748046875, -1.531494140625, -1.4088134765625, -1.2861328125, -1.1634521484375, -1.040771484375, -0.9180908203125, -0.79541015625, -0.6727294921875, -0.550048828125, -0.4273681640625, -0.3046875, -0.1820068359375, -0.059326171875, 0.0633544921875, 0.18603515625, 0.3087158203125, 0.431396484375, 0.5540771484375, 0.6767578125, 0.7994384765625, 0.922119140625, 1.0447998046875, 1.16748046875, 1.2901611328125, 1.412841796875, 1.5355224609375, 1.658203125, 1.7808837890625, 1.903564453125, 2.0262451171875, 2.14892578125, 2.2716064453125, 2.394287109375, 2.5169677734375, 2.6396484375, 2.7623291015625, 2.885009765625, 3.0076904296875, 3.13037109375, 3.2530517578125, 3.375732421875, 3.4984130859375, 3.62109375]}, "gradients/decoder.model.decoder.layers.8.fc1.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 4.0, 5.0, 11.0, 8.0, 17.0, 44.0, 60.0, 98.0, 268.0, 813.0, 5135.0, 180223.0, 3969487.0, 34776.0, 2359.0, 517.0, 218.0, 102.0, 50.0, 42.0, 26.0, 12.0, 8.0, 3.0, 4.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.4140625, -7.0491943359375, -6.684326171875, -6.3194580078125, -5.95458984375, -5.5897216796875, -5.224853515625, -4.8599853515625, -4.4951171875, -4.1302490234375, -3.765380859375, -3.4005126953125, 
-3.03564453125, -2.6707763671875, -2.305908203125, -1.9410400390625, -1.576171875, -1.2113037109375, -0.846435546875, -0.4815673828125, -0.11669921875, 0.2481689453125, 0.613037109375, 0.9779052734375, 1.3427734375, 1.7076416015625, 2.072509765625, 2.4373779296875, 2.80224609375, 3.1671142578125, 3.531982421875, 3.8968505859375, 4.26171875, 4.6265869140625, 4.991455078125, 5.3563232421875, 5.72119140625, 6.0860595703125, 6.450927734375, 6.8157958984375, 7.1806640625, 7.5455322265625, 7.910400390625, 8.2752685546875, 8.64013671875, 9.0050048828125, 9.369873046875, 9.7347412109375, 10.099609375, 10.4644775390625, 10.829345703125, 11.1942138671875, 11.55908203125, 11.9239501953125, 12.288818359375, 12.6536865234375, 13.0185546875, 13.3834228515625, 13.748291015625, 14.1131591796875, 14.47802734375, 14.8428955078125, 15.207763671875, 15.5726318359375, 15.9375]}, "gradients/decoder.model.decoder.layers.8.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 6.0, 6.0, 6.0, 8.0, 13.0, 31.0, 59.0, 78.0, 169.0, 298.0, 693.0, 991.0, 821.0, 439.0, 206.0, 107.0, 60.0, 32.0, 25.0, 10.0, 10.0, 7.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.900390625, -2.8324432373046875, -2.764495849609375, -2.6965484619140625, -2.62860107421875, -2.5606536865234375, -2.492706298828125, -2.4247589111328125, -2.3568115234375, -2.2888641357421875, -2.220916748046875, -2.1529693603515625, -2.08502197265625, -2.0170745849609375, -1.949127197265625, -1.8811798095703125, -1.813232421875, -1.7452850341796875, -1.677337646484375, -1.6093902587890625, -1.54144287109375, -1.4734954833984375, -1.405548095703125, -1.3376007080078125, -1.2696533203125, -1.2017059326171875, -1.133758544921875, -1.0658111572265625, -0.99786376953125, -0.9299163818359375, -0.861968994140625, -0.7940216064453125, -0.72607421875, -0.6581268310546875, -0.590179443359375, -0.5222320556640625, -0.45428466796875, -0.3863372802734375, -0.318389892578125, -0.2504425048828125, -0.1824951171875, -0.1145477294921875, -0.046600341796875, 0.0213470458984375, 0.08929443359375, 0.1572418212890625, 0.225189208984375, 0.2931365966796875, 0.361083984375, 0.4290313720703125, 0.496978759765625, 0.5649261474609375, 0.63287353515625, 0.7008209228515625, 0.768768310546875, 0.8367156982421875, 0.9046630859375, 0.9726104736328125, 1.040557861328125, 1.1085052490234375, 1.17645263671875, 1.2444000244140625, 1.312347412109375, 1.3802947998046875, 1.4482421875]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 4.0, 1.0, 9.0, 7.0, 8.0, 31.0, 41.0, 66.0, 93.0, 142.0, 135.0, 163.0, 114.0, 81.0, 50.0, 29.0, 20.0, 7.0, 8.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.182204246520996, -3.919768810272217, -3.6573333740234375, -3.3948981761932373, -3.132462739944458, -2.8700273036956787, -2.6075921058654785, -2.345156669616699, -2.08272123336792, -1.8202857971191406, -1.5578504800796509, -1.2954151630401611, -1.0329797267913818, -0.7705442905426025, -0.5081089735031128, -0.24567365646362305, 0.01676177978515625, 0.27919715642929077, 0.5416325330734253, 0.8040679097175598, 1.0665032863616943, 1.3289387226104736, 1.5913740396499634, 1.8538093566894531, 
2.1162447929382324, 2.3786802291870117, 2.641115665435791, 2.903550863265991, 3.1659862995147705, 3.42842173576355, 3.69085693359375, 3.9532923698425293, 4.215728759765625, 4.478164196014404, 4.740599632263184, 5.003035068511963, 5.265470504760742, 5.527905464172363, 5.790340900421143, 6.052776336669922, 6.315211772918701, 6.5776472091674805, 6.84008264541626, 7.102518081665039, 7.36495304107666, 7.627388954162598, 7.889823913574219, 8.152259826660156, 8.414694786071777, 8.677129745483398, 8.939565658569336, 9.202000617980957, 9.464436531066895, 9.726871490478516, 9.989307403564453, 10.251742362976074, 10.514177322387695, 10.776612281799316, 11.039048194885254, 11.301483154296875, 11.563919067382812, 11.826354026794434, 12.088789939880371, 12.351224899291992, 12.61366081237793]}, "gradients/decoder.model.decoder.layers.8.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 1.0, 0.0, 6.0, 8.0, 6.0, 11.0, 13.0, 22.0, 27.0, 28.0, 29.0, 32.0, 34.0, 59.0, 59.0, 45.0, 55.0, 49.0, 60.0, 71.0, 58.0, 68.0, 37.0, 40.0, 47.0, 32.0, 28.0, 20.0, 14.0, 9.0, 10.0, 8.0, 5.0, 4.0, 3.0, 0.0, 7.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.250951766967773, -4.130521774291992, -4.010091304779053, -3.8896613121032715, -3.769231081008911, -3.648800849914551, -3.5283708572387695, -3.407940626144409, -3.287510395050049, -3.1670801639556885, -3.046649932861328, -2.926219940185547, -2.8057897090911865, -2.685359477996826, -2.564929485321045, -2.4444992542266846, -2.324069023132324, -2.203638792037964, -2.0832085609436035, -1.9627785682678223, -1.842348337173462, -1.7219181060791016, -1.6014879941940308, -1.48105788230896, -1.3606276512145996, -1.2401974201202393, -1.1197673082351685, -0.9993371367454529, -0.8789069652557373, -0.7584767937660217, -0.6380466222763062, -0.5176164507865906, -0.3971865177154541, -0.2767563462257385, -0.15632617473602295, -0.03589600324630737, 0.0845341682434082, 0.20496433973312378, 0.32539451122283936, 0.44582468271255493, 0.5662548542022705, 0.6866850256919861, 0.8071151971817017, 0.9275453686714172, 1.0479755401611328, 1.1684057712554932, 1.288835883140564, 1.4092659950256348, 1.5296962261199951, 1.6501264572143555, 1.7705565690994263, 1.890986680984497, 2.0114169120788574, 2.1318471431732178, 2.252277374267578, 2.3727073669433594, 2.4931375980377197, 2.61356782913208, 2.7339978218078613, 2.8544280529022217, 2.974858283996582, 3.0952885150909424, 3.2157187461853027, 3.336148738861084, 3.4565789699554443]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 4.0, 7.0, 8.0, 9.0, 10.0, 19.0, 24.0, 45.0, 46.0, 69.0, 139.0, 200.0, 365.0, 724.0, 1610.0, 4095.0, 15817.0, 164852.0, 786903.0, 59422.0, 8836.0, 2757.0, 1214.0, 605.0, 310.0, 183.0, 92.0, 56.0, 42.0, 29.0, 17.0, 13.0, 15.0, 7.0, 5.0, 2.0, 4.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1943359375, -1.1595306396484375, -1.124725341796875, -1.0899200439453125, -1.05511474609375, -1.0203094482421875, -0.985504150390625, -0.9506988525390625, -0.9158935546875, -0.8810882568359375, -0.846282958984375, -0.8114776611328125, -0.77667236328125, -0.7418670654296875, -0.707061767578125, -0.6722564697265625, -0.637451171875, -0.6026458740234375, -0.567840576171875, -0.5330352783203125, -0.49822998046875, -0.4634246826171875, -0.428619384765625, -0.3938140869140625, 
-0.3590087890625, -0.3242034912109375, -0.289398193359375, -0.2545928955078125, -0.21978759765625, -0.1849822998046875, -0.150177001953125, -0.1153717041015625, -0.08056640625, -0.0457611083984375, -0.010955810546875, 0.0238494873046875, 0.05865478515625, 0.0934600830078125, 0.128265380859375, 0.1630706787109375, 0.1978759765625, 0.2326812744140625, 0.267486572265625, 0.3022918701171875, 0.33709716796875, 0.3719024658203125, 0.406707763671875, 0.4415130615234375, 0.476318359375, 0.5111236572265625, 0.545928955078125, 0.5807342529296875, 0.61553955078125, 0.6503448486328125, 0.685150146484375, 0.7199554443359375, 0.7547607421875, 0.7895660400390625, 0.824371337890625, 0.8591766357421875, 0.89398193359375, 0.9287872314453125, 0.963592529296875, 0.9983978271484375, 1.033203125]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 5.0, 7.0, 6.0, 11.0, 21.0, 17.0, 37.0, 40.0, 46.0, 50.0, 57.0, 72.0, 64.0, 83.0, 57.0, 72.0, 78.0, 56.0, 44.0, 49.0, 41.0, 20.0, 19.0, 14.0, 11.0, 4.0, 10.0, 6.0, 4.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8671875, -2.782257080078125, -2.69732666015625, -2.612396240234375, -2.5274658203125, -2.442535400390625, -2.35760498046875, -2.272674560546875, -2.187744140625, -2.102813720703125, -2.01788330078125, -1.932952880859375, -1.8480224609375, -1.763092041015625, -1.67816162109375, -1.593231201171875, -1.50830078125, -1.423370361328125, -1.33843994140625, -1.253509521484375, -1.1685791015625, -1.083648681640625, -0.99871826171875, -0.913787841796875, -0.828857421875, -0.743927001953125, -0.65899658203125, -0.574066162109375, -0.4891357421875, -0.404205322265625, -0.31927490234375, -0.234344482421875, -0.1494140625, -0.064483642578125, 0.02044677734375, 0.105377197265625, 0.1903076171875, 0.275238037109375, 0.36016845703125, 0.445098876953125, 0.530029296875, 0.614959716796875, 0.69989013671875, 0.784820556640625, 0.8697509765625, 0.954681396484375, 1.03961181640625, 1.124542236328125, 1.20947265625, 1.294403076171875, 1.37933349609375, 1.464263916015625, 1.5491943359375, 1.634124755859375, 1.71905517578125, 1.803985595703125, 1.888916015625, 1.973846435546875, 2.05877685546875, 2.143707275390625, 2.2286376953125, 2.313568115234375, 2.39849853515625, 2.483428955078125, 2.568359375]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 2.0, 4.0, 2.0, 6.0, 3.0, 6.0, 11.0, 10.0, 20.0, 25.0, 25.0, 46.0, 62.0, 107.0, 140.0, 230.0, 352.0, 520.0, 782.0, 1256.0, 2029.0, 3254.0, 5531.0, 9449.0, 16968.0, 32062.0, 66343.0, 157898.0, 339935.0, 227086.0, 91595.0, 41917.0, 21654.0, 11865.0, 6678.0, 3923.0, 2478.0, 1513.0, 934.0, 599.0, 373.0, 290.0, 181.0, 127.0, 92.0, 46.0, 48.0, 29.0, 14.0, 16.0, 11.0, 6.0, 3.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 0.0, 2.0], "bins": [-0.1895751953125, -0.18359375, -0.1776123046875, -0.171630859375, -0.1656494140625, -0.15966796875, -0.1536865234375, -0.147705078125, -0.1417236328125, -0.1357421875, -0.1297607421875, -0.123779296875, -0.1177978515625, -0.11181640625, -0.1058349609375, -0.099853515625, -0.0938720703125, -0.087890625, -0.0819091796875, -0.075927734375, -0.0699462890625, -0.06396484375, -0.0579833984375, -0.052001953125, -0.0460205078125, -0.0400390625, -0.0340576171875, -0.028076171875, -0.0220947265625, -0.01611328125, -0.0101318359375, -0.004150390625, 0.0018310546875, 
0.0078125, 0.0137939453125, 0.019775390625, 0.0257568359375, 0.03173828125, 0.0377197265625, 0.043701171875, 0.0496826171875, 0.0556640625, 0.0616455078125, 0.067626953125, 0.0736083984375, 0.07958984375, 0.0855712890625, 0.091552734375, 0.0975341796875, 0.103515625, 0.1094970703125, 0.115478515625, 0.1214599609375, 0.12744140625, 0.1334228515625, 0.139404296875, 0.1453857421875, 0.1513671875, 0.1573486328125, 0.163330078125, 0.1693115234375, 0.17529296875, 0.1812744140625, 0.187255859375, 0.1932373046875]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 1.0, 5.0, 2.0, 3.0, 10.0, 6.0, 8.0, 13.0, 13.0, 14.0, 24.0, 26.0, 30.0, 38.0, 43.0, 44.0, 49.0, 58.0, 53.0, 65.0, 63.0, 35.0, 67.0, 55.0, 44.0, 31.0, 33.0, 28.0, 27.0, 24.0, 17.0, 17.0, 14.0, 11.0, 7.0, 5.0, 5.0, 8.0, 2.0, 2.0, 10.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.328125, -4.20562744140625, -4.0831298828125, -3.96063232421875, -3.838134765625, -3.71563720703125, -3.5931396484375, -3.47064208984375, -3.34814453125, -3.22564697265625, -3.1031494140625, -2.98065185546875, -2.858154296875, -2.73565673828125, -2.6131591796875, -2.49066162109375, -2.3681640625, -2.24566650390625, -2.1231689453125, -2.00067138671875, -1.878173828125, -1.75567626953125, -1.6331787109375, -1.51068115234375, -1.38818359375, -1.26568603515625, -1.1431884765625, -1.02069091796875, -0.898193359375, -0.77569580078125, -0.6531982421875, -0.53070068359375, -0.408203125, -0.28570556640625, -0.1632080078125, -0.04071044921875, 0.081787109375, 0.20428466796875, 0.3267822265625, 0.44927978515625, 0.57177734375, 0.69427490234375, 0.8167724609375, 0.93927001953125, 1.061767578125, 1.18426513671875, 1.3067626953125, 1.42926025390625, 1.5517578125, 1.67425537109375, 1.7967529296875, 1.91925048828125, 2.041748046875, 2.16424560546875, 2.2867431640625, 2.40924072265625, 2.53173828125, 2.65423583984375, 2.7767333984375, 2.89923095703125, 3.021728515625, 3.14422607421875, 3.2667236328125, 3.38922119140625, 3.51171875]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 5.0, 9.0, 9.0, 7.0, 15.0, 17.0, 30.0, 43.0, 74.0, 111.0, 183.0, 289.0, 549.0, 1102.0, 2439.0, 6856.0, 27410.0, 234815.0, 699455.0, 56374.0, 11724.0, 3777.0, 1591.0, 743.0, 393.0, 228.0, 135.0, 55.0, 35.0, 18.0, 23.0, 12.0, 7.0, 3.0, 6.0, 7.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.07318115234375, -0.07104206085205078, -0.06890296936035156, -0.06676387786865234, -0.06462478637695312, -0.062485694885253906, -0.06034660339355469, -0.05820751190185547, -0.05606842041015625, -0.05392932891845703, -0.05179023742675781, -0.049651145935058594, -0.047512054443359375, -0.045372962951660156, -0.04323387145996094, -0.04109477996826172, -0.0389556884765625, -0.03681659698486328, -0.03467750549316406, -0.032538414001464844, -0.030399322509765625, -0.028260231018066406, -0.026121139526367188, -0.02398204803466797, -0.02184295654296875, -0.01970386505126953, -0.017564773559570312, -0.015425682067871094, -0.013286590576171875, -0.011147499084472656, -0.009008407592773438, -0.006869316101074219, -0.004730224609375, -0.0025911331176757812, -0.0004520416259765625, 0.0016870498657226562, 0.003826141357421875, 0.005965232849121094, 0.008104324340820312, 0.010243415832519531, 0.01238250732421875, 0.014521598815917969, 
0.016660690307617188, 0.018799781799316406, 0.020938873291015625, 0.023077964782714844, 0.025217056274414062, 0.02735614776611328, 0.0294952392578125, 0.03163433074951172, 0.03377342224121094, 0.035912513732910156, 0.038051605224609375, 0.040190696716308594, 0.04232978820800781, 0.04446887969970703, 0.04660797119140625, 0.04874706268310547, 0.05088615417480469, 0.053025245666503906, 0.055164337158203125, 0.057303428649902344, 0.05944252014160156, 0.06158161163330078, 0.063720703125]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 6.0, 4.0, 7.0, 7.0, 14.0, 18.0, 24.0, 20.0, 31.0, 40.0, 51.0, 74.0, 84.0, 104.0, 110.0, 97.0, 72.0, 59.0, 48.0, 36.0, 29.0, 14.0, 10.0, 13.0, 14.0, 4.0, 7.0, 1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.647804260253906e-05, -3.5305507481098175e-05, -3.413297235965729e-05, -3.29604372382164e-05, -3.178790211677551e-05, -3.0615366995334625e-05, -2.9442831873893738e-05, -2.827029675245285e-05, -2.7097761631011963e-05, -2.5925226509571075e-05, -2.4752691388130188e-05, -2.35801562666893e-05, -2.2407621145248413e-05, -2.1235086023807526e-05, -2.0062550902366638e-05, -1.889001578092575e-05, -1.7717480659484863e-05, -1.6544945538043976e-05, -1.537241041660309e-05, -1.4199875295162201e-05, -1.3027340173721313e-05, -1.1854805052280426e-05, -1.0682269930839539e-05, -9.509734809398651e-06, -8.337199687957764e-06, -7.164664566516876e-06, -5.992129445075989e-06, -4.819594323635101e-06, -3.647059202194214e-06, -2.4745240807533264e-06, -1.301988959312439e-06, -1.2945383787155151e-07, 1.043081283569336e-06, 2.2156164050102234e-06, 3.388151526451111e-06, 4.560686647891998e-06, 5.733221769332886e-06, 6.905756890773773e-06, 8.07829201221466e-06, 9.250827133655548e-06, 1.0423362255096436e-05, 1.1595897376537323e-05, 1.276843249797821e-05, 1.3940967619419098e-05, 1.5113502740859985e-05, 1.6286037862300873e-05, 1.745857298374176e-05, 1.8631108105182648e-05, 1.9803643226623535e-05, 2.0976178348064423e-05, 2.214871346950531e-05, 2.3321248590946198e-05, 2.4493783712387085e-05, 2.5666318833827972e-05, 2.683885395526886e-05, 2.8011389076709747e-05, 2.9183924198150635e-05, 3.0356459319591522e-05, 3.152899444103241e-05, 3.27015295624733e-05, 3.3874064683914185e-05, 3.504659980535507e-05, 3.621913492679596e-05, 3.739167004823685e-05, 3.8564205169677734e-05]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 5.0, 3.0, 8.0, 5.0, 5.0, 16.0, 8.0, 16.0, 28.0, 54.0, 72.0, 144.0, 364.0, 1182.0, 4816.0, 29794.0, 605331.0, 377292.0, 23588.0, 4114.0, 1011.0, 357.0, 114.0, 75.0, 47.0, 42.0, 17.0, 13.0, 7.0, 4.0, 3.0, 7.0, 4.0, 4.0, 1.0, 2.0, 1.0, 2.0, 1.0, 4.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.10906982421875, -0.10565567016601562, -0.10224151611328125, -0.09882736206054688, -0.0954132080078125, -0.09199905395507812, -0.08858489990234375, -0.08517074584960938, -0.081756591796875, -0.07834243774414062, -0.07492828369140625, -0.07151412963867188, -0.0680999755859375, -0.06468582153320312, -0.06127166748046875, -0.057857513427734375, -0.054443359375, -0.051029205322265625, -0.04761505126953125, -0.044200897216796875, -0.0407867431640625, -0.037372589111328125, -0.03395843505859375, -0.030544281005859375, -0.027130126953125, -0.023715972900390625, 
-0.02030181884765625, -0.016887664794921875, -0.0134735107421875, -0.010059356689453125, -0.00664520263671875, -0.003231048583984375, 0.00018310546875, 0.003597259521484375, 0.00701141357421875, 0.010425567626953125, 0.0138397216796875, 0.017253875732421875, 0.02066802978515625, 0.024082183837890625, 0.027496337890625, 0.030910491943359375, 0.03432464599609375, 0.037738800048828125, 0.0411529541015625, 0.044567108154296875, 0.04798126220703125, 0.051395416259765625, 0.0548095703125, 0.058223724365234375, 0.06163787841796875, 0.06505203247070312, 0.0684661865234375, 0.07188034057617188, 0.07529449462890625, 0.07870864868164062, 0.082122802734375, 0.08553695678710938, 0.08895111083984375, 0.09236526489257812, 0.0957794189453125, 0.09919357299804688, 0.10260772705078125, 0.10602188110351562, 0.10943603515625]}, "gradients/decoder.model.decoder.layers.8.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 5.0, 1.0, 3.0, 4.0, 7.0, 6.0, 12.0, 14.0, 10.0, 17.0, 23.0, 35.0, 66.0, 70.0, 119.0, 163.0, 142.0, 90.0, 59.0, 32.0, 38.0, 25.0, 13.0, 10.0, 14.0, 12.0, 2.0, 6.0, 3.0, 3.0, 0.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.020263671875, -0.019725918769836426, -0.01918816566467285, -0.018650412559509277, -0.018112659454345703, -0.01757490634918213, -0.017037153244018555, -0.01649940013885498, -0.015961647033691406, -0.015423893928527832, -0.014886140823364258, -0.014348387718200684, -0.01381063461303711, -0.013272881507873535, -0.012735128402709961, -0.012197375297546387, -0.011659622192382812, -0.011121869087219238, -0.010584115982055664, -0.01004636287689209, -0.009508609771728516, -0.008970856666564941, -0.008433103561401367, -0.007895350456237793, -0.007357597351074219, -0.0068198442459106445, -0.00628209114074707, -0.005744338035583496, -0.005206584930419922, -0.004668831825256348, -0.0041310787200927734, -0.0035933256149291992, -0.003055572509765625, -0.0025178194046020508, -0.0019800662994384766, -0.0014423131942749023, -0.0009045600891113281, -0.0003668069839477539, 0.0001709461212158203, 0.0007086992263793945, 0.0012464523315429688, 0.001784205436706543, 0.002321958541870117, 0.0028597116470336914, 0.0033974647521972656, 0.00393521785736084, 0.004472970962524414, 0.005010724067687988, 0.0055484771728515625, 0.006086230278015137, 0.006623983383178711, 0.007161736488342285, 0.007699489593505859, 0.008237242698669434, 0.008774995803833008, 0.009312748908996582, 0.009850502014160156, 0.01038825511932373, 0.010926008224487305, 0.011463761329650879, 0.012001514434814453, 0.012539267539978027, 0.013077020645141602, 0.013614773750305176, 0.01415252685546875]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 3.0, 13.0, 8.0, 11.0, 31.0, 34.0, 44.0, 94.0, 117.0, 117.0, 137.0, 123.0, 84.0, 62.0, 51.0, 22.0, 17.0, 16.0, 6.0, 3.0, 3.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.3899691104888916, -2.2656664848327637, -2.1413636207580566, -2.0170609951019287, -1.8927582502365112, -1.7684555053710938, -1.6441528797149658, -1.5198501348495483, -1.3955473899841309, -1.2712446451187134, -1.146941900253296, -1.022639274597168, -0.8983365297317505, -0.774033784866333, -0.6497310996055603, -0.5254284143447876, -0.4011256694793701, 
-0.276822954416275, -0.15252023935317993, -0.02821752429008484, 0.09608519077301025, 0.22038793563842773, 0.34469062089920044, 0.46899330615997314, 0.5932960510253906, 0.7175987958908081, 0.8419014811515808, 0.9662041664123535, 1.090506911277771, 1.2148096561431885, 1.3391122817993164, 1.4634150266647339, 1.5877175331115723, 1.7120202779769897, 1.8363230228424072, 1.9606256484985352, 2.084928512573242, 2.20923113822937, 2.333533763885498, 2.457836627960205, 2.582139253616333, 2.706441879272461, 2.830744743347168, 2.955047369003296, 3.079349994659424, 3.203652858734131, 3.327955484390259, 3.4522581100463867, 3.5765609741210938, 3.7008635997772217, 3.8251664638519287, 3.9494690895080566, 4.073771953582764, 4.1980743408203125, 4.3223772048950195, 4.446680068969727, 4.570982933044434, 4.695285797119141, 4.8195881843566895, 4.9438910484313965, 5.0681939125061035, 5.192496299743652, 5.316799163818359, 5.441102027893066, 5.565404415130615]}, "gradients/decoder.model.decoder.layers.8.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 7.0, 8.0, 9.0, 14.0, 23.0, 26.0, 41.0, 37.0, 47.0, 68.0, 71.0, 46.0, 70.0, 79.0, 77.0, 90.0, 54.0, 46.0, 55.0, 39.0, 28.0, 17.0, 15.0, 11.0, 5.0, 8.0, 4.0, 2.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.6897177696228027, -2.6059787273406982, -2.5222394466400146, -2.43850040435791, -2.3547613620758057, -2.271022319793701, -2.1872830390930176, -2.103543996810913, -2.0198049545288086, -1.9360657930374146, -1.85232675075531, -1.768587589263916, -1.6848485469818115, -1.6011093854904175, -1.5173702239990234, -1.433631181716919, -1.349892020225525, -1.2661528587341309, -1.1824138164520264, -1.0986746549606323, -1.0149356126785278, -0.9311964511871338, -0.8474573493003845, -0.7637182474136353, -0.679979145526886, -0.5962400436401367, -0.5125009417533875, -0.4287618100643158, -0.34502270817756653, -0.26128360629081726, -0.1775444746017456, -0.09380537271499634, -0.01006627082824707, 0.0736728385090828, 0.15741194784641266, 0.24115106463432312, 0.3248901665210724, 0.40862926840782166, 0.4923684000968933, 0.5761075019836426, 0.6598466038703918, 0.7435857057571411, 0.8273248076438904, 0.9110639095306396, 0.9948030710220337, 1.0785421133041382, 1.1622812747955322, 1.2460203170776367, 1.3297594785690308, 1.4134986400604248, 1.4972376823425293, 1.5809768438339233, 1.6647158861160278, 1.7484550476074219, 1.8321940898895264, 1.9159332513809204, 1.9996724128723145, 2.083411455154419, 2.1671507358551025, 2.250889778137207, 2.3346288204193115, 2.418367862701416, 2.5021071434020996, 2.585846185684204, 2.6695852279663086]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 5.0, 4.0, 7.0, 4.0, 13.0, 17.0, 35.0, 52.0, 72.0, 106.0, 207.0, 387.0, 710.0, 1399.0, 2766.0, 5878.0, 14716.0, 43678.0, 199342.0, 592624.0, 131911.0, 33043.0, 11814.0, 4917.0, 2328.0, 1133.0, 581.0, 293.0, 212.0, 119.0, 70.0, 42.0, 23.0, 19.0, 10.0, 7.0, 3.0, 5.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.720703125, -1.662139892578125, -1.60357666015625, -1.545013427734375, -1.4864501953125, -1.427886962890625, -1.36932373046875, -1.310760498046875, -1.252197265625, -1.193634033203125, -1.13507080078125, -1.076507568359375, -1.0179443359375, -0.959381103515625, -0.90081787109375, -0.842254638671875, 
-0.78369140625, -0.725128173828125, -0.66656494140625, -0.608001708984375, -0.5494384765625, -0.490875244140625, -0.43231201171875, -0.373748779296875, -0.315185546875, -0.256622314453125, -0.19805908203125, -0.139495849609375, -0.0809326171875, -0.022369384765625, 0.03619384765625, 0.094757080078125, 0.1533203125, 0.211883544921875, 0.27044677734375, 0.329010009765625, 0.3875732421875, 0.446136474609375, 0.50469970703125, 0.563262939453125, 0.621826171875, 0.680389404296875, 0.73895263671875, 0.797515869140625, 0.8560791015625, 0.914642333984375, 0.97320556640625, 1.031768798828125, 1.09033203125, 1.148895263671875, 1.20745849609375, 1.266021728515625, 1.3245849609375, 1.383148193359375, 1.44171142578125, 1.500274658203125, 1.558837890625, 1.617401123046875, 1.67596435546875, 1.734527587890625, 1.7930908203125, 1.851654052734375, 1.91021728515625, 1.968780517578125, 2.02734375]}, "gradients/decoder.model.decoder.layers.8.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 3.0, 0.0, 2.0, 6.0, 5.0, 8.0, 6.0, 10.0, 18.0, 25.0, 27.0, 22.0, 31.0, 41.0, 44.0, 65.0, 58.0, 60.0, 58.0, 64.0, 71.0, 67.0, 50.0, 55.0, 41.0, 39.0, 25.0, 24.0, 17.0, 13.0, 12.0, 13.0, 10.0, 3.0, 3.0, 3.0, 2.0, 5.0, 0.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.22265625, -5.07391357421875, -4.9251708984375, -4.77642822265625, -4.627685546875, -4.47894287109375, -4.3302001953125, -4.18145751953125, -4.03271484375, -3.88397216796875, -3.7352294921875, -3.58648681640625, -3.437744140625, -3.28900146484375, -3.1402587890625, -2.99151611328125, -2.8427734375, -2.69403076171875, -2.5452880859375, -2.39654541015625, -2.247802734375, -2.09906005859375, -1.9503173828125, -1.80157470703125, -1.65283203125, -1.50408935546875, -1.3553466796875, -1.20660400390625, -1.057861328125, -0.90911865234375, -0.7603759765625, -0.61163330078125, -0.462890625, -0.31414794921875, -0.1654052734375, -0.01666259765625, 0.132080078125, 0.28082275390625, 0.4295654296875, 0.57830810546875, 0.72705078125, 0.87579345703125, 1.0245361328125, 1.17327880859375, 1.322021484375, 1.47076416015625, 1.6195068359375, 1.76824951171875, 1.9169921875, 2.06573486328125, 2.2144775390625, 2.36322021484375, 2.511962890625, 2.66070556640625, 2.8094482421875, 2.95819091796875, 3.10693359375, 3.25567626953125, 3.4044189453125, 3.55316162109375, 3.701904296875, 3.85064697265625, 3.9993896484375, 4.14813232421875, 4.296875]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 2.0, 3.0, 6.0, 6.0, 8.0, 8.0, 13.0, 9.0, 19.0, 21.0, 24.0, 24.0, 28.0, 28.0, 36.0, 31.0, 42.0, 67.0, 109.0, 529.0, 201380.0, 844833.0, 850.0, 112.0, 56.0, 46.0, 34.0, 41.0, 45.0, 29.0, 31.0, 15.0, 13.0, 16.0, 13.0, 10.0, 2.0, 6.0, 2.0, 2.0, 7.0, 1.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-14.234375, -13.803955078125, -13.37353515625, -12.943115234375, -12.5126953125, -12.082275390625, -11.65185546875, -11.221435546875, -10.791015625, -10.360595703125, -9.93017578125, -9.499755859375, -9.0693359375, -8.638916015625, -8.20849609375, -7.778076171875, -7.34765625, -6.917236328125, -6.48681640625, -6.056396484375, -5.6259765625, -5.195556640625, -4.76513671875, -4.334716796875, -3.904296875, -3.473876953125, -3.04345703125, -2.613037109375, -2.1826171875, -1.752197265625, -1.32177734375, -0.891357421875, -0.4609375, -0.030517578125, 0.39990234375, 0.830322265625, 
1.2607421875, 1.691162109375, 2.12158203125, 2.552001953125, 2.982421875, 3.412841796875, 3.84326171875, 4.273681640625, 4.7041015625, 5.134521484375, 5.56494140625, 5.995361328125, 6.42578125, 6.856201171875, 7.28662109375, 7.717041015625, 8.1474609375, 8.577880859375, 9.00830078125, 9.438720703125, 9.869140625, 10.299560546875, 10.72998046875, 11.160400390625, 11.5908203125, 12.021240234375, 12.45166015625, 12.882080078125, 13.3125]}, "gradients/decoder.model.decoder.layers.8.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 6.0, 6.0, 9.0, 11.0, 11.0, 11.0, 18.0, 23.0, 17.0, 27.0, 28.0, 33.0, 34.0, 40.0, 42.0, 46.0, 43.0, 44.0, 54.0, 66.0, 46.0, 37.0, 37.0, 33.0, 34.0, 41.0, 42.0, 34.0, 19.0, 26.0, 11.0, 16.0, 20.0, 10.0, 11.0, 1.0, 1.0, 3.0, 3.0, 2.0, 4.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.943359375, -3.820465087890625, -3.69757080078125, -3.574676513671875, -3.4517822265625, -3.328887939453125, -3.20599365234375, -3.083099365234375, -2.960205078125, -2.837310791015625, -2.71441650390625, -2.591522216796875, -2.4686279296875, -2.345733642578125, -2.22283935546875, -2.099945068359375, -1.97705078125, -1.854156494140625, -1.73126220703125, -1.608367919921875, -1.4854736328125, -1.362579345703125, -1.23968505859375, -1.116790771484375, -0.993896484375, -0.871002197265625, -0.74810791015625, -0.625213623046875, -0.5023193359375, -0.379425048828125, -0.25653076171875, -0.133636474609375, -0.0107421875, 0.112152099609375, 0.23504638671875, 0.357940673828125, 0.4808349609375, 0.603729248046875, 0.72662353515625, 0.849517822265625, 0.972412109375, 1.095306396484375, 1.21820068359375, 1.341094970703125, 1.4639892578125, 1.586883544921875, 1.70977783203125, 1.832672119140625, 1.95556640625, 2.078460693359375, 2.20135498046875, 2.324249267578125, 2.4471435546875, 2.570037841796875, 2.69293212890625, 2.815826416015625, 2.938720703125, 3.061614990234375, 3.18450927734375, 3.307403564453125, 3.4302978515625, 3.553192138671875, 3.67608642578125, 3.798980712890625, 3.921875]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 4.0, 3.0, 5.0, 9.0, 14.0, 11.0, 19.0, 22.0, 37.0, 67.0, 106.0, 180.0, 351.0, 704.0, 1696.0, 4394.0, 13786.0, 61373.0, 599624.0, 311800.0, 38533.0, 9921.0, 3238.0, 1349.0, 580.0, 311.0, 142.0, 96.0, 55.0, 40.0, 24.0, 21.0, 12.0, 10.0, 9.0, 5.0, 2.0, 9.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.8466796875, -0.8216781616210938, -0.7966766357421875, -0.7716751098632812, -0.746673583984375, -0.7216720581054688, -0.6966705322265625, -0.6716690063476562, -0.64666748046875, -0.6216659545898438, -0.5966644287109375, -0.5716629028320312, -0.546661376953125, -0.5216598510742188, -0.4966583251953125, -0.47165679931640625, -0.4466552734375, -0.42165374755859375, -0.3966522216796875, -0.37165069580078125, -0.346649169921875, -0.32164764404296875, -0.2966461181640625, -0.27164459228515625, -0.24664306640625, -0.22164154052734375, -0.1966400146484375, -0.17163848876953125, -0.146636962890625, -0.12163543701171875, -0.0966339111328125, -0.07163238525390625, -0.046630859375, -0.02162933349609375, 0.0033721923828125, 0.02837371826171875, 0.053375244140625, 0.07837677001953125, 0.1033782958984375, 0.12837982177734375, 0.15338134765625, 0.17838287353515625, 0.2033843994140625, 0.22838592529296875, 0.253387451171875, 0.27838897705078125, 
0.3033905029296875, 0.32839202880859375, 0.3533935546875, 0.37839508056640625, 0.4033966064453125, 0.42839813232421875, 0.453399658203125, 0.47840118408203125, 0.5034027099609375, 0.5284042358398438, 0.55340576171875, 0.5784072875976562, 0.6034088134765625, 0.6284103393554688, 0.653411865234375, 0.6784133911132812, 0.7034149169921875, 0.7284164428710938, 0.75341796875]}, "gradients/decoder.model.decoder.layers.8.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 0.0, 5.0, 0.0, 4.0, 10.0, 10.0, 10.0, 16.0, 22.0, 31.0, 40.0, 61.0, 100.0, 158.0, 159.0, 97.0, 80.0, 55.0, 32.0, 30.0, 19.0, 22.0, 12.0, 4.0, 5.0, 5.0, 1.0, 10.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.00011527538299560547, -0.00011168047785758972, -0.00010808557271957397, -0.00010449066758155823, -0.00010089576244354248, -9.730085730552673e-05, -9.370595216751099e-05, -9.011104702949524e-05, -8.651614189147949e-05, -8.292123675346375e-05, -7.9326331615448e-05, -7.573142647743225e-05, -7.21365213394165e-05, -6.854161620140076e-05, -6.494671106338501e-05, -6.135180592536926e-05, -5.7756900787353516e-05, -5.416199564933777e-05, -5.056709051132202e-05, -4.6972185373306274e-05, -4.337728023529053e-05, -3.978237509727478e-05, -3.618746995925903e-05, -3.2592564821243286e-05, -2.899765968322754e-05, -2.5402754545211792e-05, -2.1807849407196045e-05, -1.8212944269180298e-05, -1.461803913116455e-05, -1.1023133993148804e-05, -7.428228855133057e-06, -3.8333237171173096e-06, -2.384185791015625e-07, 3.3564865589141846e-06, 6.951391696929932e-06, 1.0546296834945679e-05, 1.4141201972961426e-05, 1.7736107110977173e-05, 2.133101224899292e-05, 2.4925917387008667e-05, 2.8520822525024414e-05, 3.211572766304016e-05, 3.571063280105591e-05, 3.9305537939071655e-05, 4.29004430770874e-05, 4.649534821510315e-05, 5.0090253353118896e-05, 5.3685158491134644e-05, 5.728006362915039e-05, 6.087496876716614e-05, 6.446987390518188e-05, 6.806477904319763e-05, 7.165968418121338e-05, 7.525458931922913e-05, 7.884949445724487e-05, 8.244439959526062e-05, 8.603930473327637e-05, 8.963420987129211e-05, 9.322911500930786e-05, 9.682402014732361e-05, 0.00010041892528533936, 0.0001040138304233551, 0.00010760873556137085, 0.0001112036406993866, 0.00011479854583740234]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 5.0, 4.0, 8.0, 13.0, 11.0, 15.0, 21.0, 30.0, 42.0, 60.0, 123.0, 164.0, 274.0, 564.0, 1120.0, 2437.0, 6272.0, 19041.0, 71941.0, 551892.0, 319461.0, 51495.0, 14364.0, 5128.0, 1991.0, 937.0, 448.0, 280.0, 129.0, 91.0, 44.0, 39.0, 27.0, 22.0, 18.0, 12.0, 8.0, 4.0, 6.0, 2.0, 8.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.74560546875, -0.722930908203125, -0.70025634765625, -0.677581787109375, -0.6549072265625, -0.632232666015625, -0.60955810546875, -0.586883544921875, -0.564208984375, -0.541534423828125, -0.51885986328125, -0.496185302734375, -0.4735107421875, -0.450836181640625, -0.42816162109375, -0.405487060546875, -0.3828125, -0.360137939453125, -0.33746337890625, -0.314788818359375, -0.2921142578125, -0.269439697265625, -0.24676513671875, -0.224090576171875, -0.201416015625, -0.178741455078125, -0.15606689453125, -0.133392333984375, -0.1107177734375, -0.088043212890625, -0.06536865234375, -0.042694091796875, -0.02001953125, 0.002655029296875, 0.02532958984375, 
0.048004150390625, 0.0706787109375, 0.093353271484375, 0.11602783203125, 0.138702392578125, 0.161376953125, 0.184051513671875, 0.20672607421875, 0.229400634765625, 0.2520751953125, 0.274749755859375, 0.29742431640625, 0.320098876953125, 0.3427734375, 0.365447998046875, 0.38812255859375, 0.410797119140625, 0.4334716796875, 0.456146240234375, 0.47882080078125, 0.501495361328125, 0.524169921875, 0.546844482421875, 0.56951904296875, 0.592193603515625, 0.6148681640625, 0.637542724609375, 0.66021728515625, 0.682891845703125, 0.70556640625]}, "gradients/decoder.model.decoder.layers.8.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 5.0, 3.0, 3.0, 2.0, 4.0, 8.0, 10.0, 13.0, 10.0, 13.0, 18.0, 22.0, 21.0, 29.0, 39.0, 54.0, 77.0, 104.0, 115.0, 104.0, 70.0, 49.0, 37.0, 25.0, 31.0, 19.0, 24.0, 12.0, 14.0, 19.0, 8.0, 11.0, 2.0, 10.0, 6.0, 3.0, 2.0, 2.0, 4.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0], "bins": [-0.28466796875, -0.2768898010253906, -0.26911163330078125, -0.2613334655761719, -0.2535552978515625, -0.24577713012695312, -0.23799896240234375, -0.23022079467773438, -0.222442626953125, -0.21466445922851562, -0.20688629150390625, -0.19910812377929688, -0.1913299560546875, -0.18355178833007812, -0.17577362060546875, -0.16799545288085938, -0.16021728515625, -0.15243911743164062, -0.14466094970703125, -0.13688278198242188, -0.1291046142578125, -0.12132644653320312, -0.11354827880859375, -0.10577011108398438, -0.097991943359375, -0.09021377563476562, -0.08243560791015625, -0.07465744018554688, -0.0668792724609375, -0.059101104736328125, -0.05132293701171875, -0.043544769287109375, -0.0357666015625, -0.027988433837890625, -0.02021026611328125, -0.012432098388671875, -0.0046539306640625, 0.003124237060546875, 0.01090240478515625, 0.018680572509765625, 0.026458740234375, 0.034236907958984375, 0.04201507568359375, 0.049793243408203125, 0.0575714111328125, 0.06534957885742188, 0.07312774658203125, 0.08090591430664062, 0.08868408203125, 0.09646224975585938, 0.10424041748046875, 0.11201858520507812, 0.1197967529296875, 0.12757492065429688, 0.13535308837890625, 0.14313125610351562, 0.150909423828125, 0.15868759155273438, 0.16646575927734375, 0.17424392700195312, 0.1820220947265625, 0.18980026245117188, 0.19757843017578125, 0.20535659790039062, 0.213134765625]}, "gradients/decoder.model.decoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [5.0, 4.0, 16.0, 79.0, 237.0, 439.0, 177.0, 47.0, 12.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.61046838760376, -3.756706714630127, -2.902945041656494, -2.0491833686828613, -1.1954216957092285, -0.3416600227355957, 0.5121016502380371, 1.36586332321167, 2.2196249961853027, 3.0733866691589355, 3.9271483421325684, 4.780910015106201, 5.634671688079834, 6.488433361053467, 7.3421950340271, 8.19595718383789, 9.049718856811523, 9.903480529785156, 10.757242202758789, 11.611003875732422, 12.464765548706055, 13.318527221679688, 14.17228889465332, 15.026050567626953, 15.879812240600586, 16.73357391357422, 17.58733558654785, 18.441097259521484, 19.294858932495117, 20.14862060546875, 21.002382278442383, 21.856143951416016, 22.709903717041016, 23.56366539001465, 24.41742706298828, 25.271188735961914, 26.124950408935547, 
26.97871208190918, 27.832473754882812, 28.686235427856445, 29.539997100830078, 30.39375877380371, 31.247520446777344, 32.101280212402344, 32.95504379272461, 33.808807373046875, 34.662567138671875, 35.516326904296875, 36.37009048461914, 37.223854064941406, 38.077613830566406, 38.931373596191406, 39.78513717651367, 40.63890075683594, 41.49266052246094, 42.34642028808594, 43.2001838684082, 44.05394744873047, 44.90770721435547, 45.76146697998047, 46.615230560302734, 47.468994140625, 48.32275390625, 49.176513671875, 50.030277252197266]}, "gradients/decoder.model.decoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 4.0, 4.0, 5.0, 8.0, 6.0, 8.0, 11.0, 12.0, 21.0, 20.0, 14.0, 30.0, 32.0, 32.0, 31.0, 55.0, 41.0, 52.0, 50.0, 53.0, 66.0, 54.0, 54.0, 48.0, 50.0, 40.0, 35.0, 31.0, 23.0, 30.0, 23.0, 16.0, 10.0, 9.0, 6.0, 10.0, 5.0, 4.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-8.082514762878418, -7.8463239669799805, -7.610133171081543, -7.3739423751831055, -7.13775110244751, -6.901560306549072, -6.665369510650635, -6.429178714752197, -6.192987442016602, -5.956796646118164, -5.720605850219727, -5.484415054321289, -5.248223781585693, -5.012032985687256, -4.775842189788818, -4.539651393890381, -4.303460597991943, -4.067269802093506, -3.8310787677764893, -3.5948879718780518, -3.358696937561035, -3.1225061416625977, -2.88631534576416, -2.6501245498657227, -2.413933515548706, -2.1777427196502686, -1.941551685333252, -1.7053608894348145, -1.4691699743270874, -1.2329790592193604, -0.9967882633209229, -0.7605973482131958, -0.5244064331054688, -0.2882155478000641, -0.052024662494659424, 0.18416619300842285, 0.4203571081161499, 0.656548023223877, 0.8927388191223145, 1.1289297342300415, 1.3651206493377686, 1.6013115644454956, 1.8375024795532227, 2.07369327545166, 2.3098840713500977, 2.5460751056671143, 2.7822659015655518, 3.0184569358825684, 3.254647731781006, 3.4908385276794434, 3.72702956199646, 3.9632203578948975, 4.199411392211914, 4.435602188110352, 4.671792984008789, 4.907983779907227, 5.144174575805664, 5.380365371704102, 5.616556167602539, 5.852746963500977, 6.088938236236572, 6.32512903213501, 6.561319828033447, 6.797510623931885, 7.0337018966674805]}, "gradients/decoder.model.decoder.layers.7.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 6.0, 3.0, 17.0, 14.0, 24.0, 29.0, 59.0, 65.0, 99.0, 190.0, 344.0, 673.0, 1271.0, 2783.0, 6755.0, 17586.0, 61850.0, 298004.0, 1566987.0, 1759518.0, 366687.0, 73480.0, 22596.0, 8537.0, 3531.0, 1486.0, 711.0, 403.0, 235.0, 128.0, 82.0, 34.0, 35.0, 24.0, 9.0, 10.0, 8.0, 5.0, 4.0, 4.0, 1.0, 1.0, 3.0, 3.0, 1.0], "bins": [-4.37890625, -4.26385498046875, -4.1488037109375, -4.03375244140625, -3.918701171875, -3.80364990234375, -3.6885986328125, -3.57354736328125, -3.45849609375, -3.34344482421875, -3.2283935546875, -3.11334228515625, -2.998291015625, -2.88323974609375, -2.7681884765625, -2.65313720703125, -2.5380859375, -2.42303466796875, -2.3079833984375, -2.19293212890625, -2.077880859375, -1.96282958984375, -1.8477783203125, -1.73272705078125, -1.61767578125, -1.50262451171875, -1.3875732421875, -1.27252197265625, -1.157470703125, -1.04241943359375, -0.9273681640625, -0.81231689453125, -0.697265625, -0.58221435546875, -0.4671630859375, -0.35211181640625, -0.237060546875, -0.12200927734375, -0.0069580078125, 0.10809326171875, 0.22314453125, 0.33819580078125, 
0.4532470703125, 0.56829833984375, 0.683349609375, 0.79840087890625, 0.9134521484375, 1.02850341796875, 1.1435546875, 1.25860595703125, 1.3736572265625, 1.48870849609375, 1.603759765625, 1.71881103515625, 1.8338623046875, 1.94891357421875, 2.06396484375, 2.17901611328125, 2.2940673828125, 2.40911865234375, 2.524169921875, 2.63922119140625, 2.7542724609375, 2.86932373046875, 2.984375]}, "gradients/decoder.model.decoder.layers.7.fc2.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 3.0, 3.0, 6.0, 5.0, 8.0, 12.0, 20.0, 22.0, 29.0, 40.0, 40.0, 51.0, 72.0, 60.0, 76.0, 75.0, 87.0, 85.0, 68.0, 50.0, 53.0, 42.0, 20.0, 27.0, 13.0, 12.0, 12.0, 7.0, 3.0, 2.0, 3.0, 5.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.36328125, -4.2369384765625, -4.110595703125, -3.9842529296875, -3.85791015625, -3.7315673828125, -3.605224609375, -3.4788818359375, -3.3525390625, -3.2261962890625, -3.099853515625, -2.9735107421875, -2.84716796875, -2.7208251953125, -2.594482421875, -2.4681396484375, -2.341796875, -2.2154541015625, -2.089111328125, -1.9627685546875, -1.83642578125, -1.7100830078125, -1.583740234375, -1.4573974609375, -1.3310546875, -1.2047119140625, -1.078369140625, -0.9520263671875, -0.82568359375, -0.6993408203125, -0.572998046875, -0.4466552734375, -0.3203125, -0.1939697265625, -0.067626953125, 0.0587158203125, 0.18505859375, 0.3114013671875, 0.437744140625, 0.5640869140625, 0.6904296875, 0.8167724609375, 0.943115234375, 1.0694580078125, 1.19580078125, 1.3221435546875, 1.448486328125, 1.5748291015625, 1.701171875, 1.8275146484375, 1.953857421875, 2.0802001953125, 2.20654296875, 2.3328857421875, 2.459228515625, 2.5855712890625, 2.7119140625, 2.8382568359375, 2.964599609375, 3.0909423828125, 3.21728515625, 3.3436279296875, 3.469970703125, 3.5963134765625, 3.72265625]}, "gradients/decoder.model.decoder.layers.7.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 4.0, 6.0, 9.0, 10.0, 15.0, 37.0, 52.0, 113.0, 305.0, 1071.0, 5556.0, 257568.0, 3898602.0, 27735.0, 2237.0, 586.0, 185.0, 85.0, 59.0, 19.0, 8.0, 10.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-9.953125, -9.588134765625, -9.22314453125, -8.858154296875, -8.4931640625, -8.128173828125, -7.76318359375, -7.398193359375, -7.033203125, -6.668212890625, -6.30322265625, -5.938232421875, -5.5732421875, -5.208251953125, -4.84326171875, -4.478271484375, -4.11328125, -3.748291015625, -3.38330078125, -3.018310546875, -2.6533203125, -2.288330078125, -1.92333984375, -1.558349609375, -1.193359375, -0.828369140625, -0.46337890625, -0.098388671875, 0.2666015625, 0.631591796875, 0.99658203125, 1.361572265625, 1.7265625, 2.091552734375, 2.45654296875, 2.821533203125, 3.1865234375, 3.551513671875, 3.91650390625, 4.281494140625, 4.646484375, 5.011474609375, 5.37646484375, 5.741455078125, 6.1064453125, 6.471435546875, 6.83642578125, 7.201416015625, 7.56640625, 7.931396484375, 8.29638671875, 8.661376953125, 9.0263671875, 9.391357421875, 9.75634765625, 10.121337890625, 10.486328125, 10.851318359375, 11.21630859375, 11.581298828125, 11.9462890625, 12.311279296875, 12.67626953125, 13.041259765625, 13.40625]}, "gradients/decoder.model.decoder.layers.7.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 7.0, 
2.0, 8.0, 13.0, 21.0, 36.0, 51.0, 82.0, 117.0, 235.0, 388.0, 653.0, 847.0, 664.0, 377.0, 225.0, 117.0, 78.0, 52.0, 24.0, 22.0, 17.0, 14.0, 5.0, 10.0, 4.0, 4.0, 6.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.654296875, -1.60003662109375, -1.5457763671875, -1.49151611328125, -1.437255859375, -1.38299560546875, -1.3287353515625, -1.27447509765625, -1.22021484375, -1.16595458984375, -1.1116943359375, -1.05743408203125, -1.003173828125, -0.94891357421875, -0.8946533203125, -0.84039306640625, -0.7861328125, -0.73187255859375, -0.6776123046875, -0.62335205078125, -0.569091796875, -0.51483154296875, -0.4605712890625, -0.40631103515625, -0.35205078125, -0.29779052734375, -0.2435302734375, -0.18927001953125, -0.135009765625, -0.08074951171875, -0.0264892578125, 0.02777099609375, 0.08203125, 0.13629150390625, 0.1905517578125, 0.24481201171875, 0.299072265625, 0.35333251953125, 0.4075927734375, 0.46185302734375, 0.51611328125, 0.57037353515625, 0.6246337890625, 0.67889404296875, 0.733154296875, 0.78741455078125, 0.8416748046875, 0.89593505859375, 0.9501953125, 1.00445556640625, 1.0587158203125, 1.11297607421875, 1.167236328125, 1.22149658203125, 1.2757568359375, 1.33001708984375, 1.38427734375, 1.43853759765625, 1.4927978515625, 1.54705810546875, 1.601318359375, 1.65557861328125, 1.7098388671875, 1.76409912109375, 1.818359375]}, "gradients/decoder.model.decoder.layers.7.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 4.0, 3.0, 10.0, 17.0, 19.0, 18.0, 30.0, 27.0, 35.0, 58.0, 50.0, 67.0, 78.0, 86.0, 67.0, 84.0, 78.0, 58.0, 48.0, 35.0, 33.0, 24.0, 17.0, 13.0, 11.0, 6.0, 6.0, 7.0, 6.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-3.5593817234039307, -3.433736562728882, -3.308091402053833, -3.182446241378784, -3.0568008422851562, -2.9311556816101074, -2.8055105209350586, -2.6798653602600098, -2.554220199584961, -2.428575038909912, -2.3029298782348633, -2.1772847175598145, -2.0516395568847656, -1.9259942770004272, -1.8003489971160889, -1.67470383644104, -1.5490586757659912, -1.4234135150909424, -1.2977683544158936, -1.1721230745315552, -1.0464779138565063, -0.9208327531814575, -0.7951875329017639, -0.6695423126220703, -0.5438971519470215, -0.41825196146965027, -0.29260677099227905, -0.16696158051490784, -0.04131639003753662, 0.08432877063751221, 0.2099739909172058, 0.3356192111968994, 0.46126461029052734, 0.5869097709655762, 0.7125549912452698, 0.8382002115249634, 0.9638453722000122, 1.089490532875061, 1.2151358127593994, 1.3407809734344482, 1.466426134109497, 1.592071294784546, 1.7177164554595947, 1.843361735343933, 1.969006896018982, 2.0946521759033203, 2.220297336578369, 2.345942497253418, 2.471587657928467, 2.5972328186035156, 2.7228779792785645, 2.8485231399536133, 2.974168300628662, 3.099813461303711, 3.225458860397339, 3.3511040210723877, 3.4767491817474365, 3.6023943424224854, 3.728039503097534, 3.853684663772583, 3.979330062866211, 4.10497522354126, 4.230620384216309, 4.356265544891357, 4.481910705566406]}, "gradients/decoder.model.decoder.layers.7.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 4.0, 2.0, 4.0, 4.0, 10.0, 1.0, 10.0, 7.0, 18.0, 15.0, 14.0, 26.0, 27.0, 31.0, 24.0, 28.0, 39.0, 44.0, 32.0, 46.0, 43.0, 47.0, 43.0, 46.0, 49.0, 49.0, 47.0, 45.0, 38.0, 25.0, 30.0, 32.0, 18.0, 22.0, 13.0, 13.0, 13.0, 9.0, 7.0, 
8.0, 8.0, 3.0, 5.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-3.0234243869781494, -2.9325292110443115, -2.8416337966918945, -2.7507386207580566, -2.6598434448242188, -2.568948268890381, -2.478053092956543, -2.387157678604126, -2.296262502670288, -2.20536732673645, -2.114471912384033, -2.0235767364501953, -1.9326815605163574, -1.8417863845825195, -1.750891089439392, -1.6599957942962646, -1.5691006183624268, -1.4782054424285889, -1.3873101472854614, -1.296414852142334, -1.205519676208496, -1.1146245002746582, -1.0237292051315308, -0.9328339695930481, -0.8419387340545654, -0.7510434985160828, -0.6601482629776001, -0.5692530274391174, -0.47835779190063477, -0.3874625563621521, -0.29656732082366943, -0.20567208528518677, -0.114776611328125, -0.023881375789642334, 0.06701385974884033, 0.157909095287323, 0.24880433082580566, 0.33969956636428833, 0.430594801902771, 0.5214900374412537, 0.6123852729797363, 0.703280508518219, 0.7941757440567017, 0.8850709795951843, 0.975966215133667, 1.0668613910675049, 1.1577566862106323, 1.2486519813537598, 1.3395471572875977, 1.4304423332214355, 1.521337628364563, 1.6122329235076904, 1.7031280994415283, 1.7940232753753662, 1.8849185705184937, 1.975813865661621, 2.066709041595459, 2.157604217529297, 2.2484993934631348, 2.3393948078155518, 2.4302899837493896, 2.5211851596832275, 2.6120805740356445, 2.7029757499694824, 2.7938709259033203]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 8.0, 3.0, 8.0, 11.0, 18.0, 19.0, 29.0, 62.0, 72.0, 102.0, 151.0, 266.0, 530.0, 1054.0, 2541.0, 6841.0, 23233.0, 118416.0, 657532.0, 190203.0, 32752.0, 8758.0, 3035.0, 1370.0, 657.0, 335.0, 225.0, 113.0, 78.0, 53.0, 30.0, 13.0, 16.0, 6.0, 7.0, 4.0, 4.0, 1.0, 4.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.73291015625, -0.7117385864257812, -0.6905670166015625, -0.6693954467773438, -0.648223876953125, -0.6270523071289062, -0.6058807373046875, -0.5847091674804688, -0.56353759765625, -0.5423660278320312, -0.5211944580078125, -0.5000228881835938, -0.478851318359375, -0.45767974853515625, -0.4365081787109375, -0.41533660888671875, -0.3941650390625, -0.37299346923828125, -0.3518218994140625, -0.33065032958984375, -0.309478759765625, -0.28830718994140625, -0.2671356201171875, -0.24596405029296875, -0.22479248046875, -0.20362091064453125, -0.1824493408203125, -0.16127777099609375, -0.140106201171875, -0.11893463134765625, -0.0977630615234375, -0.07659149169921875, -0.055419921875, -0.03424835205078125, -0.0130767822265625, 0.00809478759765625, 0.029266357421875, 0.05043792724609375, 0.0716094970703125, 0.09278106689453125, 0.11395263671875, 0.13512420654296875, 0.1562957763671875, 0.17746734619140625, 0.198638916015625, 0.21981048583984375, 0.2409820556640625, 0.26215362548828125, 0.2833251953125, 0.30449676513671875, 0.3256683349609375, 0.34683990478515625, 0.368011474609375, 0.38918304443359375, 0.4103546142578125, 0.43152618408203125, 0.45269775390625, 0.47386932373046875, 0.4950408935546875, 0.5162124633789062, 0.537384033203125, 0.5585556030273438, 0.5797271728515625, 0.6008987426757812, 0.6220703125]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 3.0, 3.0, 2.0, 2.0, 6.0, 12.0, 6.0, 16.0, 15.0, 31.0, 35.0, 39.0, 50.0, 54.0, 56.0, 73.0, 64.0, 69.0, 70.0, 71.0, 67.0, 55.0, 50.0, 25.0, 
33.0, 21.0, 16.0, 15.0, 16.0, 8.0, 8.0, 4.0, 4.0, 2.0, 4.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.4921875, -2.410003662109375, -2.32781982421875, -2.245635986328125, -2.1634521484375, -2.081268310546875, -1.99908447265625, -1.916900634765625, -1.834716796875, -1.752532958984375, -1.67034912109375, -1.588165283203125, -1.5059814453125, -1.423797607421875, -1.34161376953125, -1.259429931640625, -1.17724609375, -1.095062255859375, -1.01287841796875, -0.930694580078125, -0.8485107421875, -0.766326904296875, -0.68414306640625, -0.601959228515625, -0.519775390625, -0.437591552734375, -0.35540771484375, -0.273223876953125, -0.1910400390625, -0.108856201171875, -0.02667236328125, 0.055511474609375, 0.1376953125, 0.219879150390625, 0.30206298828125, 0.384246826171875, 0.4664306640625, 0.548614501953125, 0.63079833984375, 0.712982177734375, 0.795166015625, 0.877349853515625, 0.95953369140625, 1.041717529296875, 1.1239013671875, 1.206085205078125, 1.28826904296875, 1.370452880859375, 1.45263671875, 1.534820556640625, 1.61700439453125, 1.699188232421875, 1.7813720703125, 1.863555908203125, 1.94573974609375, 2.027923583984375, 2.110107421875, 2.192291259765625, 2.27447509765625, 2.356658935546875, 2.4388427734375, 2.521026611328125, 2.60321044921875, 2.685394287109375, 2.767578125]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 8.0, 6.0, 10.0, 16.0, 17.0, 29.0, 46.0, 61.0, 107.0, 168.0, 276.0, 405.0, 715.0, 1192.0, 1787.0, 3167.0, 5514.0, 10051.0, 19186.0, 38034.0, 82440.0, 195843.0, 352129.0, 183198.0, 77113.0, 36634.0, 18004.0, 9568.0, 5269.0, 3030.0, 1762.0, 1022.0, 647.0, 391.0, 259.0, 158.0, 108.0, 62.0, 37.0, 23.0, 18.0, 17.0, 10.0, 9.0, 4.0, 3.0, 2.0, 4.0, 0.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.210205078125, -0.20330429077148438, -0.19640350341796875, -0.18950271606445312, -0.1826019287109375, -0.17570114135742188, -0.16880035400390625, -0.16189956665039062, -0.154998779296875, -0.14809799194335938, -0.14119720458984375, -0.13429641723632812, -0.1273956298828125, -0.12049484252929688, -0.11359405517578125, -0.10669326782226562, -0.09979248046875, -0.09289169311523438, -0.08599090576171875, -0.07909011840820312, -0.0721893310546875, -0.06528854370117188, -0.05838775634765625, -0.051486968994140625, -0.044586181640625, -0.037685394287109375, -0.03078460693359375, -0.023883819580078125, -0.0169830322265625, -0.010082244873046875, -0.00318145751953125, 0.003719329833984375, 0.0106201171875, 0.017520904541015625, 0.02442169189453125, 0.031322479248046875, 0.0382232666015625, 0.045124053955078125, 0.05202484130859375, 0.058925628662109375, 0.065826416015625, 0.07272720336914062, 0.07962799072265625, 0.08652877807617188, 0.0934295654296875, 0.10033035278320312, 0.10723114013671875, 0.11413192749023438, 0.12103271484375, 0.12793350219726562, 0.13483428955078125, 0.14173507690429688, 0.1486358642578125, 0.15553665161132812, 0.16243743896484375, 0.16933822631835938, 0.176239013671875, 0.18313980102539062, 0.19004058837890625, 0.19694137573242188, 0.2038421630859375, 0.21074295043945312, 0.21764373779296875, 0.22454452514648438, 0.2314453125]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 5.0, 4.0, 6.0, 6.0, 6.0, 13.0, 17.0, 9.0, 15.0, 18.0, 33.0, 24.0, 32.0, 37.0, 30.0, 32.0, 34.0, 40.0, 36.0, 43.0, 43.0, 63.0, 54.0, 46.0, 30.0, 36.0, 25.0, 
30.0, 27.0, 35.0, 21.0, 25.0, 16.0, 18.0, 19.0, 12.0, 12.0, 12.0, 9.0, 3.0, 4.0, 3.0, 5.0, 5.0, 4.0, 0.0, 4.0, 4.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.078125, -2.972900390625, -2.86767578125, -2.762451171875, -2.6572265625, -2.552001953125, -2.44677734375, -2.341552734375, -2.236328125, -2.131103515625, -2.02587890625, -1.920654296875, -1.8154296875, -1.710205078125, -1.60498046875, -1.499755859375, -1.39453125, -1.289306640625, -1.18408203125, -1.078857421875, -0.9736328125, -0.868408203125, -0.76318359375, -0.657958984375, -0.552734375, -0.447509765625, -0.34228515625, -0.237060546875, -0.1318359375, -0.026611328125, 0.07861328125, 0.183837890625, 0.2890625, 0.394287109375, 0.49951171875, 0.604736328125, 0.7099609375, 0.815185546875, 0.92041015625, 1.025634765625, 1.130859375, 1.236083984375, 1.34130859375, 1.446533203125, 1.5517578125, 1.656982421875, 1.76220703125, 1.867431640625, 1.97265625, 2.077880859375, 2.18310546875, 2.288330078125, 2.3935546875, 2.498779296875, 2.60400390625, 2.709228515625, 2.814453125, 2.919677734375, 3.02490234375, 3.130126953125, 3.2353515625, 3.340576171875, 3.44580078125, 3.551025390625, 3.65625]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 1.0, 3.0, 10.0, 9.0, 13.0, 13.0, 21.0, 38.0, 40.0, 83.0, 129.0, 195.0, 292.0, 560.0, 956.0, 1843.0, 4468.0, 12969.0, 48032.0, 326534.0, 551631.0, 72354.0, 17588.0, 5866.0, 2362.0, 1076.0, 584.0, 335.0, 193.0, 122.0, 77.0, 49.0, 33.0, 24.0, 14.0, 8.0, 14.0, 2.0, 2.0, 6.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0], "bins": [-0.06353759765625, -0.06172895431518555, -0.059920310974121094, -0.05811166763305664, -0.05630302429199219, -0.054494380950927734, -0.05268573760986328, -0.05087709426879883, -0.049068450927734375, -0.04725980758666992, -0.04545116424560547, -0.043642520904541016, -0.04183387756347656, -0.04002523422241211, -0.038216590881347656, -0.0364079475402832, -0.03459930419921875, -0.0327906608581543, -0.030982017517089844, -0.02917337417602539, -0.027364730834960938, -0.025556087493896484, -0.02374744415283203, -0.021938800811767578, -0.020130157470703125, -0.018321514129638672, -0.01651287078857422, -0.014704227447509766, -0.012895584106445312, -0.01108694076538086, -0.009278297424316406, -0.007469654083251953, -0.0056610107421875, -0.003852367401123047, -0.0020437240600585938, -0.00023508071899414062, 0.0015735626220703125, 0.0033822059631347656, 0.005190849304199219, 0.006999492645263672, 0.008808135986328125, 0.010616779327392578, 0.012425422668457031, 0.014234066009521484, 0.016042709350585938, 0.01785135269165039, 0.019659996032714844, 0.021468639373779297, 0.02327728271484375, 0.025085926055908203, 0.026894569396972656, 0.02870321273803711, 0.030511856079101562, 0.032320499420166016, 0.03412914276123047, 0.03593778610229492, 0.037746429443359375, 0.03955507278442383, 0.04136371612548828, 0.043172359466552734, 0.04498100280761719, 0.04678964614868164, 0.048598289489746094, 0.05040693283081055, 0.052215576171875]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 3.0, 4.0, 6.0, 2.0, 1.0, 8.0, 8.0, 9.0, 11.0, 13.0, 18.0, 26.0, 27.0, 35.0, 37.0, 46.0, 42.0, 62.0, 79.0, 76.0, 48.0, 63.0, 62.0, 60.0, 53.0, 38.0, 32.0, 28.0, 25.0, 17.0, 10.0, 7.0, 8.0, 8.0, 9.0, 7.0, 1.0, 2.0, 2.0, 3.0, 3.0, 0.0, 4.0, 2.0, 0.0, 1.0, 0.0, 
2.0], "bins": [-3.24249267578125e-05, -3.152620047330856e-05, -3.0627474188804626e-05, -2.972874790430069e-05, -2.8830021619796753e-05, -2.7931295335292816e-05, -2.703256905078888e-05, -2.6133842766284943e-05, -2.5235116481781006e-05, -2.433639019727707e-05, -2.3437663912773132e-05, -2.2538937628269196e-05, -2.164021134376526e-05, -2.0741485059261322e-05, -1.9842758774757385e-05, -1.894403249025345e-05, -1.8045306205749512e-05, -1.7146579921245575e-05, -1.6247853636741638e-05, -1.53491273522377e-05, -1.4450401067733765e-05, -1.3551674783229828e-05, -1.2652948498725891e-05, -1.1754222214221954e-05, -1.0855495929718018e-05, -9.95676964521408e-06, -9.058043360710144e-06, -8.159317076206207e-06, -7.2605907917022705e-06, -6.361864507198334e-06, -5.463138222694397e-06, -4.56441193819046e-06, -3.6656856536865234e-06, -2.7669593691825867e-06, -1.86823308467865e-06, -9.695068001747131e-07, -7.078051567077637e-08, 8.279457688331604e-07, 1.7266720533370972e-06, 2.625398337841034e-06, 3.5241246223449707e-06, 4.4228509068489075e-06, 5.321577191352844e-06, 6.220303475856781e-06, 7.119029760360718e-06, 8.017756044864655e-06, 8.916482329368591e-06, 9.815208613872528e-06, 1.0713934898376465e-05, 1.1612661182880402e-05, 1.2511387467384338e-05, 1.3410113751888275e-05, 1.4308840036392212e-05, 1.5207566320896149e-05, 1.6106292605400085e-05, 1.7005018889904022e-05, 1.790374517440796e-05, 1.8802471458911896e-05, 1.9701197743415833e-05, 2.059992402791977e-05, 2.1498650312423706e-05, 2.2397376596927643e-05, 2.329610288143158e-05, 2.4194829165935516e-05, 2.5093555450439453e-05]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 0.0, 4.0, 1.0, 4.0, 3.0, 4.0, 4.0, 9.0, 19.0, 23.0, 38.0, 52.0, 107.0, 298.0, 927.0, 4120.0, 33014.0, 749889.0, 240943.0, 15647.0, 2463.0, 546.0, 207.0, 91.0, 49.0, 24.0, 17.0, 11.0, 14.0, 10.0, 9.0, 3.0, 0.0, 2.0, 3.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.150146484375, -0.1460704803466797, -0.14199447631835938, -0.13791847229003906, -0.13384246826171875, -0.12976646423339844, -0.12569046020507812, -0.12161445617675781, -0.1175384521484375, -0.11346244812011719, -0.10938644409179688, -0.10531044006347656, -0.10123443603515625, -0.09715843200683594, -0.09308242797851562, -0.08900642395019531, -0.084930419921875, -0.08085441589355469, -0.07677841186523438, -0.07270240783691406, -0.06862640380859375, -0.06455039978027344, -0.060474395751953125, -0.05639839172363281, -0.0523223876953125, -0.04824638366699219, -0.044170379638671875, -0.04009437561035156, -0.03601837158203125, -0.03194236755371094, -0.027866363525390625, -0.023790359497070312, -0.01971435546875, -0.015638351440429688, -0.011562347412109375, -0.0074863433837890625, -0.00341033935546875, 0.0006656646728515625, 0.004741668701171875, 0.008817672729492188, 0.0128936767578125, 0.016969680786132812, 0.021045684814453125, 0.025121688842773438, 0.02919769287109375, 0.03327369689941406, 0.037349700927734375, 0.04142570495605469, 0.045501708984375, 0.04957771301269531, 0.053653717041015625, 0.05772972106933594, 0.06180572509765625, 0.06588172912597656, 0.06995773315429688, 0.07403373718261719, 0.0781097412109375, 0.08218574523925781, 0.08626174926757812, 0.09033775329589844, 0.09441375732421875, 0.09848976135253906, 0.10256576538085938, 0.10664176940917969, 0.1107177734375]}, "gradients/decoder.model.decoder.layers.7.encoder_attn.q_proj.bias": {"_type": "histogram", 
"values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 6.0, 3.0, 8.0, 6.0, 6.0, 9.0, 14.0, 13.0, 10.0, 21.0, 21.0, 22.0, 23.0, 52.0, 75.0, 110.0, 105.0, 134.0, 85.0, 63.0, 38.0, 27.0, 25.0, 24.0, 13.0, 21.0, 19.0, 9.0, 13.0, 7.0, 4.0, 3.0, 4.0, 4.0, 2.0, 5.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.01727294921875, -0.01674628257751465, -0.016219615936279297, -0.015692949295043945, -0.015166282653808594, -0.014639616012573242, -0.01411294937133789, -0.013586282730102539, -0.013059616088867188, -0.012532949447631836, -0.012006282806396484, -0.011479616165161133, -0.010952949523925781, -0.01042628288269043, -0.009899616241455078, -0.009372949600219727, -0.008846282958984375, -0.008319616317749023, -0.007792949676513672, -0.00726628303527832, -0.006739616394042969, -0.006212949752807617, -0.005686283111572266, -0.005159616470336914, -0.0046329498291015625, -0.004106283187866211, -0.0035796165466308594, -0.003052949905395508, -0.0025262832641601562, -0.0019996166229248047, -0.0014729499816894531, -0.0009462833404541016, -0.00041961669921875, 0.00010704994201660156, 0.0006337165832519531, 0.0011603832244873047, 0.0016870498657226562, 0.002213716506958008, 0.0027403831481933594, 0.003267049789428711, 0.0037937164306640625, 0.004320383071899414, 0.004847049713134766, 0.005373716354370117, 0.005900382995605469, 0.00642704963684082, 0.006953716278076172, 0.0074803829193115234, 0.008007049560546875, 0.008533716201782227, 0.009060382843017578, 0.00958704948425293, 0.010113716125488281, 0.010640382766723633, 0.011167049407958984, 0.011693716049194336, 0.012220382690429688, 0.012747049331665039, 0.01327371597290039, 0.013800382614135742, 0.014327049255371094, 0.014853715896606445, 0.015380382537841797, 0.01590704917907715, 0.0164337158203125]}, "gradients/decoder.model.decoder.layers.7.self_attn_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 2.0, 1.0, 5.0, 2.0, 4.0, 7.0, 17.0, 19.0, 29.0, 42.0, 63.0, 83.0, 120.0, 119.0, 114.0, 111.0, 91.0, 60.0, 38.0, 29.0, 16.0, 10.0, 9.0, 7.0, 4.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-3.538412570953369, -3.428050994873047, -3.3176894187927246, -3.2073278427124023, -3.09696626663208, -2.986604690551758, -2.8762431144714355, -2.7658815383911133, -2.655519962310791, -2.5451583862304688, -2.4347968101501465, -2.324435234069824, -2.214073657989502, -2.1037120819091797, -1.9933505058288574, -1.8829889297485352, -1.772627353668213, -1.6622657775878906, -1.5519042015075684, -1.441542625427246, -1.3311810493469238, -1.2208194732666016, -1.1104578971862793, -1.000096321105957, -0.8897347450256348, -0.7793731689453125, -0.6690115928649902, -0.558650016784668, -0.4482884407043457, -0.33792686462402344, -0.22756528854370117, -0.1172037124633789, -0.006842374801635742, 0.10351920127868652, 0.2138807773590088, 0.32424235343933105, 0.4346039295196533, 0.5449655055999756, 0.6553270816802979, 0.7656886577606201, 0.8760502338409424, 0.9864118099212646, 1.096773386001587, 1.2071349620819092, 1.3174965381622314, 1.4278581142425537, 1.538219690322876, 1.6485812664031982, 1.7589428424835205, 1.8693044185638428, 1.979665994644165, 2.0900275707244873, 2.2003891468048096, 2.310750722885132, 2.421112298965454, 2.5314738750457764, 2.6418354511260986, 2.752197027206421, 2.862558603286743, 2.9729201793670654, 3.0832817554473877, 3.19364333152771, 
3.3040049076080322, 3.4143664836883545, 3.5247280597686768]}, "gradients/decoder.model.decoder.layers.7.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 3.0, 2.0, 1.0, 3.0, 7.0, 4.0, 2.0, 5.0, 9.0, 9.0, 23.0, 16.0, 21.0, 32.0, 22.0, 35.0, 35.0, 41.0, 60.0, 42.0, 49.0, 52.0, 56.0, 52.0, 60.0, 44.0, 63.0, 40.0, 36.0, 35.0, 25.0, 28.0, 18.0, 15.0, 14.0, 7.0, 12.0, 10.0, 3.0, 5.0, 7.0, 1.0, 1.0, 1.0, 4.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-1.9951624870300293, -1.933447003364563, -1.8717315196990967, -1.8100160360336304, -1.748300552368164, -1.6865849494934082, -1.624869465827942, -1.5631539821624756, -1.5014384984970093, -1.439723014831543, -1.3780075311660767, -1.3162920475006104, -1.2545764446258545, -1.1928610801696777, -1.1311454772949219, -1.0694299936294556, -1.0077145099639893, -0.945999026298523, -0.8842835426330566, -0.8225679993629456, -0.7608525156974792, -0.6991370320320129, -0.6374214887619019, -0.5757060050964355, -0.5139905214309692, -0.45227503776550293, -0.39055952429771423, -0.32884401082992554, -0.26712852716445923, -0.20541304349899292, -0.14369753003120422, -0.08198201656341553, -0.020266413688659668, 0.041449084877967834, 0.10316458344459534, 0.16488008201122284, 0.22659558057785034, 0.28831106424331665, 0.35002657771110535, 0.41174209117889404, 0.47345757484436035, 0.5351730585098267, 0.596888542175293, 0.658604085445404, 0.7203195691108704, 0.7820350527763367, 0.8437505960464478, 0.9054660797119141, 0.9671815633773804, 1.0288970470428467, 1.090612530708313, 1.1523280143737793, 1.2140436172485352, 1.275758981704712, 1.3374745845794678, 1.399190068244934, 1.4609055519104004, 1.5226210355758667, 1.584336519241333, 1.6460520029067993, 1.7077674865722656, 1.7694830894470215, 1.8311985731124878, 1.892914056777954, 1.9546295404434204]}, "gradients/decoder.model.decoder.layers.7.self_attn.out_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 4.0, 1.0, 2.0, 5.0, 7.0, 7.0, 7.0, 16.0, 19.0, 25.0, 20.0, 42.0, 57.0, 122.0, 300.0, 1115.0, 4333.0, 25008.0, 217968.0, 668072.0, 112472.0, 14752.0, 2921.0, 773.0, 231.0, 76.0, 70.0, 33.0, 23.0, 11.0, 11.0, 9.0, 6.0, 9.0, 3.0, 6.0, 3.0, 0.0, 2.0, 3.0, 2.0, 2.0, 3.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-5.3203125, -5.1558837890625, -4.991455078125, -4.8270263671875, -4.66259765625, -4.4981689453125, -4.333740234375, -4.1693115234375, -4.0048828125, -3.8404541015625, -3.676025390625, -3.5115966796875, -3.34716796875, -3.1827392578125, -3.018310546875, -2.8538818359375, -2.689453125, -2.5250244140625, -2.360595703125, -2.1961669921875, -2.03173828125, -1.8673095703125, -1.702880859375, -1.5384521484375, -1.3740234375, -1.2095947265625, -1.045166015625, -0.8807373046875, -0.71630859375, -0.5518798828125, -0.387451171875, -0.2230224609375, -0.05859375, 0.1058349609375, 0.270263671875, 0.4346923828125, 0.59912109375, 0.7635498046875, 0.927978515625, 1.0924072265625, 1.2568359375, 1.4212646484375, 1.585693359375, 1.7501220703125, 1.91455078125, 2.0789794921875, 2.243408203125, 2.4078369140625, 2.572265625, 2.7366943359375, 2.901123046875, 3.0655517578125, 3.22998046875, 3.3944091796875, 3.558837890625, 3.7232666015625, 3.8876953125, 4.0521240234375, 4.216552734375, 4.3809814453125, 4.54541015625, 4.7098388671875, 4.874267578125, 5.0386962890625, 5.203125]}, "gradients/decoder.model.decoder.layers.7.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 
4.0, 3.0, 9.0, 13.0, 12.0, 21.0, 27.0, 48.0, 51.0, 68.0, 97.0, 100.0, 94.0, 103.0, 93.0, 88.0, 66.0, 38.0, 28.0, 14.0, 15.0, 11.0, 2.0, 5.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.07421875, -3.85333251953125, -3.6324462890625, -3.41156005859375, -3.190673828125, -2.96978759765625, -2.7489013671875, -2.52801513671875, -2.30712890625, -2.08624267578125, -1.8653564453125, -1.64447021484375, -1.423583984375, -1.20269775390625, -0.9818115234375, -0.76092529296875, -0.5400390625, -0.31915283203125, -0.0982666015625, 0.12261962890625, 0.343505859375, 0.56439208984375, 0.7852783203125, 1.00616455078125, 1.22705078125, 1.44793701171875, 1.6688232421875, 1.88970947265625, 2.110595703125, 2.33148193359375, 2.5523681640625, 2.77325439453125, 2.994140625, 3.21502685546875, 3.4359130859375, 3.65679931640625, 3.877685546875, 4.09857177734375, 4.3194580078125, 4.54034423828125, 4.76123046875, 4.98211669921875, 5.2030029296875, 5.42388916015625, 5.644775390625, 5.86566162109375, 6.0865478515625, 6.30743408203125, 6.5283203125, 6.74920654296875, 6.9700927734375, 7.19097900390625, 7.411865234375, 7.63275146484375, 7.8536376953125, 8.07452392578125, 8.29541015625, 8.51629638671875, 8.7371826171875, 8.95806884765625, 9.178955078125, 9.39984130859375, 9.6207275390625, 9.84161376953125, 10.0625]}, "gradients/decoder.model.decoder.layers.7.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 5.0, 1.0, 3.0, 2.0, 3.0, 7.0, 10.0, 12.0, 19.0, 30.0, 28.0, 46.0, 44.0, 55.0, 95.0, 136.0, 180.0, 347.0, 687.0, 1559.0, 5324.0, 34061.0, 448777.0, 509013.0, 39033.0, 5639.0, 1652.0, 701.0, 399.0, 185.0, 142.0, 93.0, 77.0, 50.0, 39.0, 24.0, 20.0, 17.0, 16.0, 9.0, 9.0, 8.0, 4.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0], "bins": [-5.76171875, -5.5924072265625, -5.423095703125, -5.2537841796875, -5.08447265625, -4.9151611328125, -4.745849609375, -4.5765380859375, -4.4072265625, -4.2379150390625, -4.068603515625, -3.8992919921875, -3.72998046875, -3.5606689453125, -3.391357421875, -3.2220458984375, -3.052734375, -2.8834228515625, -2.714111328125, -2.5447998046875, -2.37548828125, -2.2061767578125, -2.036865234375, -1.8675537109375, -1.6982421875, -1.5289306640625, -1.359619140625, -1.1903076171875, -1.02099609375, -0.8516845703125, -0.682373046875, -0.5130615234375, -0.34375, -0.1744384765625, -0.005126953125, 0.1641845703125, 0.33349609375, 0.5028076171875, 0.672119140625, 0.8414306640625, 1.0107421875, 1.1800537109375, 1.349365234375, 1.5186767578125, 1.68798828125, 1.8572998046875, 2.026611328125, 2.1959228515625, 2.365234375, 2.5345458984375, 2.703857421875, 2.8731689453125, 3.04248046875, 3.2117919921875, 3.381103515625, 3.5504150390625, 3.7197265625, 3.8890380859375, 4.058349609375, 4.2276611328125, 4.39697265625, 4.5662841796875, 4.735595703125, 4.9049072265625, 5.07421875]}, "gradients/decoder.model.decoder.layers.7.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 0.0, 5.0, 1.0, 0.0, 2.0, 3.0, 4.0, 2.0, 3.0, 11.0, 7.0, 9.0, 10.0, 14.0, 18.0, 12.0, 19.0, 28.0, 31.0, 34.0, 29.0, 36.0, 43.0, 32.0, 36.0, 38.0, 41.0, 49.0, 47.0, 37.0, 46.0, 45.0, 43.0, 35.0, 39.0, 36.0, 29.0, 24.0, 21.0, 15.0, 17.0, 10.0, 9.0, 9.0, 7.0, 5.0, 3.0, 5.0, 8.0, 3.0, 5.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.296875, -3.19189453125, -3.0869140625, -2.98193359375, -2.876953125, 
-2.77197265625, -2.6669921875, -2.56201171875, -2.45703125, -2.35205078125, -2.2470703125, -2.14208984375, -2.037109375, -1.93212890625, -1.8271484375, -1.72216796875, -1.6171875, -1.51220703125, -1.4072265625, -1.30224609375, -1.197265625, -1.09228515625, -0.9873046875, -0.88232421875, -0.77734375, -0.67236328125, -0.5673828125, -0.46240234375, -0.357421875, -0.25244140625, -0.1474609375, -0.04248046875, 0.0625, 0.16748046875, 0.2724609375, 0.37744140625, 0.482421875, 0.58740234375, 0.6923828125, 0.79736328125, 0.90234375, 1.00732421875, 1.1123046875, 1.21728515625, 1.322265625, 1.42724609375, 1.5322265625, 1.63720703125, 1.7421875, 1.84716796875, 1.9521484375, 2.05712890625, 2.162109375, 2.26708984375, 2.3720703125, 2.47705078125, 2.58203125, 2.68701171875, 2.7919921875, 2.89697265625, 3.001953125, 3.10693359375, 3.2119140625, 3.31689453125, 3.421875]}, "gradients/decoder.model.decoder.layers.7.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 5.0, 3.0, 5.0, 8.0, 3.0, 9.0, 18.0, 33.0, 50.0, 75.0, 162.0, 422.0, 1246.0, 4173.0, 19111.0, 121082.0, 660341.0, 204422.0, 28910.0, 5928.0, 1610.0, 515.0, 182.0, 92.0, 64.0, 28.0, 18.0, 19.0, 14.0, 3.0, 5.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2412109375, -1.197845458984375, -1.15447998046875, -1.111114501953125, -1.0677490234375, -1.024383544921875, -0.98101806640625, -0.937652587890625, -0.894287109375, -0.850921630859375, -0.80755615234375, -0.764190673828125, -0.7208251953125, -0.677459716796875, -0.63409423828125, -0.590728759765625, -0.54736328125, -0.503997802734375, -0.46063232421875, -0.417266845703125, -0.3739013671875, -0.330535888671875, -0.28717041015625, -0.243804931640625, -0.200439453125, -0.157073974609375, -0.11370849609375, -0.070343017578125, -0.0269775390625, 0.016387939453125, 0.05975341796875, 0.103118896484375, 0.146484375, 0.189849853515625, 0.23321533203125, 0.276580810546875, 0.3199462890625, 0.363311767578125, 0.40667724609375, 0.450042724609375, 0.493408203125, 0.536773681640625, 0.58013916015625, 0.623504638671875, 0.6668701171875, 0.710235595703125, 0.75360107421875, 0.796966552734375, 0.84033203125, 0.883697509765625, 0.92706298828125, 0.970428466796875, 1.0137939453125, 1.057159423828125, 1.10052490234375, 1.143890380859375, 1.187255859375, 1.230621337890625, 1.27398681640625, 1.317352294921875, 1.3607177734375, 1.404083251953125, 1.44744873046875, 1.490814208984375, 1.5341796875]}, "gradients/decoder.model.decoder.layers.7.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 3.0, 1.0, 4.0, 5.0, 7.0, 7.0, 14.0, 12.0, 16.0, 19.0, 31.0, 31.0, 47.0, 52.0, 75.0, 96.0, 90.0, 86.0, 66.0, 64.0, 48.0, 48.0, 49.0, 36.0, 28.0, 14.0, 12.0, 12.0, 10.0, 4.0, 4.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0001437664031982422, -0.0001395680010318756, -0.00013536959886550903, -0.00013117119669914246, -0.00012697279453277588, -0.0001227743923664093, -0.00011857599020004272, -0.00011437758803367615, -0.00011017918586730957, -0.00010598078370094299, -0.00010178238153457642, -9.758397936820984e-05, -9.338557720184326e-05, -8.918717503547668e-05, -8.498877286911011e-05, -8.079037070274353e-05, -7.659196853637695e-05, -7.239356637001038e-05, -6.81951642036438e-05, -6.399676203727722e-05, -5.9798359870910645e-05, -5.559995770454407e-05, 
-5.140155553817749e-05, -4.720315337181091e-05, -4.3004751205444336e-05, -3.880634903907776e-05, -3.460794687271118e-05, -3.0409544706344604e-05, -2.6211142539978027e-05, -2.201274037361145e-05, -1.7814338207244873e-05, -1.3615936040878296e-05, -9.417533874511719e-06, -5.219131708145142e-06, -1.0207295417785645e-06, 3.1776726245880127e-06, 7.37607479095459e-06, 1.1574476957321167e-05, 1.5772879123687744e-05, 1.997128129005432e-05, 2.41696834564209e-05, 2.8368085622787476e-05, 3.256648778915405e-05, 3.676488995552063e-05, 4.096329212188721e-05, 4.5161694288253784e-05, 4.936009645462036e-05, 5.355849862098694e-05, 5.7756900787353516e-05, 6.195530295372009e-05, 6.615370512008667e-05, 7.035210728645325e-05, 7.455050945281982e-05, 7.87489116191864e-05, 8.294731378555298e-05, 8.714571595191956e-05, 9.134411811828613e-05, 9.554252028465271e-05, 9.974092245101929e-05, 0.00010393932461738586, 0.00010813772678375244, 0.00011233612895011902, 0.0001165345311164856, 0.00012073293328285217, 0.00012493133544921875]}, "gradients/decoder.model.decoder.layers.7.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 5.0, 6.0, 5.0, 4.0, 19.0, 12.0, 21.0, 21.0, 39.0, 57.0, 127.0, 202.0, 471.0, 1357.0, 5176.0, 28535.0, 233104.0, 662135.0, 98646.0, 13996.0, 2951.0, 903.0, 324.0, 179.0, 82.0, 51.0, 40.0, 26.0, 18.0, 14.0, 8.0, 7.0, 4.0, 7.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2275390625, -1.180877685546875, -1.13421630859375, -1.087554931640625, -1.0408935546875, -0.994232177734375, -0.94757080078125, -0.900909423828125, -0.854248046875, -0.807586669921875, -0.76092529296875, -0.714263916015625, -0.6676025390625, -0.620941162109375, -0.57427978515625, -0.527618408203125, -0.48095703125, -0.434295654296875, -0.38763427734375, -0.340972900390625, -0.2943115234375, -0.247650146484375, -0.20098876953125, -0.154327392578125, -0.107666015625, -0.061004638671875, -0.01434326171875, 0.032318115234375, 0.0789794921875, 0.125640869140625, 0.17230224609375, 0.218963623046875, 0.265625, 0.312286376953125, 0.35894775390625, 0.405609130859375, 0.4522705078125, 0.498931884765625, 0.54559326171875, 0.592254638671875, 0.638916015625, 0.685577392578125, 0.73223876953125, 0.778900146484375, 0.8255615234375, 0.872222900390625, 0.91888427734375, 0.965545654296875, 1.01220703125, 1.058868408203125, 1.10552978515625, 1.152191162109375, 1.1988525390625, 1.245513916015625, 1.29217529296875, 1.338836669921875, 1.385498046875, 1.432159423828125, 1.47882080078125, 1.525482177734375, 1.5721435546875, 1.618804931640625, 1.66546630859375, 1.712127685546875, 1.7587890625]}, "gradients/decoder.model.decoder.layers.7.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 7.0, 4.0, 6.0, 8.0, 9.0, 14.0, 22.0, 22.0, 15.0, 20.0, 32.0, 31.0, 69.0, 65.0, 80.0, 71.0, 78.0, 72.0, 86.0, 56.0, 53.0, 30.0, 27.0, 20.0, 22.0, 16.0, 21.0, 13.0, 7.0, 5.0, 9.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.37890625, -0.3656883239746094, -0.35247039794921875, -0.3392524719238281, -0.3260345458984375, -0.3128166198730469, -0.29959869384765625, -0.2863807678222656, -0.273162841796875, -0.2599449157714844, -0.24672698974609375, -0.23350906372070312, -0.2202911376953125, -0.20707321166992188, -0.19385528564453125, -0.18063735961914062, -0.16741943359375, -0.15420150756835938, -0.14098358154296875, 
-0.12776565551757812, -0.1145477294921875, -0.10132980346679688, -0.08811187744140625, -0.07489395141601562, -0.061676025390625, -0.048458099365234375, -0.03524017333984375, -0.022022247314453125, -0.0088043212890625, 0.004413604736328125, 0.01763153076171875, 0.030849456787109375, 0.0440673828125, 0.057285308837890625, 0.07050323486328125, 0.08372116088867188, 0.0969390869140625, 0.11015701293945312, 0.12337493896484375, 0.13659286499023438, 0.149810791015625, 0.16302871704101562, 0.17624664306640625, 0.18946456909179688, 0.2026824951171875, 0.21590042114257812, 0.22911834716796875, 0.24233627319335938, 0.25555419921875, 0.2687721252441406, 0.28199005126953125, 0.2952079772949219, 0.3084259033203125, 0.3216438293457031, 0.33486175537109375, 0.3480796813964844, 0.361297607421875, 0.3745155334472656, 0.38773345947265625, 0.4009513854980469, 0.4141693115234375, 0.4273872375488281, 0.44060516357421875, 0.4538230895996094, 0.467041015625]}, "gradients/decoder.model.decoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [3.0, 1.0, 8.0, 8.0, 19.0, 51.0, 89.0, 150.0, 230.0, 204.0, 121.0, 71.0, 34.0, 15.0, 4.0, 6.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.65155291557312, -3.2425835132598877, -2.8336143493652344, -2.424644947052002, -2.0156755447387695, -1.606706142425537, -1.1977369785308838, -0.7887675762176514, -0.37979817390441895, 0.0291711688041687, 0.43814051151275635, 0.8471097946166992, 1.2560791969299316, 1.665048599243164, 2.0740177631378174, 2.48298716545105, 2.8919565677642822, 3.3009259700775146, 3.709895133972168, 4.1188645362854, 4.527833938598633, 4.936803340911865, 5.345772743225098, 5.754741668701172, 6.1637115478515625, 6.572680950164795, 6.981650352478027, 7.390619277954102, 7.799589157104492, 8.208558082580566, 8.61752700805664, 9.026496887207031, 9.435466766357422, 9.844435691833496, 10.253405570983887, 10.662374496459961, 11.071344375610352, 11.480313301086426, 11.8892822265625, 12.29825210571289, 12.707221984863281, 13.116190910339355, 13.525160789489746, 13.93412971496582, 14.343099594116211, 14.752068519592285, 15.16103744506836, 15.57000732421875, 15.978976249694824, 16.3879451751709, 16.79691505432129, 17.20588493347168, 17.614852905273438, 18.023822784423828, 18.43279266357422, 18.84176254272461, 19.250730514526367, 19.659700393676758, 20.068668365478516, 20.477638244628906, 20.886608123779297, 21.295578002929688, 21.704545974731445, 22.113515853881836, 22.522485733032227]}, "gradients/decoder.model.decoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 4.0, 6.0, 7.0, 7.0, 11.0, 13.0, 21.0, 21.0, 18.0, 35.0, 26.0, 36.0, 33.0, 47.0, 37.0, 52.0, 51.0, 43.0, 54.0, 57.0, 53.0, 47.0, 50.0, 39.0, 56.0, 33.0, 28.0, 29.0, 27.0, 17.0, 12.0, 7.0, 5.0, 5.0, 4.0, 5.0, 3.0, 3.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.408663272857666, -6.17978572845459, -5.9509077072143555, -5.722030162811279, -5.493152141571045, -5.264274597167969, -5.035396575927734, -4.806519031524658, -4.577641487121582, -4.348763942718506, -4.1198859214782715, -3.8910083770751953, -3.662130355834961, -3.4332528114318848, -3.2043750286102295, -2.975497245788574, -2.74661922454834, -2.5177414417266846, -2.2888636589050293, -2.059986114501953, 
-1.8311082124710083, -1.602230429649353, -1.3733527660369873, -1.144474983215332, -0.9155972003936768, -0.6867194175720215, -0.457841694355011, -0.2289639711380005, -8.618831634521484e-05, 0.22879159450531006, 0.4576692581176758, 0.686547040939331, 0.9154243469238281, 1.1443021297454834, 1.3731799125671387, 1.6020575761795044, 1.8309353590011597, 2.0598130226135254, 2.2886908054351807, 2.517568588256836, 2.746446371078491, 2.9753241539001465, 3.2042019367218018, 3.433079719543457, 3.661957263946533, 3.8908352851867676, 4.119712829589844, 4.348590850830078, 4.577468395233154, 4.8063459396362305, 5.035223960876465, 5.264101505279541, 5.492979526519775, 5.721857070922852, 5.950735092163086, 6.179612636566162, 6.408490180969238, 6.6373677253723145, 6.866245746612549, 7.095123291015625, 7.324001312255859, 7.5528788566589355, 7.781756401062012, 8.010634422302246, 8.23951244354248]}, "gradients/decoder.model.decoder.layers.6.fc2.weight": {"_type": "histogram", "values": [3.0, 1.0, 1.0, 1.0, 0.0, 4.0, 7.0, 6.0, 6.0, 12.0, 18.0, 30.0, 35.0, 76.0, 137.0, 234.0, 414.0, 904.0, 1858.0, 4596.0, 12862.0, 49524.0, 326270.0, 2033071.0, 1489402.0, 220295.0, 36977.0, 10580.0, 3792.0, 1660.0, 761.0, 356.0, 172.0, 93.0, 60.0, 31.0, 18.0, 13.0, 11.0, 4.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.900390625, -2.779022216796875, -2.65765380859375, -2.536285400390625, -2.4149169921875, -2.293548583984375, -2.17218017578125, -2.050811767578125, -1.929443359375, -1.808074951171875, -1.68670654296875, -1.565338134765625, -1.4439697265625, -1.322601318359375, -1.20123291015625, -1.079864501953125, -0.95849609375, -0.837127685546875, -0.71575927734375, -0.594390869140625, -0.4730224609375, -0.351654052734375, -0.23028564453125, -0.108917236328125, 0.012451171875, 0.133819580078125, 0.25518798828125, 0.376556396484375, 0.4979248046875, 0.619293212890625, 0.74066162109375, 0.862030029296875, 0.9833984375, 1.104766845703125, 1.22613525390625, 1.347503662109375, 1.4688720703125, 1.590240478515625, 1.71160888671875, 1.832977294921875, 1.954345703125, 2.075714111328125, 2.19708251953125, 2.318450927734375, 2.4398193359375, 2.561187744140625, 2.68255615234375, 2.803924560546875, 2.92529296875, 3.046661376953125, 3.16802978515625, 3.289398193359375, 3.4107666015625, 3.532135009765625, 3.65350341796875, 3.774871826171875, 3.896240234375, 4.017608642578125, 4.13897705078125, 4.260345458984375, 4.3817138671875, 4.503082275390625, 4.62445068359375, 4.745819091796875, 4.8671875]}, "gradients/decoder.model.decoder.layers.6.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 5.0, 5.0, 8.0, 6.0, 13.0, 13.0, 17.0, 19.0, 21.0, 34.0, 44.0, 39.0, 34.0, 60.0, 60.0, 56.0, 58.0, 54.0, 68.0, 65.0, 53.0, 51.0, 51.0, 46.0, 33.0, 20.0, 16.0, 11.0, 12.0, 8.0, 11.0, 7.0, 4.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.400390625, -3.286590576171875, -3.17279052734375, -3.058990478515625, -2.9451904296875, -2.831390380859375, -2.71759033203125, -2.603790283203125, -2.489990234375, -2.376190185546875, -2.26239013671875, -2.148590087890625, -2.0347900390625, -1.920989990234375, -1.80718994140625, -1.693389892578125, -1.57958984375, -1.465789794921875, -1.35198974609375, -1.238189697265625, -1.1243896484375, -1.010589599609375, -0.89678955078125, -0.782989501953125, -0.669189453125, -0.555389404296875, -0.44158935546875, -0.327789306640625, 
-0.2139892578125, -0.100189208984375, 0.01361083984375, 0.127410888671875, 0.2412109375, 0.355010986328125, 0.46881103515625, 0.582611083984375, 0.6964111328125, 0.810211181640625, 0.92401123046875, 1.037811279296875, 1.151611328125, 1.265411376953125, 1.37921142578125, 1.493011474609375, 1.6068115234375, 1.720611572265625, 1.83441162109375, 1.948211669921875, 2.06201171875, 2.175811767578125, 2.28961181640625, 2.403411865234375, 2.5172119140625, 2.631011962890625, 2.74481201171875, 2.858612060546875, 2.972412109375, 3.086212158203125, 3.20001220703125, 3.313812255859375, 3.4276123046875, 3.541412353515625, 3.65521240234375, 3.769012451171875, 3.8828125]}, "gradients/decoder.model.decoder.layers.6.fc1.weight": {"_type": "histogram", "values": [4.0, 3.0, 7.0, 17.0, 51.0, 184.0, 797.0, 10090.0, 4155691.0, 26203.0, 947.0, 183.0, 80.0, 29.0, 7.0, 3.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.6796875, -5.0181884765625, -4.356689453125, -3.6951904296875, -3.03369140625, -2.3721923828125, -1.710693359375, -1.0491943359375, -0.3876953125, 0.2738037109375, 0.935302734375, 1.5968017578125, 2.25830078125, 2.9197998046875, 3.581298828125, 4.2427978515625, 4.904296875, 5.5657958984375, 6.227294921875, 6.8887939453125, 7.55029296875, 8.2117919921875, 8.873291015625, 9.5347900390625, 10.1962890625, 10.8577880859375, 11.519287109375, 12.1807861328125, 12.84228515625, 13.5037841796875, 14.165283203125, 14.8267822265625, 15.48828125, 16.1497802734375, 16.811279296875, 17.4727783203125, 18.13427734375, 18.7957763671875, 19.457275390625, 20.1187744140625, 20.7802734375, 21.4417724609375, 22.103271484375, 22.7647705078125, 23.42626953125, 24.0877685546875, 24.749267578125, 25.4107666015625, 26.072265625, 26.7337646484375, 27.395263671875, 28.0567626953125, 28.71826171875, 29.3797607421875, 30.041259765625, 30.7027587890625, 31.3642578125, 32.0257568359375, 32.687255859375, 33.3487548828125, 34.01025390625, 34.6717529296875, 35.333251953125, 35.9947509765625, 36.65625]}, "gradients/decoder.model.decoder.layers.6.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 4.0, 7.0, 18.0, 24.0, 45.0, 47.0, 67.0, 142.0, 236.0, 429.0, 806.0, 942.0, 625.0, 295.0, 155.0, 101.0, 53.0, 34.0, 17.0, 11.0, 8.0, 6.0, 7.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-2.2734375, -2.2154388427734375, -2.157440185546875, -2.0994415283203125, -2.04144287109375, -1.9834442138671875, -1.925445556640625, -1.8674468994140625, -1.8094482421875, -1.7514495849609375, -1.693450927734375, -1.6354522705078125, -1.57745361328125, -1.5194549560546875, -1.461456298828125, -1.4034576416015625, -1.345458984375, -1.2874603271484375, -1.229461669921875, -1.1714630126953125, -1.11346435546875, -1.0554656982421875, -0.997467041015625, -0.9394683837890625, -0.8814697265625, -0.8234710693359375, -0.765472412109375, -0.7074737548828125, -0.64947509765625, -0.5914764404296875, -0.533477783203125, -0.4754791259765625, -0.41748046875, -0.3594818115234375, -0.301483154296875, -0.2434844970703125, -0.18548583984375, -0.1274871826171875, -0.069488525390625, -0.0114898681640625, 0.0465087890625, 0.1045074462890625, 0.162506103515625, 0.2205047607421875, 0.27850341796875, 0.3365020751953125, 
0.394500732421875, 0.4524993896484375, 0.510498046875, 0.5684967041015625, 0.626495361328125, 0.6844940185546875, 0.74249267578125, 0.8004913330078125, 0.858489990234375, 0.9164886474609375, 0.9744873046875, 1.0324859619140625, 1.090484619140625, 1.1484832763671875, 1.20648193359375, 1.2644805908203125, 1.322479248046875, 1.3804779052734375, 1.4384765625]}, "gradients/decoder.model.decoder.layers.6.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 7.0, 7.0, 13.0, 16.0, 22.0, 22.0, 29.0, 46.0, 58.0, 68.0, 76.0, 91.0, 95.0, 87.0, 98.0, 65.0, 58.0, 43.0, 21.0, 19.0, 22.0, 12.0, 10.0, 5.0, 7.0, 3.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-5.605706691741943, -5.464009761810303, -5.322312355041504, -5.180615425109863, -5.038918495178223, -4.897221088409424, -4.755524158477783, -4.613826751708984, -4.472129821777344, -4.330432891845703, -4.188735485076904, -4.047038555145264, -3.905341386795044, -3.763644218444824, -3.6219472885131836, -3.480250120162964, -3.338552951812744, -3.1968557834625244, -3.0551586151123047, -2.913461685180664, -2.7717645168304443, -2.6300673484802246, -2.488370418548584, -2.3466732501983643, -2.2049760818481445, -2.063278913497925, -1.9215818643569946, -1.7798848152160645, -1.6381876468658447, -1.496490478515625, -1.3547934293746948, -1.2130963802337646, -1.071399211883545, -0.92970210313797, -0.788004994392395, -0.6463078856468201, -0.5046107769012451, -0.36291366815567017, -0.22121655941009521, -0.07951945066452026, 0.06217765808105469, 0.20387476682662964, 0.3455718755722046, 0.48726898431777954, 0.6289660930633545, 0.7706632018089294, 0.9123603105545044, 1.0540573596954346, 1.1957545280456543, 1.337451696395874, 1.4791487455368042, 1.6208457946777344, 1.762542963027954, 1.9042401313781738, 2.0459370613098145, 2.187634229660034, 2.329331398010254, 2.4710285663604736, 2.6127257347106934, 2.754422664642334, 2.8961198329925537, 3.0378170013427734, 3.179513931274414, 3.321211099624634, 3.4629082679748535]}, "gradients/decoder.model.decoder.layers.6.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 3.0, 4.0, 4.0, 6.0, 1.0, 10.0, 3.0, 8.0, 17.0, 22.0, 14.0, 16.0, 33.0, 22.0, 34.0, 46.0, 43.0, 29.0, 38.0, 59.0, 42.0, 49.0, 47.0, 59.0, 47.0, 55.0, 45.0, 48.0, 36.0, 29.0, 22.0, 17.0, 27.0, 21.0, 12.0, 9.0, 9.0, 5.0, 5.0, 7.0, 4.0, 1.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.914206027984619, -2.811676502227783, -2.7091469764709473, -2.6066174507141113, -2.5040876865386963, -2.4015581607818604, -2.2990286350250244, -2.1964991092681885, -2.0939693450927734, -1.9914398193359375, -1.888910174369812, -1.786380648612976, -1.6838510036468506, -1.5813214778900146, -1.4787919521331787, -1.3762624263763428, -1.2737329006195068, -1.171203374862671, -1.0686737298965454, -0.9661442041397095, -0.8636146187782288, -0.761085033416748, -0.6585555076599121, -0.5560259222984314, -0.4534963369369507, -0.35096675157546997, -0.24843719601631165, -0.14590764045715332, -0.04337805509567261, 0.059151530265808105, 0.16168105602264404, 0.26421064138412476, 0.36674046516418457, 0.4692700505256653, 0.571799635887146, 0.6743291616439819, 0.7768587470054626, 0.8793883323669434, 0.9819178581237793, 1.0844473838806152, 1.1869770288467407, 1.2895065546035767, 1.3920361995697021, 1.494565725326538, 1.597095251083374, 1.6996248960494995, 
1.8021544218063354, 1.904684066772461, 2.007213592529297, 2.109743118286133, 2.2122726440429688, 2.3148021697998047, 2.4173319339752197, 2.5198614597320557, 2.6223909854888916, 2.7249205112457275, 2.8274502754211426, 2.9299798011779785, 3.0325093269348145, 3.1350388526916504, 3.2375686168670654, 3.3400981426239014, 3.4426276683807373, 3.5451571941375732, 3.647686719894409]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 1.0, 0.0, 2.0, 2.0, 5.0, 5.0, 11.0, 14.0, 15.0, 21.0, 31.0, 41.0, 78.0, 106.0, 140.0, 203.0, 319.0, 469.0, 720.0, 1267.0, 2079.0, 3513.0, 6143.0, 11999.0, 24165.0, 55152.0, 148756.0, 417087.0, 231591.0, 77988.0, 32414.0, 15391.0, 7811.0, 4261.0, 2514.0, 1525.0, 943.0, 571.0, 402.0, 264.0, 155.0, 115.0, 69.0, 61.0, 42.0, 32.0, 27.0, 14.0, 10.0, 8.0, 8.0, 1.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.36083984375, -0.3494529724121094, -0.33806610107421875, -0.3266792297363281, -0.3152923583984375, -0.3039054870605469, -0.29251861572265625, -0.2811317443847656, -0.269744873046875, -0.2583580017089844, -0.24697113037109375, -0.23558425903320312, -0.2241973876953125, -0.21281051635742188, -0.20142364501953125, -0.19003677368164062, -0.17864990234375, -0.16726303100585938, -0.15587615966796875, -0.14448928833007812, -0.1331024169921875, -0.12171554565429688, -0.11032867431640625, -0.09894180297851562, -0.087554931640625, -0.07616806030273438, -0.06478118896484375, -0.053394317626953125, -0.0420074462890625, -0.030620574951171875, -0.01923370361328125, -0.007846832275390625, 0.0035400390625, 0.014926910400390625, 0.02631378173828125, 0.037700653076171875, 0.0490875244140625, 0.060474395751953125, 0.07186126708984375, 0.08324813842773438, 0.094635009765625, 0.10602188110351562, 0.11740875244140625, 0.12879562377929688, 0.1401824951171875, 0.15156936645507812, 0.16295623779296875, 0.17434310913085938, 0.18572998046875, 0.19711685180664062, 0.20850372314453125, 0.21989059448242188, 0.2312774658203125, 0.24266433715820312, 0.25405120849609375, 0.2654380798339844, 0.276824951171875, 0.2882118225097656, 0.29959869384765625, 0.3109855651855469, 0.3223724365234375, 0.3337593078613281, 0.34514617919921875, 0.3565330505371094, 0.367919921875]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0, 2.0, 8.0, 1.0, 4.0, 4.0, 11.0, 10.0, 17.0, 19.0, 26.0, 27.0, 25.0, 33.0, 51.0, 48.0, 41.0, 37.0, 60.0, 52.0, 57.0, 63.0, 54.0, 56.0, 51.0, 48.0, 32.0, 36.0, 35.0, 26.0, 21.0, 15.0, 8.0, 9.0, 4.0, 1.0, 4.0, 2.0, 6.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.353515625, -2.28021240234375, -2.2069091796875, -2.13360595703125, -2.060302734375, -1.98699951171875, -1.9136962890625, -1.84039306640625, -1.76708984375, -1.69378662109375, -1.6204833984375, -1.54718017578125, -1.473876953125, -1.40057373046875, -1.3272705078125, -1.25396728515625, -1.1806640625, -1.10736083984375, -1.0340576171875, -0.96075439453125, -0.887451171875, -0.81414794921875, -0.7408447265625, -0.66754150390625, -0.59423828125, -0.52093505859375, -0.4476318359375, -0.37432861328125, -0.301025390625, -0.22772216796875, -0.1544189453125, -0.08111572265625, -0.0078125, 0.06549072265625, 0.1387939453125, 0.21209716796875, 0.285400390625, 0.35870361328125, 0.4320068359375, 0.50531005859375, 0.57861328125, 0.65191650390625, 0.7252197265625, 0.79852294921875, 0.871826171875, 0.94512939453125, 
1.0184326171875, 1.09173583984375, 1.1650390625, 1.23834228515625, 1.3116455078125, 1.38494873046875, 1.458251953125, 1.53155517578125, 1.6048583984375, 1.67816162109375, 1.75146484375, 1.82476806640625, 1.8980712890625, 1.97137451171875, 2.044677734375, 2.11798095703125, 2.1912841796875, 2.26458740234375, 2.337890625]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 3.0, 7.0, 8.0, 12.0, 19.0, 15.0, 39.0, 36.0, 67.0, 72.0, 119.0, 202.0, 301.0, 486.0, 745.0, 1205.0, 1848.0, 3113.0, 4994.0, 8485.0, 14834.0, 27035.0, 51995.0, 107687.0, 240231.0, 296926.0, 141787.0, 66726.0, 34147.0, 18599.0, 10586.0, 6416.0, 3790.0, 2281.0, 1360.0, 842.0, 575.0, 336.0, 211.0, 151.0, 96.0, 48.0, 44.0, 31.0, 16.0, 18.0, 9.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.1900634765625, -0.1833171844482422, -0.17657089233398438, -0.16982460021972656, -0.16307830810546875, -0.15633201599121094, -0.14958572387695312, -0.1428394317626953, -0.1360931396484375, -0.1293468475341797, -0.12260055541992188, -0.11585426330566406, -0.10910797119140625, -0.10236167907714844, -0.09561538696289062, -0.08886909484863281, -0.082122802734375, -0.07537651062011719, -0.06863021850585938, -0.06188392639160156, -0.05513763427734375, -0.04839134216308594, -0.041645050048828125, -0.03489875793457031, -0.0281524658203125, -0.021406173706054688, -0.014659881591796875, -0.007913589477539062, -0.00116729736328125, 0.0055789947509765625, 0.012325286865234375, 0.019071578979492188, 0.02581787109375, 0.03256416320800781, 0.039310455322265625, 0.04605674743652344, 0.05280303955078125, 0.05954933166503906, 0.06629562377929688, 0.07304191589355469, 0.0797882080078125, 0.08653450012207031, 0.09328079223632812, 0.10002708435058594, 0.10677337646484375, 0.11351966857910156, 0.12026596069335938, 0.1270122528076172, 0.133758544921875, 0.1405048370361328, 0.14725112915039062, 0.15399742126464844, 0.16074371337890625, 0.16749000549316406, 0.17423629760742188, 0.1809825897216797, 0.1877288818359375, 0.1944751739501953, 0.20122146606445312, 0.20796775817871094, 0.21471405029296875, 0.22146034240722656, 0.22820663452148438, 0.2349529266357422, 0.24169921875]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 5.0, 0.0, 5.0, 3.0, 12.0, 7.0, 9.0, 11.0, 8.0, 11.0, 16.0, 15.0, 19.0, 28.0, 29.0, 34.0, 29.0, 36.0, 33.0, 29.0, 29.0, 39.0, 33.0, 45.0, 51.0, 35.0, 56.0, 44.0, 37.0, 42.0, 35.0, 46.0, 32.0, 21.0, 19.0, 23.0, 15.0, 13.0, 12.0, 13.0, 13.0, 5.0, 1.0, 5.0, 2.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.5625, -3.4384765625, -3.314453125, -3.1904296875, -3.06640625, -2.9423828125, -2.818359375, -2.6943359375, -2.5703125, -2.4462890625, -2.322265625, -2.1982421875, -2.07421875, -1.9501953125, -1.826171875, -1.7021484375, -1.578125, -1.4541015625, -1.330078125, -1.2060546875, -1.08203125, -0.9580078125, -0.833984375, -0.7099609375, -0.5859375, -0.4619140625, -0.337890625, -0.2138671875, -0.08984375, 0.0341796875, 0.158203125, 0.2822265625, 0.40625, 0.5302734375, 0.654296875, 0.7783203125, 0.90234375, 1.0263671875, 1.150390625, 1.2744140625, 1.3984375, 1.5224609375, 1.646484375, 1.7705078125, 1.89453125, 2.0185546875, 2.142578125, 2.2666015625, 2.390625, 2.5146484375, 2.638671875, 2.7626953125, 2.88671875, 3.0107421875, 3.134765625, 3.2587890625, 3.3828125, 3.5068359375, 3.630859375, 3.7548828125, 3.87890625, 4.0029296875, 
4.126953125, 4.2509765625, 4.375]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 11.0, 18.0, 29.0, 38.0, 65.0, 111.0, 172.0, 261.0, 487.0, 910.0, 1851.0, 4970.0, 19945.0, 278311.0, 700155.0, 30931.0, 6095.0, 2105.0, 890.0, 462.0, 247.0, 158.0, 100.0, 73.0, 43.0, 30.0, 19.0, 12.0, 6.0, 6.0, 5.0, 6.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.110107421875, -0.10667991638183594, -0.10325241088867188, -0.09982490539550781, -0.09639739990234375, -0.09296989440917969, -0.08954238891601562, -0.08611488342285156, -0.0826873779296875, -0.07925987243652344, -0.07583236694335938, -0.07240486145019531, -0.06897735595703125, -0.06554985046386719, -0.062122344970703125, -0.05869483947753906, -0.055267333984375, -0.05183982849121094, -0.048412322998046875, -0.04498481750488281, -0.04155731201171875, -0.03812980651855469, -0.034702301025390625, -0.03127479553222656, -0.0278472900390625, -0.024419784545898438, -0.020992279052734375, -0.017564773559570312, -0.01413726806640625, -0.010709762573242188, -0.007282257080078125, -0.0038547515869140625, -0.00042724609375, 0.0030002593994140625, 0.006427764892578125, 0.009855270385742188, 0.01328277587890625, 0.016710281372070312, 0.020137786865234375, 0.023565292358398438, 0.0269927978515625, 0.030420303344726562, 0.033847808837890625, 0.03727531433105469, 0.04070281982421875, 0.04413032531738281, 0.047557830810546875, 0.05098533630371094, 0.054412841796875, 0.05784034729003906, 0.061267852783203125, 0.06469535827636719, 0.06812286376953125, 0.07155036926269531, 0.07497787475585938, 0.07840538024902344, 0.0818328857421875, 0.08526039123535156, 0.08868789672851562, 0.09211540222167969, 0.09554290771484375, 0.09897041320800781, 0.10239791870117188, 0.10582542419433594, 0.1092529296875]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 5.0, 1.0, 11.0, 11.0, 15.0, 36.0, 43.0, 105.0, 104.0, 179.0, 161.0, 134.0, 70.0, 50.0, 28.0, 22.0, 16.0, 6.0, 4.0, 3.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.253885269165039e-05, -7.012765854597092e-05, -6.771646440029144e-05, -6.530527025461197e-05, -6.28940761089325e-05, -6.048288196325302e-05, -5.807168781757355e-05, -5.5660493671894073e-05, -5.32492995262146e-05, -5.0838105380535126e-05, -4.842691123485565e-05, -4.601571708917618e-05, -4.3604522943496704e-05, -4.119332879781723e-05, -3.8782134652137756e-05, -3.637094050645828e-05, -3.395974636077881e-05, -3.1548552215099335e-05, -2.913735806941986e-05, -2.6726163923740387e-05, -2.4314969778060913e-05, -2.190377563238144e-05, -1.9492581486701965e-05, -1.708138734102249e-05, -1.4670193195343018e-05, -1.2258999049663544e-05, -9.84780490398407e-06, -7.436610758304596e-06, -5.025416612625122e-06, -2.614222466945648e-06, -2.0302832126617432e-07, 2.2081658244132996e-06, 4.6193599700927734e-06, 7.030554115772247e-06, 9.441748261451721e-06, 1.1852942407131195e-05, 1.4264136552810669e-05, 1.6675330698490143e-05, 1.9086524844169617e-05, 2.149771898984909e-05, 2.3908913135528564e-05, 2.632010728120804e-05, 2.8731301426887512e-05, 3.1142495572566986e-05, 3.355368971824646e-05, 3.5964883863925934e-05, 3.837607800960541e-05, 4.078727215528488e-05, 
4.3198466300964355e-05, 4.560966044664383e-05, 4.80208545923233e-05, 5.043204873800278e-05, 5.284324288368225e-05, 5.5254437029361725e-05, 5.76656311750412e-05, 6.007682532072067e-05, 6.248801946640015e-05, 6.489921361207962e-05, 6.73104077577591e-05, 6.972160190343857e-05, 7.213279604911804e-05, 7.454399019479752e-05, 7.695518434047699e-05, 7.936637848615646e-05, 8.177757263183594e-05]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 7.0, 2.0, 5.0, 4.0, 12.0, 28.0, 49.0, 82.0, 207.0, 496.0, 1303.0, 4371.0, 19893.0, 353567.0, 636513.0, 24717.0, 4941.0, 1483.0, 491.0, 196.0, 83.0, 46.0, 17.0, 11.0, 6.0, 9.0, 5.0, 4.0, 2.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.1502685546875, -0.1457233428955078, -0.14117813110351562, -0.13663291931152344, -0.13208770751953125, -0.12754249572753906, -0.12299728393554688, -0.11845207214355469, -0.1139068603515625, -0.10936164855957031, -0.10481643676757812, -0.10027122497558594, -0.09572601318359375, -0.09118080139160156, -0.08663558959960938, -0.08209037780761719, -0.077545166015625, -0.07299995422363281, -0.06845474243164062, -0.06390953063964844, -0.05936431884765625, -0.05481910705566406, -0.050273895263671875, -0.04572868347167969, -0.0411834716796875, -0.03663825988769531, -0.032093048095703125, -0.027547836303710938, -0.02300262451171875, -0.018457412719726562, -0.013912200927734375, -0.009366989135742188, -0.00482177734375, -0.0002765655517578125, 0.004268646240234375, 0.008813858032226562, 0.01335906982421875, 0.017904281616210938, 0.022449493408203125, 0.026994705200195312, 0.0315399169921875, 0.03608512878417969, 0.040630340576171875, 0.04517555236816406, 0.04972076416015625, 0.05426597595214844, 0.058811187744140625, 0.06335639953613281, 0.067901611328125, 0.07244682312011719, 0.07699203491210938, 0.08153724670410156, 0.08608245849609375, 0.09062767028808594, 0.09517288208007812, 0.09971809387207031, 0.1042633056640625, 0.10880851745605469, 0.11335372924804688, 0.11789894104003906, 0.12244415283203125, 0.12698936462402344, 0.13153457641601562, 0.1360797882080078, 0.140625]}, "gradients/decoder.model.decoder.layers.6.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 10.0, 2.0, 4.0, 6.0, 8.0, 9.0, 9.0, 11.0, 13.0, 26.0, 28.0, 59.0, 95.0, 220.0, 234.0, 100.0, 45.0, 30.0, 20.0, 18.0, 9.0, 10.0, 5.0, 6.0, 7.0, 4.0, 3.0, 4.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.02569580078125, -0.02491927146911621, -0.024142742156982422, -0.023366212844848633, -0.022589683532714844, -0.021813154220581055, -0.021036624908447266, -0.020260095596313477, -0.019483566284179688, -0.0187070369720459, -0.01793050765991211, -0.01715397834777832, -0.01637744903564453, -0.015600919723510742, -0.014824390411376953, -0.014047861099243164, -0.013271331787109375, -0.012494802474975586, -0.011718273162841797, -0.010941743850708008, -0.010165214538574219, -0.00938868522644043, -0.00861215591430664, -0.007835626602172852, -0.0070590972900390625, -0.0062825679779052734, -0.005506038665771484, -0.004729509353637695, -0.003952980041503906, -0.003176450729370117, -0.002399921417236328, -0.001623392105102539, -0.00084686279296875, -7.033348083496094e-05, 0.0007061958312988281, 0.0014827251434326172, 0.0022592544555664062, 
0.0030357837677001953, 0.0038123130798339844, 0.0045888423919677734, 0.0053653717041015625, 0.0061419010162353516, 0.006918430328369141, 0.00769495964050293, 0.008471488952636719, 0.009248018264770508, 0.010024547576904297, 0.010801076889038086, 0.011577606201171875, 0.012354135513305664, 0.013130664825439453, 0.013907194137573242, 0.014683723449707031, 0.01546025276184082, 0.01623678207397461, 0.0170133113861084, 0.017789840698242188, 0.018566370010375977, 0.019342899322509766, 0.020119428634643555, 0.020895957946777344, 0.021672487258911133, 0.022449016571044922, 0.02322554588317871, 0.0240020751953125]}, "gradients/decoder.model.decoder.layers.6.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 13.0, 19.0, 44.0, 73.0, 126.0, 189.0, 209.0, 164.0, 83.0, 37.0, 25.0, 9.0, 7.0, 5.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.971868515014648, -8.785367965698242, -8.59886646270752, -8.412365913391113, -8.22586441040039, -8.039363861083984, -7.852862358093262, -7.666361331939697, -7.479860305786133, -7.293359279632568, -7.106858253479004, -6.9203572273254395, -6.733856201171875, -6.5473551750183105, -6.360854148864746, -6.174353122711182, -5.987852096557617, -5.801351070404053, -5.614850044250488, -5.428349018096924, -5.241847991943359, -5.055346965789795, -4.8688459396362305, -4.682344913482666, -4.49584436416626, -4.309343338012695, -4.122842311859131, -3.9363412857055664, -3.749840259552002, -3.5633392333984375, -3.376838207244873, -3.1903371810913086, -3.0038363933563232, -2.817335367202759, -2.6308343410491943, -2.44433331489563, -2.2578322887420654, -2.071331262588501, -1.884830355644226, -1.6983293294906616, -1.5118283033370972, -1.3253272771835327, -1.1388262510299683, -0.9523252844810486, -0.7658242583274841, -0.5793232321739197, -0.392822265625, -0.20632123947143555, -0.019820213317871094, 0.16668079793453217, 0.3531818091869354, 0.5396828055381775, 0.7261838316917419, 0.9126848578453064, 1.099185824394226, 1.2856868505477905, 1.472187876701355, 1.6586889028549194, 1.8451899290084839, 2.031690835952759, 2.2181918621063232, 2.4046928882598877, 2.591193914413452, 2.7776949405670166, 2.964195966720581]}, "gradients/decoder.model.decoder.layers.6.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 5.0, 4.0, 2.0, 4.0, 6.0, 6.0, 8.0, 15.0, 25.0, 14.0, 30.0, 30.0, 30.0, 48.0, 48.0, 43.0, 54.0, 58.0, 53.0, 70.0, 64.0, 59.0, 65.0, 57.0, 39.0, 33.0, 25.0, 26.0, 30.0, 14.0, 12.0, 10.0, 3.0, 7.0, 3.0, 7.0, 1.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.5932490825653076, -2.5170950889587402, -2.440941095352173, -2.3647871017456055, -2.288633108139038, -2.2124791145324707, -2.1363251209259033, -2.060171127319336, -1.9840171337127686, -1.9078631401062012, -1.8317091464996338, -1.7555551528930664, -1.679401159286499, -1.6032471656799316, -1.5270931720733643, -1.4509391784667969, -1.374785304069519, -1.2986313104629517, -1.2224773168563843, -1.146323323249817, -1.0701693296432495, -0.9940153360366821, -0.9178614020347595, -0.8417074084281921, -0.7655534148216248, -0.6893994212150574, -0.61324542760849, -0.5370914936065674, -0.4609374701976776, -0.38478347659111023, -0.30862951278686523, -0.23247551918029785, -0.15632152557373047, -0.08016753941774368, 
-0.004013553261756897, 0.07214042544364929, 0.14829441905021667, 0.22444841265678406, 0.30060237646102905, 0.37675637006759644, 0.4529103636741638, 0.5290643572807312, 0.6052183508872986, 0.6813722848892212, 0.7575262784957886, 0.833680272102356, 0.9098342657089233, 0.9859882593154907, 1.062142252922058, 1.1382962465286255, 1.2144502401351929, 1.2906042337417603, 1.3667582273483276, 1.442912220954895, 1.5190660953521729, 1.5952200889587402, 1.6713740825653076, 1.747528076171875, 1.8236820697784424, 1.8998360633850098, 1.9759900569915771, 2.0521440505981445, 2.128298044204712, 2.2044520378112793, 2.2806060314178467]}, "gradients/decoder.model.decoder.layers.6.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 4.0, 6.0, 12.0, 9.0, 15.0, 25.0, 27.0, 67.0, 245.0, 886.0, 4543.0, 41208.0, 823710.0, 164360.0, 10968.0, 1777.0, 416.0, 150.0, 42.0, 28.0, 16.0, 7.0, 6.0, 8.0, 3.0, 5.0, 4.0, 0.0, 2.0, 3.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.203125, -7.01708984375, -6.8310546875, -6.64501953125, -6.458984375, -6.27294921875, -6.0869140625, -5.90087890625, -5.71484375, -5.52880859375, -5.3427734375, -5.15673828125, -4.970703125, -4.78466796875, -4.5986328125, -4.41259765625, -4.2265625, -4.04052734375, -3.8544921875, -3.66845703125, -3.482421875, -3.29638671875, -3.1103515625, -2.92431640625, -2.73828125, -2.55224609375, -2.3662109375, -2.18017578125, -1.994140625, -1.80810546875, -1.6220703125, -1.43603515625, -1.25, -1.06396484375, -0.8779296875, -0.69189453125, -0.505859375, -0.31982421875, -0.1337890625, 0.05224609375, 0.23828125, 0.42431640625, 0.6103515625, 0.79638671875, 0.982421875, 1.16845703125, 1.3544921875, 1.54052734375, 1.7265625, 1.91259765625, 2.0986328125, 2.28466796875, 2.470703125, 2.65673828125, 2.8427734375, 3.02880859375, 3.21484375, 3.40087890625, 3.5869140625, 3.77294921875, 3.958984375, 4.14501953125, 4.3310546875, 4.51708984375, 4.703125]}, "gradients/decoder.model.decoder.layers.6.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 4.0, 4.0, 5.0, 9.0, 9.0, 8.0, 13.0, 12.0, 14.0, 29.0, 21.0, 42.0, 27.0, 56.0, 31.0, 53.0, 62.0, 52.0, 76.0, 59.0, 57.0, 62.0, 64.0, 43.0, 37.0, 36.0, 29.0, 19.0, 20.0, 13.0, 12.0, 6.0, 5.0, 5.0, 6.0, 6.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-5.1171875, -4.97259521484375, -4.8280029296875, -4.68341064453125, -4.538818359375, -4.39422607421875, -4.2496337890625, -4.10504150390625, -3.96044921875, -3.81585693359375, -3.6712646484375, -3.52667236328125, -3.382080078125, -3.23748779296875, -3.0928955078125, -2.94830322265625, -2.8037109375, -2.65911865234375, -2.5145263671875, -2.36993408203125, -2.225341796875, -2.08074951171875, -1.9361572265625, -1.79156494140625, -1.64697265625, -1.50238037109375, -1.3577880859375, -1.21319580078125, -1.068603515625, -0.92401123046875, -0.7794189453125, -0.63482666015625, -0.490234375, -0.34564208984375, -0.2010498046875, -0.05645751953125, 0.088134765625, 0.23272705078125, 0.3773193359375, 0.52191162109375, 0.66650390625, 0.81109619140625, 0.9556884765625, 1.10028076171875, 1.244873046875, 1.38946533203125, 1.5340576171875, 1.67864990234375, 1.8232421875, 1.96783447265625, 2.1124267578125, 2.25701904296875, 2.401611328125, 2.54620361328125, 2.6907958984375, 2.83538818359375, 2.97998046875, 3.12457275390625, 3.2691650390625, 
3.41375732421875, 3.558349609375, 3.70294189453125, 3.8475341796875, 3.99212646484375, 4.13671875]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 4.0, 4.0, 4.0, 2.0, 5.0, 13.0, 8.0, 9.0, 19.0, 13.0, 11.0, 20.0, 18.0, 29.0, 34.0, 40.0, 38.0, 74.0, 96.0, 149.0, 253.0, 591.0, 1599.0, 5227.0, 29673.0, 513612.0, 461795.0, 27605.0, 4931.0, 1359.0, 508.0, 241.0, 156.0, 111.0, 59.0, 40.0, 33.0, 29.0, 27.0, 22.0, 13.0, 12.0, 13.0, 10.0, 14.0, 9.0, 6.0, 3.0, 2.0, 5.0, 5.0, 3.0, 5.0, 4.0, 1.0, 2.0], "bins": [-4.484375, -4.3524169921875, -4.220458984375, -4.0885009765625, -3.95654296875, -3.8245849609375, -3.692626953125, -3.5606689453125, -3.4287109375, -3.2967529296875, -3.164794921875, -3.0328369140625, -2.90087890625, -2.7689208984375, -2.636962890625, -2.5050048828125, -2.373046875, -2.2410888671875, -2.109130859375, -1.9771728515625, -1.84521484375, -1.7132568359375, -1.581298828125, -1.4493408203125, -1.3173828125, -1.1854248046875, -1.053466796875, -0.9215087890625, -0.78955078125, -0.6575927734375, -0.525634765625, -0.3936767578125, -0.26171875, -0.1297607421875, 0.002197265625, 0.1341552734375, 0.26611328125, 0.3980712890625, 0.530029296875, 0.6619873046875, 0.7939453125, 0.9259033203125, 1.057861328125, 1.1898193359375, 1.32177734375, 1.4537353515625, 1.585693359375, 1.7176513671875, 1.849609375, 1.9815673828125, 2.113525390625, 2.2454833984375, 2.37744140625, 2.5093994140625, 2.641357421875, 2.7733154296875, 2.9052734375, 3.0372314453125, 3.169189453125, 3.3011474609375, 3.43310546875, 3.5650634765625, 3.697021484375, 3.8289794921875, 3.9609375]}, "gradients/decoder.model.decoder.layers.6.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 4.0, 1.0, 2.0, 5.0, 3.0, 7.0, 10.0, 15.0, 20.0, 12.0, 14.0, 13.0, 10.0, 23.0, 26.0, 25.0, 34.0, 32.0, 26.0, 54.0, 32.0, 36.0, 39.0, 47.0, 41.0, 42.0, 38.0, 45.0, 36.0, 31.0, 28.0, 39.0, 31.0, 18.0, 25.0, 18.0, 15.0, 16.0, 17.0, 12.0, 14.0, 12.0, 11.0, 9.0, 7.0, 2.0, 3.0, 2.0, 2.0, 3.0, 0.0, 6.0, 0.0, 1.0, 0.0, 2.0], "bins": [-4.00390625, -3.88287353515625, -3.7618408203125, -3.64080810546875, -3.519775390625, -3.39874267578125, -3.2777099609375, -3.15667724609375, -3.03564453125, -2.91461181640625, -2.7935791015625, -2.67254638671875, -2.551513671875, -2.43048095703125, -2.3094482421875, -2.18841552734375, -2.0673828125, -1.94635009765625, -1.8253173828125, -1.70428466796875, -1.583251953125, -1.46221923828125, -1.3411865234375, -1.22015380859375, -1.09912109375, -0.97808837890625, -0.8570556640625, -0.73602294921875, -0.614990234375, -0.49395751953125, -0.3729248046875, -0.25189208984375, -0.130859375, -0.00982666015625, 0.1112060546875, 0.23223876953125, 0.353271484375, 0.47430419921875, 0.5953369140625, 0.71636962890625, 0.83740234375, 0.95843505859375, 1.0794677734375, 1.20050048828125, 1.321533203125, 1.44256591796875, 1.5635986328125, 1.68463134765625, 1.8056640625, 1.92669677734375, 2.0477294921875, 2.16876220703125, 2.289794921875, 2.41082763671875, 2.5318603515625, 2.65289306640625, 2.77392578125, 2.89495849609375, 3.0159912109375, 3.13702392578125, 3.258056640625, 3.37908935546875, 3.5001220703125, 3.62115478515625, 3.7421875]}, "gradients/decoder.model.decoder.layers.6.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 4.0, 7.0, 5.0, 7.0, 7.0, 18.0, 17.0, 19.0, 35.0, 73.0, 101.0, 116.0, 212.0, 413.0, 705.0, 1459.0, 3389.0, 9018.0, 
30114.0, 134329.0, 589162.0, 214584.0, 44496.0, 12171.0, 4257.0, 1801.0, 901.0, 442.0, 252.0, 145.0, 111.0, 61.0, 39.0, 28.0, 18.0, 13.0, 7.0, 5.0, 2.0, 6.0, 3.0, 5.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.861328125, -0.8349380493164062, -0.8085479736328125, -0.7821578979492188, -0.755767822265625, -0.7293777465820312, -0.7029876708984375, -0.6765975952148438, -0.65020751953125, -0.6238174438476562, -0.5974273681640625, -0.5710372924804688, -0.544647216796875, -0.5182571411132812, -0.4918670654296875, -0.46547698974609375, -0.4390869140625, -0.41269683837890625, -0.3863067626953125, -0.35991668701171875, -0.333526611328125, -0.30713653564453125, -0.2807464599609375, -0.25435638427734375, -0.22796630859375, -0.20157623291015625, -0.1751861572265625, -0.14879608154296875, -0.122406005859375, -0.09601593017578125, -0.0696258544921875, -0.04323577880859375, -0.016845703125, 0.00954437255859375, 0.0359344482421875, 0.06232452392578125, 0.088714599609375, 0.11510467529296875, 0.1414947509765625, 0.16788482666015625, 0.19427490234375, 0.22066497802734375, 0.2470550537109375, 0.27344512939453125, 0.299835205078125, 0.32622528076171875, 0.3526153564453125, 0.37900543212890625, 0.4053955078125, 0.43178558349609375, 0.4581756591796875, 0.48456573486328125, 0.510955810546875, 0.5373458862304688, 0.5637359619140625, 0.5901260375976562, 0.61651611328125, 0.6429061889648438, 0.6692962646484375, 0.6956863403320312, 0.722076416015625, 0.7484664916992188, 0.7748565673828125, 0.8012466430664062, 0.82763671875]}, "gradients/decoder.model.decoder.layers.6.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 3.0, 5.0, 10.0, 8.0, 9.0, 8.0, 12.0, 23.0, 32.0, 25.0, 45.0, 51.0, 76.0, 84.0, 123.0, 109.0, 96.0, 65.0, 42.0, 45.0, 32.0, 20.0, 17.0, 11.0, 14.0, 9.0, 5.0, 3.0, 8.0, 1.0, 3.0, 4.0, 3.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.00011426210403442383, -0.00011021923273801804, -0.00010617636144161224, -0.00010213349014520645, -9.809061884880066e-05, -9.404774755239487e-05, -9.000487625598907e-05, -8.596200495958328e-05, -8.191913366317749e-05, -7.78762623667717e-05, -7.38333910703659e-05, -6.979051977396011e-05, -6.574764847755432e-05, -6.170477718114853e-05, -5.766190588474274e-05, -5.3619034588336945e-05, -4.957616329193115e-05, -4.553329199552536e-05, -4.149042069911957e-05, -3.7447549402713776e-05, -3.3404678106307983e-05, -2.936180680990219e-05, -2.53189355134964e-05, -2.1276064217090607e-05, -1.7233192920684814e-05, -1.3190321624279022e-05, -9.14745032787323e-06, -5.104579031467438e-06, -1.0617077350616455e-06, 2.9811635613441467e-06, 7.024034857749939e-06, 1.1066906154155731e-05, 1.5109777450561523e-05, 1.9152648746967316e-05, 2.3195520043373108e-05, 2.72383913397789e-05, 3.128126263618469e-05, 3.5324133932590485e-05, 3.936700522899628e-05, 4.340987652540207e-05, 4.745274782180786e-05, 5.1495619118213654e-05, 5.5538490414619446e-05, 5.958136171102524e-05, 6.362423300743103e-05, 6.766710430383682e-05, 7.170997560024261e-05, 7.575284689664841e-05, 7.97957181930542e-05, 8.383858948945999e-05, 8.788146078586578e-05, 9.192433208227158e-05, 9.596720337867737e-05, 0.00010001007467508316, 0.00010405294597148895, 0.00010809581726789474, 0.00011213868856430054, 0.00011618155986070633, 0.00012022443115711212, 0.00012426730245351791, 0.0001283101737499237, 0.0001323530450463295, 0.0001363959163427353, 0.00014043878763914108, 0.00014448165893554688]}, 
"gradients/decoder.model.decoder.layers.6.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 1.0, 2.0, 1.0, 5.0, 1.0, 4.0, 3.0, 3.0, 3.0, 7.0, 9.0, 17.0, 23.0, 36.0, 41.0, 67.0, 100.0, 165.0, 266.0, 460.0, 865.0, 1846.0, 4270.0, 11876.0, 46039.0, 269260.0, 584640.0, 95962.0, 20778.0, 6414.0, 2636.0, 1216.0, 558.0, 351.0, 229.0, 134.0, 89.0, 46.0, 45.0, 28.0, 14.0, 9.0, 10.0, 6.0, 3.0, 8.0, 5.0, 2.0, 2.0, 2.0, 4.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0], "bins": [-1.0546875, -1.0229415893554688, -0.9911956787109375, -0.9594497680664062, -0.927703857421875, -0.8959579467773438, -0.8642120361328125, -0.8324661254882812, -0.80072021484375, -0.7689743041992188, -0.7372283935546875, -0.7054824829101562, -0.673736572265625, -0.6419906616210938, -0.6102447509765625, -0.5784988403320312, -0.5467529296875, -0.5150070190429688, -0.4832611083984375, -0.45151519775390625, -0.419769287109375, -0.38802337646484375, -0.3562774658203125, -0.32453155517578125, -0.29278564453125, -0.26103973388671875, -0.2292938232421875, -0.19754791259765625, -0.165802001953125, -0.13405609130859375, -0.1023101806640625, -0.07056427001953125, -0.038818359375, -0.00707244873046875, 0.0246734619140625, 0.05641937255859375, 0.088165283203125, 0.11991119384765625, 0.1516571044921875, 0.18340301513671875, 0.21514892578125, 0.24689483642578125, 0.2786407470703125, 0.31038665771484375, 0.342132568359375, 0.37387847900390625, 0.4056243896484375, 0.43737030029296875, 0.4691162109375, 0.5008621215820312, 0.5326080322265625, 0.5643539428710938, 0.596099853515625, 0.6278457641601562, 0.6595916748046875, 0.6913375854492188, 0.72308349609375, 0.7548294067382812, 0.7865753173828125, 0.8183212280273438, 0.850067138671875, 0.8818130493164062, 0.9135589599609375, 0.9453048706054688, 0.97705078125]}, "gradients/decoder.model.decoder.layers.6.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 6.0, 4.0, 1.0, 2.0, 3.0, 7.0, 9.0, 2.0, 16.0, 13.0, 17.0, 23.0, 21.0, 27.0, 43.0, 38.0, 65.0, 81.0, 101.0, 97.0, 91.0, 61.0, 43.0, 51.0, 32.0, 28.0, 13.0, 24.0, 31.0, 13.0, 8.0, 7.0, 7.0, 4.0, 3.0, 3.0, 4.0, 2.0, 3.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.386474609375, -0.37456512451171875, -0.3626556396484375, -0.35074615478515625, -0.338836669921875, -0.32692718505859375, -0.3150177001953125, -0.30310821533203125, -0.29119873046875, -0.27928924560546875, -0.2673797607421875, -0.25547027587890625, -0.243560791015625, -0.23165130615234375, -0.2197418212890625, -0.20783233642578125, -0.1959228515625, -0.18401336669921875, -0.1721038818359375, -0.16019439697265625, -0.148284912109375, -0.13637542724609375, -0.1244659423828125, -0.11255645751953125, -0.10064697265625, -0.08873748779296875, -0.0768280029296875, -0.06491851806640625, -0.053009033203125, -0.04109954833984375, -0.0291900634765625, -0.01728057861328125, -0.00537109375, 0.00653839111328125, 0.0184478759765625, 0.03035736083984375, 0.042266845703125, 0.05417633056640625, 0.0660858154296875, 0.07799530029296875, 0.08990478515625, 0.10181427001953125, 0.1137237548828125, 0.12563323974609375, 0.137542724609375, 0.14945220947265625, 0.1613616943359375, 0.17327117919921875, 0.1851806640625, 0.19709014892578125, 0.2089996337890625, 0.22090911865234375, 0.232818603515625, 0.24472808837890625, 0.2566375732421875, 0.26854705810546875, 0.28045654296875, 0.29236602783203125, 0.3042755126953125, 0.31618499755859375, 0.328094482421875, 0.34000396728515625, 
0.3519134521484375, 0.36382293701171875, 0.375732421875]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 2.0, 11.0, 56.0, 179.0, 391.0, 271.0, 77.0, 18.0, 7.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-38.14911651611328, -37.380836486816406, -36.6125602722168, -35.84428024291992, -35.07600402832031, -34.30772399902344, -33.53944396972656, -32.77116775512695, -32.00288772583008, -31.234609603881836, -30.466331481933594, -29.69805145263672, -28.929773330688477, -28.161495208740234, -27.39321517944336, -26.624937057495117, -25.856658935546875, -25.088380813598633, -24.32010269165039, -23.551822662353516, -22.783544540405273, -22.01526641845703, -21.246986389160156, -20.478708267211914, -19.710430145263672, -18.94215202331543, -18.173873901367188, -17.405593872070312, -16.63731575012207, -15.869037628173828, -15.10075855255127, -14.332479476928711, -13.564202308654785, -12.795923233032227, -12.027645111083984, -11.259366989135742, -10.491087913513184, -9.722808837890625, -8.954530715942383, -8.18625259399414, -7.417973518371582, -6.649694919586182, -5.881416320800781, -5.113137722015381, -4.3448591232299805, -3.57658052444458, -2.8083019256591797, -2.0400233268737793, -1.271744728088379, -0.5034661293029785, 0.2648124694824219, 1.0330910682678223, 1.8013696670532227, 2.569648265838623, 3.3379268646240234, 4.106205463409424, 4.874484062194824, 5.642762660980225, 6.411041259765625, 7.179319858551025, 7.947598457336426, 8.715877532958984, 9.484155654907227, 10.252433776855469, 11.020712852478027]}, "gradients/decoder.model.decoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 2.0, 6.0, 9.0, 3.0, 7.0, 7.0, 13.0, 12.0, 10.0, 10.0, 18.0, 21.0, 13.0, 22.0, 23.0, 32.0, 26.0, 19.0, 31.0, 43.0, 38.0, 47.0, 40.0, 37.0, 34.0, 36.0, 41.0, 32.0, 41.0, 37.0, 27.0, 32.0, 34.0, 29.0, 28.0, 16.0, 25.0, 20.0, 15.0, 12.0, 14.0, 6.0, 13.0, 7.0, 6.0, 5.0, 7.0, 0.0, 1.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.737830638885498, -5.536037445068359, -5.334244251251221, -5.132451057434082, -4.930657863616943, -4.728864669799805, -4.527071475982666, -4.325278282165527, -4.123485088348389, -3.92169189453125, -3.7198987007141113, -3.5181055068969727, -3.316312313079834, -3.1145191192626953, -2.9127259254455566, -2.710932731628418, -2.5091395378112793, -2.3073463439941406, -2.105553150177002, -1.9037599563598633, -1.7019667625427246, -1.500173568725586, -1.2983803749084473, -1.0965871810913086, -0.8947939872741699, -0.6930007934570312, -0.4912075996398926, -0.2894144058227539, -0.08762121200561523, 0.11417198181152344, 0.3159651756286621, 0.5177583694458008, 0.7195510864257812, 0.9213442802429199, 1.1231374740600586, 1.3249306678771973, 1.526723861694336, 1.7285170555114746, 1.9303102493286133, 2.132103443145752, 2.3338966369628906, 2.5356898307800293, 2.737483024597168, 2.9392762184143066, 3.1410694122314453, 3.342862606048584, 3.5446557998657227, 3.7464489936828613, 3.9482421875, 4.150035381317139, 4.351828575134277, 4.553621768951416, 4.755414962768555, 4.957208156585693, 5.159001350402832, 5.360794544219971, 5.562587738037109, 5.764380931854248, 5.966174125671387, 6.167967319488525, 6.369760513305664, 6.571553707122803, 6.773346900939941, 6.97514009475708, 
7.176933288574219]}, "gradients/decoder.model.decoder.layers.5.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 5.0, 5.0, 5.0, 6.0, 12.0, 14.0, 36.0, 72.0, 159.0, 382.0, 1008.0, 2868.0, 11131.0, 70393.0, 1254221.0, 2656404.0, 172452.0, 18496.0, 4277.0, 1389.0, 532.0, 212.0, 93.0, 50.0, 23.0, 13.0, 9.0, 6.0, 5.0, 4.0, 3.0, 2.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.41796875, -5.2384033203125, -5.058837890625, -4.8792724609375, -4.69970703125, -4.5201416015625, -4.340576171875, -4.1610107421875, -3.9814453125, -3.8018798828125, -3.622314453125, -3.4427490234375, -3.26318359375, -3.0836181640625, -2.904052734375, -2.7244873046875, -2.544921875, -2.3653564453125, -2.185791015625, -2.0062255859375, -1.82666015625, -1.6470947265625, -1.467529296875, -1.2879638671875, -1.1083984375, -0.9288330078125, -0.749267578125, -0.5697021484375, -0.39013671875, -0.2105712890625, -0.031005859375, 0.1485595703125, 0.328125, 0.5076904296875, 0.687255859375, 0.8668212890625, 1.04638671875, 1.2259521484375, 1.405517578125, 1.5850830078125, 1.7646484375, 1.9442138671875, 2.123779296875, 2.3033447265625, 2.48291015625, 2.6624755859375, 2.842041015625, 3.0216064453125, 3.201171875, 3.3807373046875, 3.560302734375, 3.7398681640625, 3.91943359375, 4.0989990234375, 4.278564453125, 4.4581298828125, 4.6376953125, 4.8172607421875, 4.996826171875, 5.1763916015625, 5.35595703125, 5.5355224609375, 5.715087890625, 5.8946533203125, 6.07421875]}, "gradients/decoder.model.decoder.layers.5.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0, 6.0, 5.0, 9.0, 4.0, 10.0, 21.0, 18.0, 16.0, 27.0, 28.0, 30.0, 37.0, 38.0, 50.0, 41.0, 52.0, 53.0, 62.0, 50.0, 73.0, 56.0, 48.0, 49.0, 33.0, 39.0, 31.0, 21.0, 16.0, 19.0, 14.0, 11.0, 10.0, 8.0, 9.0, 3.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.025390625, -2.929840087890625, -2.83428955078125, -2.738739013671875, -2.6431884765625, -2.547637939453125, -2.45208740234375, -2.356536865234375, -2.260986328125, -2.165435791015625, -2.06988525390625, -1.974334716796875, -1.8787841796875, -1.783233642578125, -1.68768310546875, -1.592132568359375, -1.49658203125, -1.401031494140625, -1.30548095703125, -1.209930419921875, -1.1143798828125, -1.018829345703125, -0.92327880859375, -0.827728271484375, -0.732177734375, -0.636627197265625, -0.54107666015625, -0.445526123046875, -0.3499755859375, -0.254425048828125, -0.15887451171875, -0.063323974609375, 0.0322265625, 0.127777099609375, 0.22332763671875, 0.318878173828125, 0.4144287109375, 0.509979248046875, 0.60552978515625, 0.701080322265625, 0.796630859375, 0.892181396484375, 0.98773193359375, 1.083282470703125, 1.1788330078125, 1.274383544921875, 1.36993408203125, 1.465484619140625, 1.56103515625, 1.656585693359375, 1.75213623046875, 1.847686767578125, 1.9432373046875, 2.038787841796875, 2.13433837890625, 2.229888916015625, 2.325439453125, 2.420989990234375, 2.51654052734375, 2.612091064453125, 2.7076416015625, 2.803192138671875, 2.89874267578125, 2.994293212890625, 3.08984375]}, "gradients/decoder.model.decoder.layers.5.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 4.0, 7.0, 1.0, 2.0, 7.0, 12.0, 10.0, 12.0, 12.0, 32.0, 39.0, 55.0, 82.0, 124.0, 225.0, 377.0, 704.0, 1601.0, 4532.0, 16883.0, 106184.0, 2784890.0, 1196700.0, 63944.0, 11753.0, 3387.0, 1260.0, 587.0, 
324.0, 175.0, 93.0, 72.0, 59.0, 30.0, 31.0, 22.0, 11.0, 10.0, 7.0, 9.0, 3.0, 5.0, 1.0, 1.0, 6.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 2.0], "bins": [-4.9609375, -4.81005859375, -4.6591796875, -4.50830078125, -4.357421875, -4.20654296875, -4.0556640625, -3.90478515625, -3.75390625, -3.60302734375, -3.4521484375, -3.30126953125, -3.150390625, -2.99951171875, -2.8486328125, -2.69775390625, -2.546875, -2.39599609375, -2.2451171875, -2.09423828125, -1.943359375, -1.79248046875, -1.6416015625, -1.49072265625, -1.33984375, -1.18896484375, -1.0380859375, -0.88720703125, -0.736328125, -0.58544921875, -0.4345703125, -0.28369140625, -0.1328125, 0.01806640625, 0.1689453125, 0.31982421875, 0.470703125, 0.62158203125, 0.7724609375, 0.92333984375, 1.07421875, 1.22509765625, 1.3759765625, 1.52685546875, 1.677734375, 1.82861328125, 1.9794921875, 2.13037109375, 2.28125, 2.43212890625, 2.5830078125, 2.73388671875, 2.884765625, 3.03564453125, 3.1865234375, 3.33740234375, 3.48828125, 3.63916015625, 3.7900390625, 3.94091796875, 4.091796875, 4.24267578125, 4.3935546875, 4.54443359375, 4.6953125]}, "gradients/decoder.model.decoder.layers.5.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 6.0, 3.0, 8.0, 12.0, 14.0, 18.0, 32.0, 35.0, 62.0, 91.0, 145.0, 255.0, 417.0, 700.0, 755.0, 606.0, 342.0, 223.0, 112.0, 74.0, 55.0, 39.0, 17.0, 22.0, 11.0, 7.0, 5.0, 2.0, 5.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2314453125, -1.184539794921875, -1.13763427734375, -1.090728759765625, -1.0438232421875, -0.996917724609375, -0.95001220703125, -0.903106689453125, -0.856201171875, -0.809295654296875, -0.76239013671875, -0.715484619140625, -0.6685791015625, -0.621673583984375, -0.57476806640625, -0.527862548828125, -0.48095703125, -0.434051513671875, -0.38714599609375, -0.340240478515625, -0.2933349609375, -0.246429443359375, -0.19952392578125, -0.152618408203125, -0.105712890625, -0.058807373046875, -0.01190185546875, 0.035003662109375, 0.0819091796875, 0.128814697265625, 0.17572021484375, 0.222625732421875, 0.26953125, 0.316436767578125, 0.36334228515625, 0.410247802734375, 0.4571533203125, 0.504058837890625, 0.55096435546875, 0.597869873046875, 0.644775390625, 0.691680908203125, 0.73858642578125, 0.785491943359375, 0.8323974609375, 0.879302978515625, 0.92620849609375, 0.973114013671875, 1.02001953125, 1.066925048828125, 1.11383056640625, 1.160736083984375, 1.2076416015625, 1.254547119140625, 1.30145263671875, 1.348358154296875, 1.395263671875, 1.442169189453125, 1.48907470703125, 1.535980224609375, 1.5828857421875, 1.629791259765625, 1.67669677734375, 1.723602294921875, 1.7705078125]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 9.0, 8.0, 17.0, 23.0, 33.0, 50.0, 81.0, 70.0, 106.0, 114.0, 96.0, 110.0, 68.0, 65.0, 46.0, 39.0, 17.0, 18.0, 13.0, 8.0, 7.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.17844820022583, -7.004284858703613, -6.8301215171813965, -6.655958652496338, -6.481795310974121, -6.307631969451904, -6.1334686279296875, -5.959305763244629, -5.785142421722412, -5.610979080200195, -5.4368157386779785, -5.26265287399292, -5.088489532470703, -4.914326190948486, -4.7401628494262695, -4.565999984741211, -4.391836166381836, -4.217672824859619, 
-4.043509483337402, -3.8693463802337646, -3.695183277130127, -3.52101993560791, -3.3468565940856934, -3.1726934909820557, -2.998530387878418, -2.824367046356201, -2.6502039432525635, -2.4760406017303467, -2.301877498626709, -2.127714157104492, -1.953550934791565, -1.7793877124786377, -1.605224609375, -1.4310613870620728, -1.2568981647491455, -1.0827348232269287, -0.9085716605186462, -0.734408438205719, -0.560245156288147, -0.3860819339752197, -0.21191871166229248, -0.03775547444820404, 0.1364077627658844, 0.31057101488113403, 0.4847342371940613, 0.6588974595069885, 0.8330607414245605, 1.0072239637374878, 1.181387186050415, 1.3555504083633423, 1.5297136306762695, 1.7038769721984863, 1.878040075302124, 2.052203416824341, 2.2263665199279785, 2.4005298614501953, 2.574693202972412, 2.748856544494629, 2.9230196475982666, 3.0971829891204834, 3.271346092224121, 3.445509433746338, 3.6196727752685547, 3.7938358783721924, 3.96799898147583]}, "gradients/decoder.model.decoder.layers.5.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 7.0, 3.0, 9.0, 9.0, 9.0, 9.0, 18.0, 13.0, 24.0, 20.0, 16.0, 27.0, 38.0, 25.0, 37.0, 46.0, 35.0, 43.0, 43.0, 48.0, 52.0, 56.0, 35.0, 42.0, 34.0, 37.0, 41.0, 30.0, 34.0, 24.0, 17.0, 26.0, 20.0, 12.0, 9.0, 14.0, 7.0, 10.0, 5.0, 3.0, 5.0, 3.0, 3.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-2.608947277069092, -2.521838426589966, -2.43472957611084, -2.347620964050293, -2.260512113571167, -2.173403263092041, -2.086294651031494, -1.9991858005523682, -1.9120769500732422, -1.8249680995941162, -1.7378593683242798, -1.6507506370544434, -1.5636417865753174, -1.4765329360961914, -1.389424204826355, -1.3023154735565186, -1.2152066230773926, -1.1280977725982666, -1.0409890413284302, -0.953880250453949, -0.8667714595794678, -0.7796626687049866, -0.6925538778305054, -0.6054450869560242, -0.518336296081543, -0.43122750520706177, -0.34411871433258057, -0.25700992345809937, -0.16990113258361816, -0.08279234170913696, 0.004316449165344238, 0.09142524003982544, 0.17853403091430664, 0.26564282178878784, 0.35275161266326904, 0.43986040353775024, 0.5269691944122314, 0.6140779852867126, 0.7011867761611938, 0.788295567035675, 0.8754043579101562, 0.9625131487846375, 1.0496219396591187, 1.136730670928955, 1.223839521408081, 1.310948371887207, 1.3980571031570435, 1.4851658344268799, 1.5722746849060059, 1.6593835353851318, 1.7464922666549683, 1.8336009979248047, 1.9207098484039307, 2.0078186988830566, 2.0949273109436035, 2.1820361614227295, 2.2691450119018555, 2.3562538623809814, 2.4433627128601074, 2.5304713249206543, 2.6175801753997803, 2.7046890258789062, 2.791797637939453, 2.878906488418579, 2.966015338897705]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 1.0, 4.0, 3.0, 5.0, 5.0, 8.0, 15.0, 20.0, 26.0, 36.0, 65.0, 73.0, 106.0, 179.0, 266.0, 481.0, 787.0, 1476.0, 2667.0, 5639.0, 12049.0, 28421.0, 77796.0, 260697.0, 432261.0, 142762.0, 47314.0, 18484.0, 8166.0, 3934.0, 2012.0, 1128.0, 613.0, 383.0, 215.0, 163.0, 92.0, 67.0, 46.0, 27.0, 16.0, 18.0, 11.0, 7.0, 6.0, 5.0, 2.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.3896484375, -0.3779335021972656, -0.36621856689453125, -0.3545036315917969, -0.3427886962890625, -0.3310737609863281, -0.31935882568359375, -0.3076438903808594, -0.295928955078125, -0.2842140197753906, -0.27249908447265625, -0.2607841491699219, -0.2490692138671875, -0.23735427856445312, 
-0.22563934326171875, -0.21392440795898438, -0.20220947265625, -0.19049453735351562, -0.17877960205078125, -0.16706466674804688, -0.1553497314453125, -0.14363479614257812, -0.13191986083984375, -0.12020492553710938, -0.108489990234375, -0.09677505493164062, -0.08506011962890625, -0.07334518432617188, -0.0616302490234375, -0.049915313720703125, -0.03820037841796875, -0.026485443115234375, -0.0147705078125, -0.003055572509765625, 0.00865936279296875, 0.020374298095703125, 0.0320892333984375, 0.043804168701171875, 0.05551910400390625, 0.06723403930664062, 0.078948974609375, 0.09066390991210938, 0.10237884521484375, 0.11409378051757812, 0.1258087158203125, 0.13752365112304688, 0.14923858642578125, 0.16095352172851562, 0.17266845703125, 0.18438339233398438, 0.19609832763671875, 0.20781326293945312, 0.2195281982421875, 0.23124313354492188, 0.24295806884765625, 0.2546730041503906, 0.266387939453125, 0.2781028747558594, 0.28981781005859375, 0.3015327453613281, 0.3132476806640625, 0.3249626159667969, 0.33667755126953125, 0.3483924865722656, 0.360107421875]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 2.0, 3.0, 0.0, 8.0, 6.0, 8.0, 10.0, 6.0, 7.0, 12.0, 25.0, 22.0, 25.0, 29.0, 46.0, 44.0, 37.0, 59.0, 54.0, 52.0, 59.0, 53.0, 59.0, 49.0, 38.0, 52.0, 44.0, 32.0, 32.0, 27.0, 24.0, 21.0, 16.0, 5.0, 14.0, 5.0, 15.0, 4.0, 1.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.892578125, -1.8271484375, -1.76171875, -1.6962890625, -1.630859375, -1.5654296875, -1.5, -1.4345703125, -1.369140625, -1.3037109375, -1.23828125, -1.1728515625, -1.107421875, -1.0419921875, -0.9765625, -0.9111328125, -0.845703125, -0.7802734375, -0.71484375, -0.6494140625, -0.583984375, -0.5185546875, -0.453125, -0.3876953125, -0.322265625, -0.2568359375, -0.19140625, -0.1259765625, -0.060546875, 0.0048828125, 0.0703125, 0.1357421875, 0.201171875, 0.2666015625, 0.33203125, 0.3974609375, 0.462890625, 0.5283203125, 0.59375, 0.6591796875, 0.724609375, 0.7900390625, 0.85546875, 0.9208984375, 0.986328125, 1.0517578125, 1.1171875, 1.1826171875, 1.248046875, 1.3134765625, 1.37890625, 1.4443359375, 1.509765625, 1.5751953125, 1.640625, 1.7060546875, 1.771484375, 1.8369140625, 1.90234375, 1.9677734375, 2.033203125, 2.0986328125, 2.1640625, 2.2294921875, 2.294921875]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 4.0, 2.0, 4.0, 5.0, 10.0, 13.0, 16.0, 10.0, 33.0, 50.0, 58.0, 80.0, 125.0, 205.0, 288.0, 425.0, 727.0, 1161.0, 1914.0, 3177.0, 5585.0, 10424.0, 19951.0, 42359.0, 98319.0, 260895.0, 347696.0, 141050.0, 57486.0, 26293.0, 13086.0, 6991.0, 3994.0, 2300.0, 1393.0, 856.0, 512.0, 361.0, 225.0, 182.0, 99.0, 74.0, 33.0, 28.0, 22.0, 17.0, 11.0, 3.0, 4.0, 5.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20947265625, -0.20229530334472656, -0.19511795043945312, -0.1879405975341797, -0.18076324462890625, -0.1735858917236328, -0.16640853881835938, -0.15923118591308594, -0.1520538330078125, -0.14487648010253906, -0.13769912719726562, -0.1305217742919922, -0.12334442138671875, -0.11616706848144531, -0.10898971557617188, -0.10181236267089844, -0.094635009765625, -0.08745765686035156, -0.08028030395507812, -0.07310295104980469, -0.06592559814453125, -0.05874824523925781, -0.051570892333984375, -0.04439353942871094, -0.0372161865234375, -0.030038833618164062, -0.022861480712890625, -0.015684127807617188, 
-0.00850677490234375, -0.0013294219970703125, 0.005847930908203125, 0.013025283813476562, 0.02020263671875, 0.027379989624023438, 0.034557342529296875, 0.04173469543457031, 0.04891204833984375, 0.05608940124511719, 0.06326675415039062, 0.07044410705566406, 0.0776214599609375, 0.08479881286621094, 0.09197616577148438, 0.09915351867675781, 0.10633087158203125, 0.11350822448730469, 0.12068557739257812, 0.12786293029785156, 0.135040283203125, 0.14221763610839844, 0.14939498901367188, 0.1565723419189453, 0.16374969482421875, 0.1709270477294922, 0.17810440063476562, 0.18528175354003906, 0.1924591064453125, 0.19963645935058594, 0.20681381225585938, 0.2139911651611328, 0.22116851806640625, 0.2283458709716797, 0.23552322387695312, 0.24270057678222656, 0.2498779296875]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 3.0, 4.0, 6.0, 4.0, 5.0, 7.0, 12.0, 15.0, 14.0, 20.0, 23.0, 27.0, 32.0, 32.0, 34.0, 45.0, 37.0, 49.0, 62.0, 55.0, 57.0, 46.0, 54.0, 42.0, 46.0, 43.0, 29.0, 31.0, 29.0, 23.0, 18.0, 16.0, 18.0, 15.0, 5.0, 13.0, 7.0, 4.0, 7.0, 4.0, 4.0, 3.0, 3.0, 3.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-3.650390625, -3.530548095703125, -3.41070556640625, -3.290863037109375, -3.1710205078125, -3.051177978515625, -2.93133544921875, -2.811492919921875, -2.691650390625, -2.571807861328125, -2.45196533203125, -2.332122802734375, -2.2122802734375, -2.092437744140625, -1.97259521484375, -1.852752685546875, -1.73291015625, -1.613067626953125, -1.49322509765625, -1.373382568359375, -1.2535400390625, -1.133697509765625, -1.01385498046875, -0.894012451171875, -0.774169921875, -0.654327392578125, -0.53448486328125, -0.414642333984375, -0.2947998046875, -0.174957275390625, -0.05511474609375, 0.064727783203125, 0.1845703125, 0.304412841796875, 0.42425537109375, 0.544097900390625, 0.6639404296875, 0.783782958984375, 0.90362548828125, 1.023468017578125, 1.143310546875, 1.263153076171875, 1.38299560546875, 1.502838134765625, 1.6226806640625, 1.742523193359375, 1.86236572265625, 1.982208251953125, 2.10205078125, 2.221893310546875, 2.34173583984375, 2.461578369140625, 2.5814208984375, 2.701263427734375, 2.82110595703125, 2.940948486328125, 3.060791015625, 3.180633544921875, 3.30047607421875, 3.420318603515625, 3.5401611328125, 3.660003662109375, 3.77984619140625, 3.899688720703125, 4.01953125]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 4.0, 8.0, 14.0, 6.0, 21.0, 30.0, 44.0, 59.0, 88.0, 147.0, 241.0, 555.0, 1476.0, 5406.0, 40945.0, 863995.0, 121194.0, 10620.0, 2267.0, 770.0, 305.0, 157.0, 67.0, 60.0, 33.0, 16.0, 6.0, 9.0, 7.0, 3.0, 3.0, 2.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0914306640625, -0.0887155532836914, -0.08600044250488281, -0.08328533172607422, -0.08057022094726562, -0.07785511016845703, -0.07513999938964844, -0.07242488861083984, -0.06970977783203125, -0.06699466705322266, -0.06427955627441406, -0.06156444549560547, -0.058849334716796875, -0.05613422393798828, -0.05341911315917969, -0.050704002380371094, -0.0479888916015625, -0.045273780822753906, -0.04255867004394531, -0.03984355926513672, -0.037128448486328125, -0.03441333770751953, -0.03169822692871094, -0.028983116149902344, -0.02626800537109375, -0.023552894592285156, -0.020837783813476562, -0.01812267303466797, 
-0.015407562255859375, -0.012692451477050781, -0.009977340698242188, -0.007262229919433594, -0.004547119140625, -0.0018320083618164062, 0.0008831024169921875, 0.0035982131958007812, 0.006313323974609375, 0.009028434753417969, 0.011743545532226562, 0.014458656311035156, 0.01717376708984375, 0.019888877868652344, 0.022603988647460938, 0.02531909942626953, 0.028034210205078125, 0.03074932098388672, 0.03346443176269531, 0.036179542541503906, 0.0388946533203125, 0.041609764099121094, 0.04432487487792969, 0.04703998565673828, 0.049755096435546875, 0.05247020721435547, 0.05518531799316406, 0.057900428771972656, 0.06061553955078125, 0.06333065032958984, 0.06604576110839844, 0.06876087188720703, 0.07147598266601562, 0.07419109344482422, 0.07690620422363281, 0.0796213150024414, 0.08233642578125]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 5.0, 1.0, 9.0, 7.0, 13.0, 16.0, 25.0, 37.0, 57.0, 78.0, 129.0, 141.0, 143.0, 117.0, 65.0, 51.0, 33.0, 18.0, 24.0, 12.0, 8.0, 6.0, 4.0, 3.0, 1.0, 4.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.8279762268066406e-05, -4.6897679567337036e-05, -4.5515596866607666e-05, -4.4133514165878296e-05, -4.2751431465148926e-05, -4.1369348764419556e-05, -3.9987266063690186e-05, -3.8605183362960815e-05, -3.7223100662231445e-05, -3.5841017961502075e-05, -3.4458935260772705e-05, -3.3076852560043335e-05, -3.1694769859313965e-05, -3.0312687158584595e-05, -2.8930604457855225e-05, -2.7548521757125854e-05, -2.6166439056396484e-05, -2.4784356355667114e-05, -2.3402273654937744e-05, -2.2020190954208374e-05, -2.0638108253479004e-05, -1.9256025552749634e-05, -1.7873942852020264e-05, -1.6491860151290894e-05, -1.5109777450561523e-05, -1.3727694749832153e-05, -1.2345612049102783e-05, -1.0963529348373413e-05, -9.581446647644043e-06, -8.199363946914673e-06, -6.817281246185303e-06, -5.435198545455933e-06, -4.0531158447265625e-06, -2.6710331439971924e-06, -1.2889504432678223e-06, 9.313225746154785e-08, 1.475214958190918e-06, 2.857297658920288e-06, 4.239380359649658e-06, 5.621463060379028e-06, 7.0035457611083984e-06, 8.385628461837769e-06, 9.767711162567139e-06, 1.1149793863296509e-05, 1.2531876564025879e-05, 1.3913959264755249e-05, 1.529604196548462e-05, 1.667812466621399e-05, 1.806020736694336e-05, 1.944229006767273e-05, 2.08243727684021e-05, 2.220645546913147e-05, 2.358853816986084e-05, 2.497062087059021e-05, 2.635270357131958e-05, 2.773478627204895e-05, 2.911686897277832e-05, 3.049895167350769e-05, 3.188103437423706e-05, 3.326311707496643e-05, 3.46451997756958e-05, 3.602728247642517e-05, 3.740936517715454e-05, 3.879144787788391e-05, 4.017353057861328e-05]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 4.0, 1.0, 7.0, 12.0, 17.0, 23.0, 40.0, 80.0, 133.0, 284.0, 581.0, 1561.0, 5184.0, 26217.0, 429064.0, 546826.0, 30133.0, 5572.0, 1620.0, 629.0, 257.0, 128.0, 67.0, 43.0, 29.0, 14.0, 12.0, 5.0, 9.0, 2.0, 0.0, 1.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.10931396484375, -0.10643959045410156, -0.10356521606445312, -0.10069084167480469, -0.09781646728515625, -0.09494209289550781, -0.09206771850585938, -0.08919334411621094, -0.0863189697265625, -0.08344459533691406, -0.08057022094726562, 
-0.07769584655761719, -0.07482147216796875, -0.07194709777832031, -0.06907272338867188, -0.06619834899902344, -0.063323974609375, -0.06044960021972656, -0.057575225830078125, -0.05470085144042969, -0.05182647705078125, -0.04895210266113281, -0.046077728271484375, -0.04320335388183594, -0.0403289794921875, -0.03745460510253906, -0.034580230712890625, -0.03170585632324219, -0.02883148193359375, -0.025957107543945312, -0.023082733154296875, -0.020208358764648438, -0.017333984375, -0.014459609985351562, -0.011585235595703125, -0.008710861206054688, -0.00583648681640625, -0.0029621124267578125, -8.7738037109375e-05, 0.0027866363525390625, 0.0056610107421875, 0.008535385131835938, 0.011409759521484375, 0.014284133911132812, 0.01715850830078125, 0.020032882690429688, 0.022907257080078125, 0.025781631469726562, 0.028656005859375, 0.03153038024902344, 0.034404754638671875, 0.03727912902832031, 0.04015350341796875, 0.04302787780761719, 0.045902252197265625, 0.04877662658691406, 0.0516510009765625, 0.05452537536621094, 0.057399749755859375, 0.06027412414550781, 0.06314849853515625, 0.06602287292480469, 0.06889724731445312, 0.07177162170410156, 0.07464599609375]}, "gradients/decoder.model.decoder.layers.5.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 4.0, 1.0, 5.0, 5.0, 8.0, 5.0, 14.0, 31.0, 62.0, 132.0, 294.0, 258.0, 92.0, 44.0, 23.0, 15.0, 8.0, 5.0, 4.0, 3.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03302001953125, -0.032169342041015625, -0.03131866455078125, -0.030467987060546875, -0.0296173095703125, -0.028766632080078125, -0.02791595458984375, -0.027065277099609375, -0.026214599609375, -0.025363922119140625, -0.02451324462890625, -0.023662567138671875, -0.0228118896484375, -0.021961212158203125, -0.02111053466796875, -0.020259857177734375, -0.0194091796875, -0.018558502197265625, -0.01770782470703125, -0.016857147216796875, -0.0160064697265625, -0.015155792236328125, -0.01430511474609375, -0.013454437255859375, -0.012603759765625, -0.011753082275390625, -0.01090240478515625, -0.010051727294921875, -0.0092010498046875, -0.008350372314453125, -0.00749969482421875, -0.006649017333984375, -0.00579833984375, -0.004947662353515625, -0.00409698486328125, -0.003246307373046875, -0.0023956298828125, -0.001544952392578125, -0.00069427490234375, 0.000156402587890625, 0.001007080078125, 0.001857757568359375, 0.00270843505859375, 0.003559112548828125, 0.0044097900390625, 0.005260467529296875, 0.00611114501953125, 0.006961822509765625, 0.0078125, 0.008663177490234375, 0.00951385498046875, 0.010364532470703125, 0.0112152099609375, 0.012065887451171875, 0.01291656494140625, 0.013767242431640625, 0.014617919921875, 0.015468597412109375, 0.01631927490234375, 0.017169952392578125, 0.0180206298828125, 0.018871307373046875, 0.01972198486328125, 0.020572662353515625, 0.02142333984375]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 8.0, 17.0, 46.0, 122.0, 205.0, 247.0, 187.0, 101.0, 41.0, 20.0, 9.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-10.502792358398438, -10.27670669555664, -10.050621032714844, -9.824535369873047, 
-9.59844970703125, -9.372364044189453, -9.146278381347656, -8.92019271850586, -8.694107055664062, -8.468021392822266, -8.241935729980469, -8.015850067138672, -7.789764404296875, -7.563678741455078, -7.337592601776123, -7.111506938934326, -6.885420799255371, -6.659335136413574, -6.433249473571777, -6.2071638107299805, -5.981078147888184, -5.754992485046387, -5.528906345367432, -5.302820682525635, -5.076735019683838, -4.850649356842041, -4.624563694000244, -4.398478031158447, -4.172391891479492, -3.9463064670562744, -3.7202205657958984, -3.4941349029541016, -3.2680487632751465, -3.0419631004333496, -2.8158774375915527, -2.5897915363311768, -2.36370587348938, -2.137620210647583, -1.9115344285964966, -1.6854486465454102, -1.4593629837036133, -1.2332773208618164, -1.00719153881073, -0.7811058163642883, -0.5550200939178467, -0.3289344310760498, -0.10284864902496338, 0.12323713302612305, 0.3493227958679199, 0.5754085183143616, 0.8014942407608032, 1.0275800228118896, 1.2536656856536865, 1.4797513484954834, 1.7058371305465698, 1.9319229125976562, 2.158008575439453, 2.38409423828125, 2.610179901123047, 2.836265802383423, 3.0623514652252197, 3.2884371280670166, 3.5145230293273926, 3.7406086921691895, 3.9666943550109863]}, "gradients/decoder.model.decoder.layers.5.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 6.0, 3.0, 4.0, 4.0, 4.0, 10.0, 6.0, 18.0, 13.0, 15.0, 28.0, 16.0, 24.0, 25.0, 40.0, 36.0, 47.0, 43.0, 47.0, 45.0, 55.0, 56.0, 52.0, 45.0, 43.0, 40.0, 49.0, 29.0, 31.0, 32.0, 25.0, 24.0, 18.0, 10.0, 14.0, 11.0, 10.0, 5.0, 5.0, 4.0, 9.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.7506234645843506, -1.6942590475082397, -1.6378945112228394, -1.5815300941467285, -1.5251655578613281, -1.4688011407852173, -1.4124367237091064, -1.356072187423706, -1.2997076511383057, -1.2433432340621948, -1.1869786977767944, -1.1306142807006836, -1.0742497444152832, -1.0178853273391724, -0.9615208506584167, -0.9051563739776611, -0.8487919569015503, -0.7924274802207947, -0.7360630035400391, -0.6796985864639282, -0.6233340501785278, -0.566969633102417, -0.5106051564216614, -0.45424067974090576, -0.39787620306015015, -0.34151172637939453, -0.2851472496986389, -0.2287828028202057, -0.17241832613945007, -0.11605384945869446, -0.05968940258026123, -0.0033249258995056152, 0.05303955078125, 0.10940402001142502, 0.16576848924160004, 0.22213295102119446, 0.2784974277019501, 0.3348619043827057, 0.3912263512611389, 0.44759082794189453, 0.5039553046226501, 0.5603197813034058, 0.6166842579841614, 0.673048734664917, 0.7294131517410278, 0.7857776880264282, 0.8421421051025391, 0.8985065817832947, 0.9548710584640503, 1.0112354755401611, 1.0676000118255615, 1.1239644289016724, 1.1803289651870728, 1.2366933822631836, 1.293057918548584, 1.3494223356246948, 1.4057867527008057, 1.4621511697769165, 1.518515706062317, 1.5748801231384277, 1.6312446594238281, 1.687609076499939, 1.7439734935760498, 1.8003380298614502, 1.8567025661468506]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 8.0, 5.0, 6.0, 26.0, 33.0, 44.0, 73.0, 115.0, 178.0, 303.0, 485.0, 870.0, 1567.0, 3192.0, 6556.0, 14953.0, 38076.0, 148837.0, 648629.0, 123952.0, 34320.0, 13725.0, 6221.0, 2932.0, 1432.0, 793.0, 477.0, 268.0, 165.0, 94.0, 71.0, 46.0, 41.0, 24.0, 10.0, 14.0, 5.0, 1.0, 6.0, 0.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": 
[-2.08203125, -2.0233001708984375, -1.964569091796875, -1.9058380126953125, -1.84710693359375, -1.7883758544921875, -1.729644775390625, -1.6709136962890625, -1.6121826171875, -1.5534515380859375, -1.494720458984375, -1.4359893798828125, -1.37725830078125, -1.3185272216796875, -1.259796142578125, -1.2010650634765625, -1.142333984375, -1.0836029052734375, -1.024871826171875, -0.9661407470703125, -0.90740966796875, -0.8486785888671875, -0.789947509765625, -0.7312164306640625, -0.6724853515625, -0.6137542724609375, -0.555023193359375, -0.4962921142578125, -0.43756103515625, -0.3788299560546875, -0.320098876953125, -0.2613677978515625, -0.20263671875, -0.1439056396484375, -0.085174560546875, -0.0264434814453125, 0.03228759765625, 0.0910186767578125, 0.149749755859375, 0.2084808349609375, 0.2672119140625, 0.3259429931640625, 0.384674072265625, 0.4434051513671875, 0.50213623046875, 0.5608673095703125, 0.619598388671875, 0.6783294677734375, 0.737060546875, 0.7957916259765625, 0.854522705078125, 0.9132537841796875, 0.97198486328125, 1.0307159423828125, 1.089447021484375, 1.1481781005859375, 1.2069091796875, 1.2656402587890625, 1.324371337890625, 1.3831024169921875, 1.44183349609375, 1.5005645751953125, 1.559295654296875, 1.6180267333984375, 1.6767578125]}, "gradients/decoder.model.decoder.layers.5.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 2.0, 1.0, 4.0, 4.0, 3.0, 7.0, 9.0, 4.0, 11.0, 12.0, 14.0, 13.0, 23.0, 30.0, 30.0, 31.0, 32.0, 41.0, 50.0, 40.0, 55.0, 52.0, 59.0, 59.0, 54.0, 51.0, 44.0, 46.0, 42.0, 41.0, 27.0, 17.0, 20.0, 19.0, 12.0, 6.0, 6.0, 13.0, 9.0, 9.0, 4.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.05859375, -3.933258056640625, -3.80792236328125, -3.682586669921875, -3.5572509765625, -3.431915283203125, -3.30657958984375, -3.181243896484375, -3.055908203125, -2.930572509765625, -2.80523681640625, -2.679901123046875, -2.5545654296875, -2.429229736328125, -2.30389404296875, -2.178558349609375, -2.05322265625, -1.927886962890625, -1.80255126953125, -1.677215576171875, -1.5518798828125, -1.426544189453125, -1.30120849609375, -1.175872802734375, -1.050537109375, -0.925201416015625, -0.79986572265625, -0.674530029296875, -0.5491943359375, -0.423858642578125, -0.29852294921875, -0.173187255859375, -0.0478515625, 0.077484130859375, 0.20281982421875, 0.328155517578125, 0.4534912109375, 0.578826904296875, 0.70416259765625, 0.829498291015625, 0.954833984375, 1.080169677734375, 1.20550537109375, 1.330841064453125, 1.4561767578125, 1.581512451171875, 1.70684814453125, 1.832183837890625, 1.95751953125, 2.082855224609375, 2.20819091796875, 2.333526611328125, 2.4588623046875, 2.584197998046875, 2.70953369140625, 2.834869384765625, 2.960205078125, 3.085540771484375, 3.21087646484375, 3.336212158203125, 3.4615478515625, 3.586883544921875, 3.71221923828125, 3.837554931640625, 3.962890625]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 4.0, 4.0, 3.0, 3.0, 4.0, 6.0, 14.0, 20.0, 11.0, 24.0, 36.0, 52.0, 56.0, 89.0, 122.0, 191.0, 282.0, 547.0, 1316.0, 3793.0, 14688.0, 95901.0, 828529.0, 83213.0, 13514.0, 3559.0, 1167.0, 547.0, 267.0, 149.0, 122.0, 80.0, 69.0, 46.0, 32.0, 27.0, 14.0, 20.0, 13.0, 11.0, 6.0, 2.0, 3.0, 5.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0], "bins": [-3.705078125, -3.603485107421875, -3.50189208984375, -3.400299072265625, -3.2987060546875, -3.197113037109375, 
-3.09552001953125, -2.993927001953125, -2.892333984375, -2.790740966796875, -2.68914794921875, -2.587554931640625, -2.4859619140625, -2.384368896484375, -2.28277587890625, -2.181182861328125, -2.07958984375, -1.977996826171875, -1.87640380859375, -1.774810791015625, -1.6732177734375, -1.571624755859375, -1.47003173828125, -1.368438720703125, -1.266845703125, -1.165252685546875, -1.06365966796875, -0.962066650390625, -0.8604736328125, -0.758880615234375, -0.65728759765625, -0.555694580078125, -0.4541015625, -0.352508544921875, -0.25091552734375, -0.149322509765625, -0.0477294921875, 0.053863525390625, 0.15545654296875, 0.257049560546875, 0.358642578125, 0.460235595703125, 0.56182861328125, 0.663421630859375, 0.7650146484375, 0.866607666015625, 0.96820068359375, 1.069793701171875, 1.17138671875, 1.272979736328125, 1.37457275390625, 1.476165771484375, 1.5777587890625, 1.679351806640625, 1.78094482421875, 1.882537841796875, 1.984130859375, 2.085723876953125, 2.18731689453125, 2.288909912109375, 2.3905029296875, 2.492095947265625, 2.59368896484375, 2.695281982421875, 2.796875]}, "gradients/decoder.model.decoder.layers.5.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 5.0, 6.0, 8.0, 6.0, 12.0, 10.0, 9.0, 11.0, 15.0, 18.0, 17.0, 21.0, 41.0, 31.0, 34.0, 28.0, 29.0, 46.0, 47.0, 37.0, 40.0, 44.0, 46.0, 53.0, 50.0, 44.0, 34.0, 38.0, 29.0, 43.0, 17.0, 28.0, 19.0, 23.0, 14.0, 13.0, 7.0, 6.0, 4.0, 10.0, 5.0, 2.0, 3.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-4.20703125, -4.0684814453125, -3.929931640625, -3.7913818359375, -3.65283203125, -3.5142822265625, -3.375732421875, -3.2371826171875, -3.0986328125, -2.9600830078125, -2.821533203125, -2.6829833984375, -2.54443359375, -2.4058837890625, -2.267333984375, -2.1287841796875, -1.990234375, -1.8516845703125, -1.713134765625, -1.5745849609375, -1.43603515625, -1.2974853515625, -1.158935546875, -1.0203857421875, -0.8818359375, -0.7432861328125, -0.604736328125, -0.4661865234375, -0.32763671875, -0.1890869140625, -0.050537109375, 0.0880126953125, 0.2265625, 0.3651123046875, 0.503662109375, 0.6422119140625, 0.78076171875, 0.9193115234375, 1.057861328125, 1.1964111328125, 1.3349609375, 1.4735107421875, 1.612060546875, 1.7506103515625, 1.88916015625, 2.0277099609375, 2.166259765625, 2.3048095703125, 2.443359375, 2.5819091796875, 2.720458984375, 2.8590087890625, 2.99755859375, 3.1361083984375, 3.274658203125, 3.4132080078125, 3.5517578125, 3.6903076171875, 3.828857421875, 3.9674072265625, 4.10595703125, 4.2445068359375, 4.383056640625, 4.5216064453125, 4.66015625]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 3.0, 5.0, 6.0, 7.0, 24.0, 32.0, 39.0, 60.0, 86.0, 131.0, 274.0, 585.0, 1496.0, 4991.0, 26089.0, 299316.0, 665609.0, 39796.0, 6574.0, 1879.0, 723.0, 343.0, 179.0, 117.0, 62.0, 33.0, 27.0, 20.0, 17.0, 9.0, 8.0, 5.0, 3.0, 3.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.0380859375, -1.00360107421875, -0.9691162109375, -0.93463134765625, -0.900146484375, -0.86566162109375, -0.8311767578125, -0.79669189453125, -0.76220703125, -0.72772216796875, -0.6932373046875, -0.65875244140625, -0.624267578125, -0.58978271484375, -0.5552978515625, -0.52081298828125, -0.486328125, -0.45184326171875, -0.4173583984375, -0.38287353515625, -0.348388671875, -0.31390380859375, -0.2794189453125, -0.24493408203125, 
-0.21044921875, -0.17596435546875, -0.1414794921875, -0.10699462890625, -0.072509765625, -0.03802490234375, -0.0035400390625, 0.03094482421875, 0.0654296875, 0.09991455078125, 0.1343994140625, 0.16888427734375, 0.203369140625, 0.23785400390625, 0.2723388671875, 0.30682373046875, 0.34130859375, 0.37579345703125, 0.4102783203125, 0.44476318359375, 0.479248046875, 0.51373291015625, 0.5482177734375, 0.58270263671875, 0.6171875, 0.65167236328125, 0.6861572265625, 0.72064208984375, 0.755126953125, 0.78961181640625, 0.8240966796875, 0.85858154296875, 0.89306640625, 0.92755126953125, 0.9620361328125, 0.99652099609375, 1.031005859375, 1.06549072265625, 1.0999755859375, 1.13446044921875, 1.1689453125]}, "gradients/decoder.model.decoder.layers.5.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 5.0, 3.0, 4.0, 8.0, 4.0, 8.0, 16.0, 28.0, 24.0, 39.0, 44.0, 91.0, 136.0, 231.0, 121.0, 85.0, 37.0, 31.0, 32.0, 25.0, 8.0, 7.0, 7.0, 5.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00012171268463134766, -0.00011770427227020264, -0.00011369585990905762, -0.0001096874475479126, -0.00010567903518676758, -0.00010167062282562256, -9.766221046447754e-05, -9.365379810333252e-05, -8.96453857421875e-05, -8.563697338104248e-05, -8.162856101989746e-05, -7.762014865875244e-05, -7.361173629760742e-05, -6.96033239364624e-05, -6.559491157531738e-05, -6.158649921417236e-05, -5.7578086853027344e-05, -5.3569674491882324e-05, -4.9561262130737305e-05, -4.5552849769592285e-05, -4.1544437408447266e-05, -3.7536025047302246e-05, -3.3527612686157227e-05, -2.9519200325012207e-05, -2.5510787963867188e-05, -2.1502375602722168e-05, -1.749396324157715e-05, -1.3485550880432129e-05, -9.47713851928711e-06, -5.46872615814209e-06, -1.4603137969970703e-06, 2.5480985641479492e-06, 6.556510925292969e-06, 1.0564923286437988e-05, 1.4573335647583008e-05, 1.8581748008728027e-05, 2.2590160369873047e-05, 2.6598572731018066e-05, 3.0606985092163086e-05, 3.4615397453308105e-05, 3.8623809814453125e-05, 4.2632222175598145e-05, 4.6640634536743164e-05, 5.0649046897888184e-05, 5.46574592590332e-05, 5.866587162017822e-05, 6.267428398132324e-05, 6.668269634246826e-05, 7.069110870361328e-05, 7.46995210647583e-05, 7.870793342590332e-05, 8.271634578704834e-05, 8.672475814819336e-05, 9.073317050933838e-05, 9.47415828704834e-05, 9.874999523162842e-05, 0.00010275840759277344, 0.00010676681995391846, 0.00011077523231506348, 0.0001147836446762085, 0.00011879205703735352, 0.00012280046939849854, 0.00012680888175964355, 0.00013081729412078857, 0.0001348257064819336]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 5.0, 3.0, 2.0, 9.0, 5.0, 9.0, 10.0, 18.0, 30.0, 43.0, 106.0, 226.0, 418.0, 1025.0, 3140.0, 16298.0, 237960.0, 748287.0, 33634.0, 4796.0, 1476.0, 539.0, 244.0, 117.0, 56.0, 33.0, 22.0, 14.0, 7.0, 3.0, 11.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 3.0, 1.0, 2.0], "bins": [-1.7685546875, -1.723388671875, -1.67822265625, -1.633056640625, -1.587890625, -1.542724609375, -1.49755859375, -1.452392578125, -1.4072265625, -1.362060546875, -1.31689453125, -1.271728515625, -1.2265625, -1.181396484375, -1.13623046875, -1.091064453125, -1.0458984375, -1.000732421875, -0.95556640625, -0.910400390625, -0.865234375, -0.820068359375, 
-0.77490234375, -0.729736328125, -0.6845703125, -0.639404296875, -0.59423828125, -0.549072265625, -0.50390625, -0.458740234375, -0.41357421875, -0.368408203125, -0.3232421875, -0.278076171875, -0.23291015625, -0.187744140625, -0.142578125, -0.097412109375, -0.05224609375, -0.007080078125, 0.0380859375, 0.083251953125, 0.12841796875, 0.173583984375, 0.21875, 0.263916015625, 0.30908203125, 0.354248046875, 0.3994140625, 0.444580078125, 0.48974609375, 0.534912109375, 0.580078125, 0.625244140625, 0.67041015625, 0.715576171875, 0.7607421875, 0.805908203125, 0.85107421875, 0.896240234375, 0.94140625, 0.986572265625, 1.03173828125, 1.076904296875, 1.1220703125]}, "gradients/decoder.model.decoder.layers.5.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 4.0, 3.0, 5.0, 12.0, 20.0, 17.0, 19.0, 38.0, 49.0, 82.0, 132.0, 275.0, 117.0, 72.0, 46.0, 35.0, 15.0, 20.0, 13.0, 13.0, 7.0, 5.0, 2.0, 4.0, 2.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.457763671875, -0.4422264099121094, -0.42668914794921875, -0.4111518859863281, -0.3956146240234375, -0.3800773620605469, -0.36454010009765625, -0.3490028381347656, -0.333465576171875, -0.3179283142089844, -0.30239105224609375, -0.2868537902832031, -0.2713165283203125, -0.2557792663574219, -0.24024200439453125, -0.22470474243164062, -0.20916748046875, -0.19363021850585938, -0.17809295654296875, -0.16255569458007812, -0.1470184326171875, -0.13148117065429688, -0.11594390869140625, -0.10040664672851562, -0.084869384765625, -0.06933212280273438, -0.05379486083984375, -0.038257598876953125, -0.0227203369140625, -0.007183074951171875, 0.00835418701171875, 0.023891448974609375, 0.0394287109375, 0.054965972900390625, 0.07050323486328125, 0.08604049682617188, 0.1015777587890625, 0.11711502075195312, 0.13265228271484375, 0.14818954467773438, 0.163726806640625, 0.17926406860351562, 0.19480133056640625, 0.21033859252929688, 0.2258758544921875, 0.24141311645507812, 0.25695037841796875, 0.2724876403808594, 0.28802490234375, 0.3035621643066406, 0.31909942626953125, 0.3346366882324219, 0.3501739501953125, 0.3657112121582031, 0.38124847412109375, 0.3967857360839844, 0.412322998046875, 0.4278602600097656, 0.44339752197265625, 0.4589347839355469, 0.4744720458984375, 0.4900093078613281, 0.5055465698242188, 0.5210838317871094, 0.53662109375]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 17.0, 14.0, 20.0, 52.0, 97.0, 178.0, 183.0, 170.0, 122.0, 65.0, 45.0, 24.0, 8.0, 11.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-15.51832389831543, -15.171737670898438, -14.825150489807129, -14.47856330871582, -14.131977081298828, -13.785390853881836, -13.438803672790527, -13.092216491699219, -12.745630264282227, -12.399044036865234, -12.052456855773926, -11.705869674682617, -11.359283447265625, -11.012697219848633, -10.666110038757324, -10.319522857666016, -9.972936630249023, -9.626350402832031, -9.279763221740723, -8.933176040649414, -8.586589813232422, -8.24000358581543, -7.893416404724121, -7.546829700469971, -7.20024299621582, -6.85365629196167, -6.5070695877075195, -6.160482883453369, -5.813896179199219, -5.467309474945068, -5.120722770690918, 
-4.774136066436768, -4.427548408508301, -4.08096170425415, -3.734375, -3.3877882957458496, -3.041201591491699, -2.694614887237549, -2.3480281829833984, -2.001441478729248, -1.6548547744750977, -1.3082680702209473, -0.9616813659667969, -0.6150946617126465, -0.2685079574584961, 0.0780787467956543, 0.4246654510498047, 0.7712521553039551, 1.1178388595581055, 1.4644255638122559, 1.8110122680664062, 2.1575989723205566, 2.504185676574707, 2.8507723808288574, 3.197359085083008, 3.543945789337158, 3.8905324935913086, 4.237119197845459, 4.583705902099609, 4.93029260635376, 5.27687931060791, 5.6234660148620605, 5.970052719116211, 6.316639423370361, 6.663226127624512]}, "gradients/decoder.model.decoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 5.0, 8.0, 8.0, 11.0, 7.0, 14.0, 12.0, 19.0, 21.0, 20.0, 15.0, 23.0, 24.0, 27.0, 17.0, 26.0, 29.0, 27.0, 33.0, 36.0, 39.0, 53.0, 34.0, 38.0, 36.0, 35.0, 33.0, 35.0, 31.0, 34.0, 41.0, 18.0, 24.0, 21.0, 23.0, 23.0, 20.0, 11.0, 12.0, 7.0, 5.0, 9.0, 7.0, 3.0, 7.0, 2.0, 3.0, 0.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0, 2.0], "bins": [-6.2397027015686035, -6.039177894592285, -5.838653564453125, -5.638128757476807, -5.4376044273376465, -5.237079620361328, -5.036555290222168, -4.83603048324585, -4.635505676269531, -4.434980869293213, -4.234456539154053, -4.033931732177734, -3.833407402038574, -3.632882595062256, -3.4323580265045166, -3.2318334579467773, -3.031309127807617, -2.830784559249878, -2.6302599906921387, -2.4297351837158203, -2.22921085357666, -2.028686046600342, -1.8281614780426025, -1.6276369094848633, -1.427112340927124, -1.2265877723693848, -1.0260632038116455, -0.8255385160446167, -0.6250139474868774, -0.4244893789291382, -0.22396469116210938, -0.023440122604370117, 0.17708396911621094, 0.3776085674762726, 0.5781331658363342, 0.7786577939987183, 0.9791823625564575, 1.1797069311141968, 1.3802316188812256, 1.5807561874389648, 1.781280755996704, 1.9818053245544434, 2.1823298931121826, 2.382854461669922, 2.5833792686462402, 2.7839035987854004, 2.9844284057617188, 3.184952974319458, 3.3854775428771973, 3.5860021114349365, 3.786526679992676, 3.987051486968994, 4.187575817108154, 4.388100624084473, 4.588624954223633, 4.789149761199951, 4.9896745681762695, 5.190199375152588, 5.390723705291748, 5.591248512268066, 5.791772842407227, 5.992297649383545, 6.192822456359863, 6.393346786499023, 6.593871116638184]}, "gradients/decoder.model.decoder.layers.4.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 2.0, 4.0, 3.0, 12.0, 23.0, 42.0, 88.0, 203.0, 488.0, 1343.0, 4035.0, 17951.0, 233808.0, 3246645.0, 646977.0, 34332.0, 5648.0, 1662.0, 589.0, 227.0, 100.0, 47.0, 26.0, 11.0, 6.0, 6.0, 2.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.625, -5.447265625, -5.26953125, -5.091796875, -4.9140625, -4.736328125, -4.55859375, -4.380859375, -4.203125, -4.025390625, -3.84765625, -3.669921875, -3.4921875, -3.314453125, -3.13671875, -2.958984375, -2.78125, -2.603515625, -2.42578125, -2.248046875, -2.0703125, -1.892578125, -1.71484375, -1.537109375, -1.359375, -1.181640625, -1.00390625, -0.826171875, -0.6484375, -0.470703125, -0.29296875, -0.115234375, 0.0625, 0.240234375, 0.41796875, 0.595703125, 0.7734375, 0.951171875, 1.12890625, 1.306640625, 1.484375, 1.662109375, 1.83984375, 2.017578125, 2.1953125, 2.373046875, 2.55078125, 2.728515625, 2.90625, 3.083984375, 3.26171875, 
3.439453125, 3.6171875, 3.794921875, 3.97265625, 4.150390625, 4.328125, 4.505859375, 4.68359375, 4.861328125, 5.0390625, 5.216796875, 5.39453125, 5.572265625, 5.75]}, "gradients/decoder.model.decoder.layers.4.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 4.0, 5.0, 2.0, 4.0, 4.0, 8.0, 6.0, 8.0, 15.0, 19.0, 26.0, 30.0, 28.0, 27.0, 44.0, 53.0, 54.0, 53.0, 54.0, 60.0, 57.0, 48.0, 68.0, 52.0, 50.0, 52.0, 37.0, 30.0, 19.0, 21.0, 14.0, 8.0, 13.0, 10.0, 6.0, 6.0, 6.0, 1.0, 1.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.087890625, -2.9908447265625, -2.893798828125, -2.7967529296875, -2.69970703125, -2.6026611328125, -2.505615234375, -2.4085693359375, -2.3115234375, -2.2144775390625, -2.117431640625, -2.0203857421875, -1.92333984375, -1.8262939453125, -1.729248046875, -1.6322021484375, -1.53515625, -1.4381103515625, -1.341064453125, -1.2440185546875, -1.14697265625, -1.0499267578125, -0.952880859375, -0.8558349609375, -0.7587890625, -0.6617431640625, -0.564697265625, -0.4676513671875, -0.37060546875, -0.2735595703125, -0.176513671875, -0.0794677734375, 0.017578125, 0.1146240234375, 0.211669921875, 0.3087158203125, 0.40576171875, 0.5028076171875, 0.599853515625, 0.6968994140625, 0.7939453125, 0.8909912109375, 0.988037109375, 1.0850830078125, 1.18212890625, 1.2791748046875, 1.376220703125, 1.4732666015625, 1.5703125, 1.6673583984375, 1.764404296875, 1.8614501953125, 1.95849609375, 2.0555419921875, 2.152587890625, 2.2496337890625, 2.3466796875, 2.4437255859375, 2.540771484375, 2.6378173828125, 2.73486328125, 2.8319091796875, 2.928955078125, 3.0260009765625, 3.123046875]}, "gradients/decoder.model.decoder.layers.4.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 4.0, 1.0, 0.0, 1.0, 5.0, 3.0, 5.0, 5.0, 10.0, 7.0, 22.0, 28.0, 48.0, 81.0, 99.0, 150.0, 247.0, 473.0, 1069.0, 2834.0, 10449.0, 75229.0, 3405823.0, 657616.0, 30383.0, 6107.0, 1888.0, 778.0, 378.0, 185.0, 120.0, 66.0, 48.0, 35.0, 25.0, 19.0, 13.0, 8.0, 7.0, 7.0, 7.0, 2.0, 4.0, 4.0, 3.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.4765625, -4.30303955078125, -4.1295166015625, -3.95599365234375, -3.782470703125, -3.60894775390625, -3.4354248046875, -3.26190185546875, -3.08837890625, -2.91485595703125, -2.7413330078125, -2.56781005859375, -2.394287109375, -2.22076416015625, -2.0472412109375, -1.87371826171875, -1.7001953125, -1.52667236328125, -1.3531494140625, -1.17962646484375, -1.006103515625, -0.83258056640625, -0.6590576171875, -0.48553466796875, -0.31201171875, -0.13848876953125, 0.0350341796875, 0.20855712890625, 0.382080078125, 0.55560302734375, 0.7291259765625, 0.90264892578125, 1.076171875, 1.24969482421875, 1.4232177734375, 1.59674072265625, 1.770263671875, 1.94378662109375, 2.1173095703125, 2.29083251953125, 2.46435546875, 2.63787841796875, 2.8114013671875, 2.98492431640625, 3.158447265625, 3.33197021484375, 3.5054931640625, 3.67901611328125, 3.8525390625, 4.02606201171875, 4.1995849609375, 4.37310791015625, 4.546630859375, 4.72015380859375, 4.8936767578125, 5.06719970703125, 5.24072265625, 5.41424560546875, 5.5877685546875, 5.76129150390625, 5.934814453125, 6.10833740234375, 6.2818603515625, 6.45538330078125, 6.62890625]}, "gradients/decoder.model.decoder.layers.4.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0, 5.0, 8.0, 9.0, 5.0, 13.0, 14.0, 19.0, 29.0, 58.0, 88.0, 196.0, 359.0, 680.0, 865.0, 792.0, 432.0, 
209.0, 111.0, 64.0, 36.0, 25.0, 17.0, 11.0, 10.0, 6.0, 5.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.4033203125, -1.355865478515625, -1.30841064453125, -1.260955810546875, -1.2135009765625, -1.166046142578125, -1.11859130859375, -1.071136474609375, -1.023681640625, -0.976226806640625, -0.92877197265625, -0.881317138671875, -0.8338623046875, -0.786407470703125, -0.73895263671875, -0.691497802734375, -0.64404296875, -0.596588134765625, -0.54913330078125, -0.501678466796875, -0.4542236328125, -0.406768798828125, -0.35931396484375, -0.311859130859375, -0.264404296875, -0.216949462890625, -0.16949462890625, -0.122039794921875, -0.0745849609375, -0.027130126953125, 0.02032470703125, 0.067779541015625, 0.115234375, 0.162689208984375, 0.21014404296875, 0.257598876953125, 0.3050537109375, 0.352508544921875, 0.39996337890625, 0.447418212890625, 0.494873046875, 0.542327880859375, 0.58978271484375, 0.637237548828125, 0.6846923828125, 0.732147216796875, 0.77960205078125, 0.827056884765625, 0.87451171875, 0.921966552734375, 0.96942138671875, 1.016876220703125, 1.0643310546875, 1.111785888671875, 1.15924072265625, 1.206695556640625, 1.254150390625, 1.301605224609375, 1.34906005859375, 1.396514892578125, 1.4439697265625, 1.491424560546875, 1.53887939453125, 1.586334228515625, 1.6337890625]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 5.0, 8.0, 11.0, 13.0, 20.0, 29.0, 42.0, 63.0, 85.0, 104.0, 120.0, 122.0, 100.0, 85.0, 65.0, 46.0, 40.0, 19.0, 7.0, 13.0, 4.0, 3.0, 2.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.730464458465576, -5.562582969665527, -5.39470100402832, -5.2268195152282715, -5.0589375495910645, -4.891056060791016, -4.723174095153809, -4.55529260635376, -4.387411117553711, -4.219529628753662, -4.051647663116455, -3.8837661743164062, -3.7158844470977783, -3.5480027198791504, -3.3801209926605225, -3.2122392654418945, -3.0443575382232666, -2.8764758110046387, -2.7085940837860107, -2.540712356567383, -2.372830867767334, -2.204949140548706, -2.037067413330078, -1.8691858053207397, -1.7013040781021118, -1.5334223508834839, -1.3655407428741455, -1.1976590156555176, -1.0297772884368896, -0.8618956804275513, -0.6940139532089233, -0.526132345199585, -0.35825061798095703, -0.19036893546581268, -0.022487252950668335, 0.1453944444656372, 0.31327611207962036, 0.4811577796936035, 0.6490395069122314, 0.8169211149215698, 0.9848028421401978, 1.1526845693588257, 1.320566177368164, 1.488447904586792, 1.65632963180542, 1.8242112398147583, 1.9920929670333862, 2.1599745750427246, 2.3278563022613525, 2.4957380294799805, 2.6636197566986084, 2.8315014839172363, 2.999382972717285, 3.167264699935913, 3.335146427154541, 3.50302791595459, 3.670909881591797, 3.838791608810425, 4.006673336029053, 4.174554824829102, 4.342436790466309, 4.510318279266357, 4.678199768066406, 4.846081733703613, 5.013963222503662]}, "gradients/decoder.model.decoder.layers.4.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 8.0, 4.0, 1.0, 5.0, 10.0, 8.0, 10.0, 11.0, 30.0, 21.0, 24.0, 24.0, 21.0, 36.0, 31.0, 31.0, 43.0, 33.0, 52.0, 52.0, 41.0, 46.0, 45.0, 48.0, 44.0, 36.0, 39.0, 30.0, 39.0, 36.0, 31.0, 18.0, 17.0, 10.0, 14.0, 13.0, 15.0, 5.0, 5.0, 4.0, 6.0, 
3.0, 5.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.6991169452667236, -2.611685276031494, -2.5242536067962646, -2.436821937561035, -2.3493902683258057, -2.261958599090576, -2.1745266914367676, -2.087095260620117, -1.9996634721755981, -1.9122318029403687, -1.8248001337051392, -1.7373683452606201, -1.6499366760253906, -1.5625050067901611, -1.4750733375549316, -1.3876416683197021, -1.3002099990844727, -1.2127783298492432, -1.1253466606140137, -1.0379149913787842, -0.9504832625389099, -0.8630515933036804, -0.7756198644638062, -0.6881881952285767, -0.6007565259933472, -0.5133248567581177, -0.4258931577205658, -0.3384614586830139, -0.2510297894477844, -0.16359812021255493, -0.07616639137268066, 0.011265277862548828, 0.09869670867919922, 0.1861283928155899, 0.2735600769519806, 0.36099177598953247, 0.44842344522476196, 0.5358551144599915, 0.6232868432998657, 0.7107185125350952, 0.7981501817703247, 0.8855818510055542, 0.9730135202407837, 1.0604453086853027, 1.1478769779205322, 1.2353086471557617, 1.3227403163909912, 1.4101719856262207, 1.4976036548614502, 1.5850353240966797, 1.6724669933319092, 1.7598986625671387, 1.8473303318023682, 1.9347620010375977, 2.0221939086914062, 2.1096253395080566, 2.1970572471618652, 2.2844889163970947, 2.371920585632324, 2.4593522548675537, 2.546783924102783, 2.6342155933380127, 2.721647262573242, 2.809079170227051, 2.896510601043701]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 3.0, 5.0, 6.0, 7.0, 14.0, 31.0, 37.0, 51.0, 85.0, 148.0, 296.0, 647.0, 1527.0, 5323.0, 23969.0, 163837.0, 719710.0, 108554.0, 17722.0, 4146.0, 1329.0, 520.0, 238.0, 142.0, 78.0, 50.0, 28.0, 14.0, 16.0, 11.0, 13.0, 3.0, 3.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.91064453125, -0.8833847045898438, -0.8561248779296875, -0.8288650512695312, -0.801605224609375, -0.7743453979492188, -0.7470855712890625, -0.7198257446289062, -0.69256591796875, -0.6653060913085938, -0.6380462646484375, -0.6107864379882812, -0.583526611328125, -0.5562667846679688, -0.5290069580078125, -0.5017471313476562, -0.4744873046875, -0.44722747802734375, -0.4199676513671875, -0.39270782470703125, -0.365447998046875, -0.33818817138671875, -0.3109283447265625, -0.28366851806640625, -0.25640869140625, -0.22914886474609375, -0.2018890380859375, -0.17462921142578125, -0.147369384765625, -0.12010955810546875, -0.0928497314453125, -0.06558990478515625, -0.038330078125, -0.01107025146484375, 0.0161895751953125, 0.04344940185546875, 0.070709228515625, 0.09796905517578125, 0.1252288818359375, 0.15248870849609375, 0.17974853515625, 0.20700836181640625, 0.2342681884765625, 0.26152801513671875, 0.288787841796875, 0.31604766845703125, 0.3433074951171875, 0.37056732177734375, 0.3978271484375, 0.42508697509765625, 0.4523468017578125, 0.47960662841796875, 0.506866455078125, 0.5341262817382812, 0.5613861083984375, 0.5886459350585938, 0.61590576171875, 0.6431655883789062, 0.6704254150390625, 0.6976852416992188, 0.724945068359375, 0.7522048950195312, 0.7794647216796875, 0.8067245483398438, 0.833984375]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 4.0, 8.0, 12.0, 13.0, 17.0, 25.0, 35.0, 39.0, 38.0, 36.0, 67.0, 63.0, 66.0, 78.0, 59.0, 66.0, 74.0, 71.0, 54.0, 41.0, 35.0, 24.0, 26.0, 15.0, 
6.0, 13.0, 4.0, 5.0, 9.0, 3.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.9921875, -2.904083251953125, -2.81597900390625, -2.727874755859375, -2.6397705078125, -2.551666259765625, -2.46356201171875, -2.375457763671875, -2.287353515625, -2.199249267578125, -2.11114501953125, -2.023040771484375, -1.9349365234375, -1.846832275390625, -1.75872802734375, -1.670623779296875, -1.58251953125, -1.494415283203125, -1.40631103515625, -1.318206787109375, -1.2301025390625, -1.141998291015625, -1.05389404296875, -0.965789794921875, -0.877685546875, -0.789581298828125, -0.70147705078125, -0.613372802734375, -0.5252685546875, -0.437164306640625, -0.34906005859375, -0.260955810546875, -0.1728515625, -0.084747314453125, 0.00335693359375, 0.091461181640625, 0.1795654296875, 0.267669677734375, 0.35577392578125, 0.443878173828125, 0.531982421875, 0.620086669921875, 0.70819091796875, 0.796295166015625, 0.8843994140625, 0.972503662109375, 1.06060791015625, 1.148712158203125, 1.23681640625, 1.324920654296875, 1.41302490234375, 1.501129150390625, 1.5892333984375, 1.677337646484375, 1.76544189453125, 1.853546142578125, 1.941650390625, 2.029754638671875, 2.11785888671875, 2.205963134765625, 2.2940673828125, 2.382171630859375, 2.47027587890625, 2.558380126953125, 2.646484375]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 7.0, 7.0, 9.0, 21.0, 23.0, 42.0, 41.0, 90.0, 109.0, 164.0, 266.0, 323.0, 481.0, 753.0, 1138.0, 1787.0, 2831.0, 4514.0, 7258.0, 12680.0, 22300.0, 41986.0, 83573.0, 184503.0, 303084.0, 190981.0, 88256.0, 44090.0, 23383.0, 13195.0, 7753.0, 4669.0, 2799.0, 1802.0, 1218.0, 783.0, 532.0, 360.0, 227.0, 176.0, 107.0, 82.0, 50.0, 31.0, 30.0, 12.0, 12.0, 7.0, 7.0, 3.0, 5.0, 3.0, 0.0, 1.0, 1.0], "bins": [-0.1973876953125, -0.1914997100830078, -0.18561172485351562, -0.17972373962402344, -0.17383575439453125, -0.16794776916503906, -0.16205978393554688, -0.1561717987060547, -0.1502838134765625, -0.1443958282470703, -0.13850784301757812, -0.13261985778808594, -0.12673187255859375, -0.12084388732910156, -0.11495590209960938, -0.10906791687011719, -0.103179931640625, -0.09729194641113281, -0.09140396118164062, -0.08551597595214844, -0.07962799072265625, -0.07374000549316406, -0.06785202026367188, -0.06196403503417969, -0.0560760498046875, -0.05018806457519531, -0.044300079345703125, -0.03841209411621094, -0.03252410888671875, -0.026636123657226562, -0.020748138427734375, -0.014860153198242188, -0.00897216796875, -0.0030841827392578125, 0.002803802490234375, 0.008691787719726562, 0.01457977294921875, 0.020467758178710938, 0.026355743408203125, 0.03224372863769531, 0.0381317138671875, 0.04401969909667969, 0.049907684326171875, 0.05579566955566406, 0.06168365478515625, 0.06757164001464844, 0.07345962524414062, 0.07934761047363281, 0.085235595703125, 0.09112358093261719, 0.09701156616210938, 0.10289955139160156, 0.10878753662109375, 0.11467552185058594, 0.12056350708007812, 0.1264514923095703, 0.1323394775390625, 0.1382274627685547, 0.14411544799804688, 0.15000343322753906, 0.15589141845703125, 0.16177940368652344, 0.16766738891601562, 0.1735553741455078, 0.179443359375]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 0.0, 6.0, 2.0, 6.0, 9.0, 7.0, 14.0, 19.0, 17.0, 21.0, 25.0, 24.0, 27.0, 29.0, 28.0, 46.0, 52.0, 30.0, 45.0, 49.0, 44.0, 38.0, 41.0, 46.0, 
55.0, 39.0, 42.0, 39.0, 30.0, 20.0, 26.0, 18.0, 23.0, 19.0, 12.0, 10.0, 7.0, 2.0, 6.0, 8.0, 8.0, 4.0, 4.0, 0.0, 2.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0], "bins": [-3.9453125, -3.829132080078125, -3.71295166015625, -3.596771240234375, -3.4805908203125, -3.364410400390625, -3.24822998046875, -3.132049560546875, -3.015869140625, -2.899688720703125, -2.78350830078125, -2.667327880859375, -2.5511474609375, -2.434967041015625, -2.31878662109375, -2.202606201171875, -2.08642578125, -1.970245361328125, -1.85406494140625, -1.737884521484375, -1.6217041015625, -1.505523681640625, -1.38934326171875, -1.273162841796875, -1.156982421875, -1.040802001953125, -0.92462158203125, -0.808441162109375, -0.6922607421875, -0.576080322265625, -0.45989990234375, -0.343719482421875, -0.2275390625, -0.111358642578125, 0.00482177734375, 0.121002197265625, 0.2371826171875, 0.353363037109375, 0.46954345703125, 0.585723876953125, 0.701904296875, 0.818084716796875, 0.93426513671875, 1.050445556640625, 1.1666259765625, 1.282806396484375, 1.39898681640625, 1.515167236328125, 1.63134765625, 1.747528076171875, 1.86370849609375, 1.979888916015625, 2.0960693359375, 2.212249755859375, 2.32843017578125, 2.444610595703125, 2.560791015625, 2.676971435546875, 2.79315185546875, 2.909332275390625, 3.0255126953125, 3.141693115234375, 3.25787353515625, 3.374053955078125, 3.490234375]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 3.0, 4.0, 3.0, 6.0, 14.0, 11.0, 16.0, 37.0, 40.0, 67.0, 92.0, 154.0, 235.0, 345.0, 853.0, 2579.0, 12839.0, 221399.0, 775630.0, 27684.0, 4365.0, 1122.0, 410.0, 241.0, 145.0, 100.0, 57.0, 35.0, 25.0, 18.0, 5.0, 8.0, 6.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.117431640625, -0.1138906478881836, -0.11034965515136719, -0.10680866241455078, -0.10326766967773438, -0.09972667694091797, -0.09618568420410156, -0.09264469146728516, -0.08910369873046875, -0.08556270599365234, -0.08202171325683594, -0.07848072052001953, -0.07493972778320312, -0.07139873504638672, -0.06785774230957031, -0.0643167495727539, -0.0607757568359375, -0.057234764099121094, -0.05369377136230469, -0.05015277862548828, -0.046611785888671875, -0.04307079315185547, -0.03952980041503906, -0.035988807678222656, -0.03244781494140625, -0.028906822204589844, -0.025365829467773438, -0.02182483673095703, -0.018283843994140625, -0.014742851257324219, -0.011201858520507812, -0.007660865783691406, -0.004119873046875, -0.0005788803100585938, 0.0029621124267578125, 0.006503105163574219, 0.010044097900390625, 0.013585090637207031, 0.017126083374023438, 0.020667076110839844, 0.02420806884765625, 0.027749061584472656, 0.03129005432128906, 0.03483104705810547, 0.038372039794921875, 0.04191303253173828, 0.04545402526855469, 0.048995018005371094, 0.0525360107421875, 0.056077003479003906, 0.05961799621582031, 0.06315898895263672, 0.06669998168945312, 0.07024097442626953, 0.07378196716308594, 0.07732295989990234, 0.08086395263671875, 0.08440494537353516, 0.08794593811035156, 0.09148693084716797, 0.09502792358398438, 0.09856891632080078, 0.10210990905761719, 0.1056509017944336, 0.10919189453125]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 3.0, 4.0, 7.0, 3.0, 13.0, 4.0, 10.0, 20.0, 30.0, 42.0, 74.0, 91.0, 91.0, 136.0, 
120.0, 94.0, 79.0, 48.0, 40.0, 15.0, 25.0, 17.0, 11.0, 13.0, 4.0, 1.0, 4.0, 3.0, 2.0, 4.0, 2.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-5.6803226470947266e-05, -5.5264681577682495e-05, -5.3726136684417725e-05, -5.2187591791152954e-05, -5.0649046897888184e-05, -4.911050200462341e-05, -4.757195711135864e-05, -4.603341221809387e-05, -4.44948673248291e-05, -4.295632243156433e-05, -4.141777753829956e-05, -3.987923264503479e-05, -3.834068775177002e-05, -3.680214285850525e-05, -3.526359796524048e-05, -3.372505307197571e-05, -3.218650817871094e-05, -3.064796328544617e-05, -2.9109418392181396e-05, -2.7570873498916626e-05, -2.6032328605651855e-05, -2.4493783712387085e-05, -2.2955238819122314e-05, -2.1416693925857544e-05, -1.9878149032592773e-05, -1.8339604139328003e-05, -1.6801059246063232e-05, -1.5262514352798462e-05, -1.3723969459533691e-05, -1.2185424566268921e-05, -1.064687967300415e-05, -9.10833477973938e-06, -7.569789886474609e-06, -6.031244993209839e-06, -4.492700099945068e-06, -2.954155206680298e-06, -1.4156103134155273e-06, 1.2293457984924316e-07, 1.6614794731140137e-06, 3.200024366378784e-06, 4.738569259643555e-06, 6.277114152908325e-06, 7.815659046173096e-06, 9.354203939437866e-06, 1.0892748832702637e-05, 1.2431293725967407e-05, 1.3969838619232178e-05, 1.5508383512496948e-05, 1.704692840576172e-05, 1.858547329902649e-05, 2.012401819229126e-05, 2.166256308555603e-05, 2.32011079788208e-05, 2.473965287208557e-05, 2.6278197765350342e-05, 2.7816742658615112e-05, 2.9355287551879883e-05, 3.089383244514465e-05, 3.2432377338409424e-05, 3.3970922231674194e-05, 3.5509467124938965e-05, 3.7048012018203735e-05, 3.8586556911468506e-05, 4.0125101804733276e-05, 4.166364669799805e-05]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 6.0, 3.0, 1.0, 6.0, 7.0, 7.0, 14.0, 12.0, 27.0, 26.0, 30.0, 41.0, 67.0, 97.0, 161.0, 379.0, 1079.0, 4206.0, 30719.0, 886848.0, 112824.0, 8948.0, 1847.0, 605.0, 226.0, 128.0, 76.0, 45.0, 34.0, 22.0, 19.0, 11.0, 9.0, 8.0, 7.0, 3.0, 5.0, 2.0, 4.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1207275390625, -0.11603164672851562, -0.11133575439453125, -0.10663986206054688, -0.1019439697265625, -0.09724807739257812, -0.09255218505859375, -0.08785629272460938, -0.083160400390625, -0.07846450805664062, -0.07376861572265625, -0.06907272338867188, -0.0643768310546875, -0.059680938720703125, -0.05498504638671875, -0.050289154052734375, -0.04559326171875, -0.040897369384765625, -0.03620147705078125, -0.031505584716796875, -0.0268096923828125, -0.022113800048828125, -0.01741790771484375, -0.012722015380859375, -0.008026123046875, -0.003330230712890625, 0.00136566162109375, 0.006061553955078125, 0.0107574462890625, 0.015453338623046875, 0.02014923095703125, 0.024845123291015625, 0.029541015625, 0.034236907958984375, 0.03893280029296875, 0.043628692626953125, 0.0483245849609375, 0.053020477294921875, 0.05771636962890625, 0.062412261962890625, 0.067108154296875, 0.07180404663085938, 0.07649993896484375, 0.08119583129882812, 0.0858917236328125, 0.09058761596679688, 0.09528350830078125, 0.09997940063476562, 0.10467529296875, 0.10937118530273438, 0.11406707763671875, 0.11876296997070312, 0.1234588623046875, 0.12815475463867188, 0.13285064697265625, 0.13754653930664062, 0.142242431640625, 0.14693832397460938, 0.15163421630859375, 0.15633010864257812, 0.1610260009765625, 0.16572189331054688, 0.17041778564453125, 
0.17511367797851562, 0.1798095703125]}, "gradients/decoder.model.decoder.layers.4.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 5.0, 2.0, 6.0, 13.0, 18.0, 45.0, 103.0, 330.0, 295.0, 103.0, 54.0, 16.0, 10.0, 9.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0645751953125, -0.062957763671875, -0.06134033203125, -0.059722900390625, -0.05810546875, -0.056488037109375, -0.05487060546875, -0.053253173828125, -0.0516357421875, -0.050018310546875, -0.04840087890625, -0.046783447265625, -0.045166015625, -0.043548583984375, -0.04193115234375, -0.040313720703125, -0.0386962890625, -0.037078857421875, -0.03546142578125, -0.033843994140625, -0.0322265625, -0.030609130859375, -0.02899169921875, -0.027374267578125, -0.0257568359375, -0.024139404296875, -0.02252197265625, -0.020904541015625, -0.019287109375, -0.017669677734375, -0.01605224609375, -0.014434814453125, -0.0128173828125, -0.011199951171875, -0.00958251953125, -0.007965087890625, -0.00634765625, -0.004730224609375, -0.00311279296875, -0.001495361328125, 0.0001220703125, 0.001739501953125, 0.00335693359375, 0.004974365234375, 0.006591796875, 0.008209228515625, 0.00982666015625, 0.011444091796875, 0.0130615234375, 0.014678955078125, 0.01629638671875, 0.017913818359375, 0.01953125, 0.021148681640625, 0.02276611328125, 0.024383544921875, 0.0260009765625, 0.027618408203125, 0.02923583984375, 0.030853271484375, 0.032470703125, 0.034088134765625, 0.03570556640625, 0.037322998046875, 0.0389404296875]}, "gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 2.0, 18.0, 15.0, 37.0, 70.0, 144.0, 178.0, 204.0, 137.0, 90.0, 55.0, 28.0, 13.0, 9.0, 1.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-7.498447418212891, -7.328708171844482, -7.158968925476074, -6.989229202270508, -6.8194899559021, -6.649750709533691, -6.480011463165283, -6.310272216796875, -6.140532493591309, -5.9707932472229, -5.801054000854492, -5.631314277648926, -5.461575031280518, -5.291835784912109, -5.122096538543701, -4.952357292175293, -4.782618045806885, -4.612878799438477, -4.443139553070068, -4.27340030670166, -4.103660583496094, -3.9339213371276855, -3.7641820907592773, -3.594442844390869, -3.424703359603882, -3.2549641132354736, -3.0852246284484863, -2.915485382080078, -2.74574613571167, -2.5760066509246826, -2.4062674045562744, -2.236527919769287, -2.0667881965637207, -1.897048830986023, -1.7273094654083252, -1.557570219039917, -1.3878308534622192, -1.2180914878845215, -1.0483522415161133, -0.8786128759384155, -0.7088735103607178, -0.53913414478302, -0.36939483880996704, -0.19965553283691406, -0.02991616725921631, 0.13982319831848145, 0.30956244468688965, 0.4793018102645874, 0.6490411758422852, 0.8187805414199829, 0.9885198473930359, 1.1582591533660889, 1.3279985189437866, 1.4977378845214844, 1.6674771308898926, 1.8372164964675903, 2.006955862045288, 2.1766951084136963, 2.3464345932006836, 2.516173839569092, 2.6859130859375, 2.8556525707244873, 3.0253918170928955, 3.195131301879883, 3.364870548248291]}, "gradients/decoder.model.decoder.layers.4.self_attn_layer_norm.bias": {"_type": "histogram", 
"values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 5.0, 8.0, 4.0, 6.0, 7.0, 15.0, 19.0, 30.0, 37.0, 27.0, 37.0, 43.0, 58.0, 51.0, 65.0, 71.0, 65.0, 61.0, 66.0, 52.0, 40.0, 51.0, 52.0, 34.0, 22.0, 14.0, 20.0, 12.0, 11.0, 5.0, 10.0, 4.0, 5.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.6339807510375977, -2.557360887527466, -2.480741262435913, -2.4041213989257812, -2.3275017738342285, -2.2508819103240967, -2.174262285232544, -2.097642421722412, -2.0210227966308594, -1.944403052330017, -1.8677833080291748, -1.7911635637283325, -1.7145438194274902, -1.6379239559173584, -1.5613042116165161, -1.4846844673156738, -1.408064603805542, -1.3314448595046997, -1.2548251152038574, -1.1782053709030151, -1.1015856266021729, -1.024965763092041, -0.9483460187911987, -0.8717262744903564, -0.7951065301895142, -0.7184867858886719, -0.6418670415878296, -0.5652472376823425, -0.48862749338150024, -0.41200774908065796, -0.3353879749774933, -0.2587682008743286, -0.18214821815490723, -0.10552845895290375, -0.02890869975090027, 0.04771105945110321, 0.12433081865310669, 0.20095056295394897, 0.27757033705711365, 0.3541901111602783, 0.4308098554611206, 0.5074295997619629, 0.5840493440628052, 0.6606691479682922, 0.7372888922691345, 0.8139086365699768, 0.8905284404754639, 0.9671481847763062, 1.0437679290771484, 1.1203876733779907, 1.197007417678833, 1.2736271619796753, 1.3502469062805176, 1.4268667697906494, 1.5034865140914917, 1.580106258392334, 1.6567260026931763, 1.7333457469940186, 1.8099654912948608, 1.8865852355957031, 1.963205099105835, 2.0398247241973877, 2.1164445877075195, 2.1930642127990723, 2.269684076309204]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 4.0, 0.0, 2.0, 5.0, 1.0, 1.0, 6.0, 5.0, 4.0, 4.0, 16.0, 16.0, 15.0, 27.0, 40.0, 51.0, 80.0, 128.0, 184.0, 292.0, 514.0, 847.0, 1508.0, 2831.0, 6137.0, 16108.0, 61619.0, 426962.0, 438931.0, 62934.0, 16276.0, 6273.0, 2969.0, 1492.0, 857.0, 526.0, 291.0, 190.0, 133.0, 84.0, 68.0, 40.0, 32.0, 16.0, 6.0, 8.0, 5.0, 3.0, 3.0, 5.0, 3.0, 4.0, 5.0, 3.0, 4.0, 2.0, 0.0, 0.0, 3.0], "bins": [-1.47265625, -1.42803955078125, -1.3834228515625, -1.33880615234375, -1.294189453125, -1.24957275390625, -1.2049560546875, -1.16033935546875, -1.11572265625, -1.07110595703125, -1.0264892578125, -0.98187255859375, -0.937255859375, -0.89263916015625, -0.8480224609375, -0.80340576171875, -0.7587890625, -0.71417236328125, -0.6695556640625, -0.62493896484375, -0.580322265625, -0.53570556640625, -0.4910888671875, -0.44647216796875, -0.40185546875, -0.35723876953125, -0.3126220703125, -0.26800537109375, -0.223388671875, -0.17877197265625, -0.1341552734375, -0.08953857421875, -0.044921875, -0.00030517578125, 0.0443115234375, 0.08892822265625, 0.133544921875, 0.17816162109375, 0.2227783203125, 0.26739501953125, 0.31201171875, 0.35662841796875, 0.4012451171875, 0.44586181640625, 0.490478515625, 0.53509521484375, 0.5797119140625, 0.62432861328125, 0.6689453125, 0.71356201171875, 0.7581787109375, 0.80279541015625, 0.847412109375, 0.89202880859375, 0.9366455078125, 0.98126220703125, 1.02587890625, 1.07049560546875, 1.1151123046875, 1.15972900390625, 1.204345703125, 1.24896240234375, 1.2935791015625, 1.33819580078125, 1.3828125]}, "gradients/decoder.model.decoder.layers.4.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 7.0, 
3.0, 0.0, 7.0, 10.0, 12.0, 16.0, 18.0, 31.0, 35.0, 38.0, 59.0, 43.0, 50.0, 57.0, 80.0, 58.0, 71.0, 45.0, 70.0, 59.0, 59.0, 43.0, 35.0, 19.0, 22.0, 16.0, 8.0, 14.0, 8.0, 8.0, 2.0, 5.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.99609375, -5.832275390625, -5.66845703125, -5.504638671875, -5.3408203125, -5.177001953125, -5.01318359375, -4.849365234375, -4.685546875, -4.521728515625, -4.35791015625, -4.194091796875, -4.0302734375, -3.866455078125, -3.70263671875, -3.538818359375, -3.375, -3.211181640625, -3.04736328125, -2.883544921875, -2.7197265625, -2.555908203125, -2.39208984375, -2.228271484375, -2.064453125, -1.900634765625, -1.73681640625, -1.572998046875, -1.4091796875, -1.245361328125, -1.08154296875, -0.917724609375, -0.75390625, -0.590087890625, -0.42626953125, -0.262451171875, -0.0986328125, 0.065185546875, 0.22900390625, 0.392822265625, 0.556640625, 0.720458984375, 0.88427734375, 1.048095703125, 1.2119140625, 1.375732421875, 1.53955078125, 1.703369140625, 1.8671875, 2.031005859375, 2.19482421875, 2.358642578125, 2.5224609375, 2.686279296875, 2.85009765625, 3.013916015625, 3.177734375, 3.341552734375, 3.50537109375, 3.669189453125, 3.8330078125, 3.996826171875, 4.16064453125, 4.324462890625, 4.48828125]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 7.0, 15.0, 18.0, 8.0, 18.0, 23.0, 32.0, 38.0, 59.0, 83.0, 109.0, 182.0, 295.0, 599.0, 1358.0, 4351.0, 29573.0, 890590.0, 109427.0, 7763.0, 2097.0, 814.0, 375.0, 222.0, 131.0, 94.0, 60.0, 55.0, 39.0, 21.0, 20.0, 15.0, 14.0, 13.0, 7.0, 7.0, 2.0, 6.0, 1.0, 2.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-2.833984375, -2.74163818359375, -2.6492919921875, -2.55694580078125, -2.464599609375, -2.37225341796875, -2.2799072265625, -2.18756103515625, -2.09521484375, -2.00286865234375, -1.9105224609375, -1.81817626953125, -1.725830078125, -1.63348388671875, -1.5411376953125, -1.44879150390625, -1.3564453125, -1.26409912109375, -1.1717529296875, -1.07940673828125, -0.987060546875, -0.89471435546875, -0.8023681640625, -0.71002197265625, -0.61767578125, -0.52532958984375, -0.4329833984375, -0.34063720703125, -0.248291015625, -0.15594482421875, -0.0635986328125, 0.02874755859375, 0.12109375, 0.21343994140625, 0.3057861328125, 0.39813232421875, 0.490478515625, 0.58282470703125, 0.6751708984375, 0.76751708984375, 0.85986328125, 0.95220947265625, 1.0445556640625, 1.13690185546875, 1.229248046875, 1.32159423828125, 1.4139404296875, 1.50628662109375, 1.5986328125, 1.69097900390625, 1.7833251953125, 1.87567138671875, 1.968017578125, 2.06036376953125, 2.1527099609375, 2.24505615234375, 2.33740234375, 2.42974853515625, 2.5220947265625, 2.61444091796875, 2.706787109375, 2.79913330078125, 2.8914794921875, 2.98382568359375, 3.076171875]}, "gradients/decoder.model.decoder.layers.4.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 3.0, 6.0, 4.0, 7.0, 8.0, 11.0, 12.0, 17.0, 17.0, 26.0, 18.0, 31.0, 33.0, 39.0, 23.0, 37.0, 49.0, 53.0, 45.0, 50.0, 48.0, 40.0, 62.0, 38.0, 43.0, 42.0, 31.0, 33.0, 24.0, 28.0, 27.0, 17.0, 17.0, 10.0, 11.0, 16.0, 5.0, 1.0, 5.0, 4.0, 0.0, 3.0, 4.0, 0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-4.97265625, -4.8232421875, -4.673828125, -4.5244140625, -4.375, -4.2255859375, -4.076171875, -3.9267578125, -3.77734375, -3.6279296875, -3.478515625, -3.3291015625, -3.1796875, -3.0302734375, -2.880859375, 
-2.7314453125, -2.58203125, -2.4326171875, -2.283203125, -2.1337890625, -1.984375, -1.8349609375, -1.685546875, -1.5361328125, -1.38671875, -1.2373046875, -1.087890625, -0.9384765625, -0.7890625, -0.6396484375, -0.490234375, -0.3408203125, -0.19140625, -0.0419921875, 0.107421875, 0.2568359375, 0.40625, 0.5556640625, 0.705078125, 0.8544921875, 1.00390625, 1.1533203125, 1.302734375, 1.4521484375, 1.6015625, 1.7509765625, 1.900390625, 2.0498046875, 2.19921875, 2.3486328125, 2.498046875, 2.6474609375, 2.796875, 2.9462890625, 3.095703125, 3.2451171875, 3.39453125, 3.5439453125, 3.693359375, 3.8427734375, 3.9921875, 4.1416015625, 4.291015625, 4.4404296875, 4.58984375]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 4.0, 4.0, 6.0, 4.0, 8.0, 14.0, 35.0, 46.0, 99.0, 204.0, 524.0, 1725.0, 9896.0, 622394.0, 401406.0, 9595.0, 1676.0, 511.0, 215.0, 84.0, 50.0, 22.0, 10.0, 10.0, 6.0, 6.0, 3.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.6533203125, -1.60980224609375, -1.5662841796875, -1.52276611328125, -1.479248046875, -1.43572998046875, -1.3922119140625, -1.34869384765625, -1.30517578125, -1.26165771484375, -1.2181396484375, -1.17462158203125, -1.131103515625, -1.08758544921875, -1.0440673828125, -1.00054931640625, -0.95703125, -0.91351318359375, -0.8699951171875, -0.82647705078125, -0.782958984375, -0.73944091796875, -0.6959228515625, -0.65240478515625, -0.60888671875, -0.56536865234375, -0.5218505859375, -0.47833251953125, -0.434814453125, -0.39129638671875, -0.3477783203125, -0.30426025390625, -0.2607421875, -0.21722412109375, -0.1737060546875, -0.13018798828125, -0.086669921875, -0.04315185546875, 0.0003662109375, 0.04388427734375, 0.08740234375, 0.13092041015625, 0.1744384765625, 0.21795654296875, 0.261474609375, 0.30499267578125, 0.3485107421875, 0.39202880859375, 0.435546875, 0.47906494140625, 0.5225830078125, 0.56610107421875, 0.609619140625, 0.65313720703125, 0.6966552734375, 0.74017333984375, 0.78369140625, 0.82720947265625, 0.8707275390625, 0.91424560546875, 0.957763671875, 1.00128173828125, 1.0447998046875, 1.08831787109375, 1.1318359375]}, "gradients/decoder.model.decoder.layers.4.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 4.0, 3.0, 0.0, 8.0, 4.0, 6.0, 12.0, 13.0, 19.0, 31.0, 61.0, 222.0, 366.0, 133.0, 54.0, 23.0, 15.0, 9.0, 9.0, 9.0, 3.0, 4.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.0002033710479736328, -0.00019907578825950623, -0.00019478052854537964, -0.00019048526883125305, -0.00018619000911712646, -0.00018189474940299988, -0.0001775994896888733, -0.0001733042299747467, -0.00016900897026062012, -0.00016471371054649353, -0.00016041845083236694, -0.00015612319111824036, -0.00015182793140411377, -0.00014753267168998718, -0.0001432374119758606, -0.000138942152261734, -0.00013464689254760742, -0.00013035163283348083, -0.00012605637311935425, -0.00012176111340522766, -0.00011746585369110107, -0.00011317059397697449, -0.0001088753342628479, -0.00010458007454872131, -0.00010028481483459473, -9.598955512046814e-05, -9.169429540634155e-05, -8.739903569221497e-05, -8.310377597808838e-05, -7.880851626396179e-05, -7.45132565498352e-05, -7.021799683570862e-05, -6.592273712158203e-05, 
-6.162747740745544e-05, -5.733221769332886e-05, -5.303695797920227e-05, -4.8741698265075684e-05, -4.44464385509491e-05, -4.015117883682251e-05, -3.585591912269592e-05, -3.1560659408569336e-05, -2.726539969444275e-05, -2.2970139980316162e-05, -1.8674880266189575e-05, -1.4379620552062988e-05, -1.0084360837936401e-05, -5.7891011238098145e-06, -1.4938414096832275e-06, 2.8014183044433594e-06, 7.096678018569946e-06, 1.1391937732696533e-05, 1.568719744682312e-05, 1.9982457160949707e-05, 2.4277716875076294e-05, 2.857297658920288e-05, 3.286823630332947e-05, 3.7163496017456055e-05, 4.145875573158264e-05, 4.575401544570923e-05, 5.0049275159835815e-05, 5.43445348739624e-05, 5.863979458808899e-05, 6.293505430221558e-05, 6.723031401634216e-05, 7.152557373046875e-05]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 8.0, 3.0, 11.0, 18.0, 27.0, 38.0, 69.0, 103.0, 203.0, 465.0, 1125.0, 3539.0, 16445.0, 835455.0, 174055.0, 12382.0, 2814.0, 952.0, 408.0, 168.0, 100.0, 62.0, 30.0, 20.0, 23.0, 16.0, 4.0, 6.0, 5.0, 1.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.10546875, -1.0611114501953125, -1.016754150390625, -0.9723968505859375, -0.92803955078125, -0.8836822509765625, -0.839324951171875, -0.7949676513671875, -0.7506103515625, -0.7062530517578125, -0.661895751953125, -0.6175384521484375, -0.57318115234375, -0.5288238525390625, -0.484466552734375, -0.4401092529296875, -0.395751953125, -0.3513946533203125, -0.307037353515625, -0.2626800537109375, -0.21832275390625, -0.1739654541015625, -0.129608154296875, -0.0852508544921875, -0.0408935546875, 0.0034637451171875, 0.047821044921875, 0.0921783447265625, 0.13653564453125, 0.1808929443359375, 0.225250244140625, 0.2696075439453125, 0.31396484375, 0.3583221435546875, 0.402679443359375, 0.4470367431640625, 0.49139404296875, 0.5357513427734375, 0.580108642578125, 0.6244659423828125, 0.6688232421875, 0.7131805419921875, 0.757537841796875, 0.8018951416015625, 0.84625244140625, 0.8906097412109375, 0.934967041015625, 0.9793243408203125, 1.023681640625, 1.0680389404296875, 1.112396240234375, 1.1567535400390625, 1.20111083984375, 1.2454681396484375, 1.289825439453125, 1.3341827392578125, 1.3785400390625, 1.4228973388671875, 1.467254638671875, 1.5116119384765625, 1.55596923828125, 1.6003265380859375, 1.644683837890625, 1.6890411376953125, 1.7333984375]}, "gradients/decoder.model.decoder.layers.4.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 4.0, 6.0, 4.0, 11.0, 9.0, 8.0, 16.0, 28.0, 38.0, 201.0, 500.0, 66.0, 38.0, 14.0, 19.0, 7.0, 11.0, 5.0, 5.0, 2.0, 7.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.437255859375, -0.4232063293457031, -0.40915679931640625, -0.3951072692871094, -0.3810577392578125, -0.3670082092285156, -0.35295867919921875, -0.3389091491699219, -0.324859619140625, -0.3108100891113281, -0.29676055908203125, -0.2827110290527344, -0.2686614990234375, -0.2546119689941406, -0.24056243896484375, -0.22651290893554688, -0.21246337890625, -0.19841384887695312, -0.18436431884765625, -0.17031478881835938, -0.1562652587890625, -0.14221572875976562, -0.12816619873046875, -0.11411666870117188, -0.100067138671875, -0.08601760864257812, -0.07196807861328125, -0.057918548583984375, 
-0.0438690185546875, -0.029819488525390625, -0.01576995849609375, -0.001720428466796875, 0.0123291015625, 0.026378631591796875, 0.04042816162109375, 0.054477691650390625, 0.0685272216796875, 0.08257675170898438, 0.09662628173828125, 0.11067581176757812, 0.124725341796875, 0.13877487182617188, 0.15282440185546875, 0.16687393188476562, 0.1809234619140625, 0.19497299194335938, 0.20902252197265625, 0.22307205200195312, 0.23712158203125, 0.2511711120605469, 0.26522064208984375, 0.2792701721191406, 0.2933197021484375, 0.3073692321777344, 0.32141876220703125, 0.3354682922363281, 0.349517822265625, 0.3635673522949219, 0.37761688232421875, 0.3916664123535156, 0.4057159423828125, 0.4197654724121094, 0.43381500244140625, 0.4478645324707031, 0.4619140625]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 5.0, 14.0, 14.0, 23.0, 44.0, 83.0, 139.0, 162.0, 162.0, 135.0, 96.0, 59.0, 30.0, 15.0, 12.0, 8.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.257029056549072, -5.961655139923096, -5.666281700134277, -5.370907783508301, -5.075533866882324, -4.780159950256348, -4.484786510467529, -4.189412593841553, -3.8940389156341553, -3.598665237426758, -3.3032913208007812, -3.007917642593384, -2.7125439643859863, -2.4171700477600098, -2.1217963695526123, -1.8264224529266357, -1.5310487747192383, -1.2356749773025513, -0.940301239490509, -0.6449275016784668, -0.3495537042617798, -0.05417990684509277, 0.2411937713623047, 0.5365676879882812, 0.8319413661956787, 1.1273151636123657, 1.4226889610290527, 1.7180626392364502, 2.0134363174438477, 2.308810234069824, 2.6041839122772217, 2.8995578289031982, 3.1949310302734375, 3.490304708480835, 3.7856786251068115, 4.081052303314209, 4.3764262199401855, 4.671799659729004, 4.9671735763549805, 5.262547492980957, 5.557921409606934, 5.85329532623291, 6.1486687660217285, 6.444042682647705, 6.739416599273682, 7.0347900390625, 7.330163955688477, 7.625537872314453, 7.9209113121032715, 8.21628475189209, 8.511658668518066, 8.807032585144043, 9.10240650177002, 9.397780418395996, 9.693153381347656, 9.988527297973633, 10.28390121459961, 10.579275131225586, 10.874649047851562, 11.170022964477539, 11.4653959274292, 11.760769844055176, 12.056143760681152, 12.351517677307129, 12.646891593933105]}, "gradients/decoder.model.decoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 8.0, 8.0, 7.0, 5.0, 15.0, 10.0, 15.0, 18.0, 27.0, 22.0, 22.0, 31.0, 29.0, 38.0, 38.0, 39.0, 34.0, 42.0, 52.0, 51.0, 56.0, 25.0, 39.0, 35.0, 41.0, 36.0, 25.0, 36.0, 31.0, 29.0, 21.0, 23.0, 25.0, 16.0, 15.0, 8.0, 10.0, 9.0, 4.0, 4.0, 4.0, 5.0, 3.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.087131023406982, -6.861043930053711, -6.6349568367004395, -6.408869743347168, -6.1827826499938965, -5.956695556640625, -5.7306084632873535, -5.504521369934082, -5.2784342765808105, -5.052347183227539, -4.826260089874268, -4.600172996520996, -4.374085903167725, -4.147998809814453, -3.9219117164611816, -3.69582462310791, -3.4697377681732178, -3.2436506748199463, -3.017563581466675, -2.7914764881134033, -2.565389394760132, -2.3393025398254395, -2.113215446472168, -1.887128233909607, -1.6610411405563354, -1.434954047203064, -1.2088669538497925, -0.9827799201011658, -0.7566928267478943, 
-0.5306057929992676, -0.3045186996459961, -0.07843160629272461, 0.14765548706054688, 0.37374258041381836, 0.5998296737670898, 0.8259167075157166, 1.0520038604736328, 1.2780908346176147, 1.5041779279708862, 1.7302650213241577, 1.9563521146774292, 2.182439088821411, 2.4085261821746826, 2.634613275527954, 2.8607003688812256, 3.086787462234497, 3.3128745555877686, 3.53896164894104, 3.7650487422943115, 3.991135835647583, 4.217222690582275, 4.443309783935547, 4.669396877288818, 4.89548397064209, 5.121571063995361, 5.347658157348633, 5.573745250701904, 5.799832344055176, 6.025919437408447, 6.252006530761719, 6.47809362411499, 6.704180717468262, 6.930267810821533, 7.156354904174805, 7.382441997528076]}, "gradients/decoder.model.decoder.layers.3.fc2.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 3.0, 0.0, 4.0, 3.0, 2.0, 5.0, 5.0, 10.0, 12.0, 16.0, 17.0, 17.0, 36.0, 57.0, 77.0, 133.0, 220.0, 382.0, 783.0, 1745.0, 4996.0, 18268.0, 132900.0, 2518501.0, 1419959.0, 76658.0, 13009.0, 3794.0, 1329.0, 597.0, 283.0, 160.0, 110.0, 61.0, 40.0, 30.0, 19.0, 15.0, 9.0, 7.0, 9.0, 4.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-5.73828125, -5.56890869140625, -5.3995361328125, -5.23016357421875, -5.060791015625, -4.89141845703125, -4.7220458984375, -4.55267333984375, -4.38330078125, -4.21392822265625, -4.0445556640625, -3.87518310546875, -3.705810546875, -3.53643798828125, -3.3670654296875, -3.19769287109375, -3.0283203125, -2.85894775390625, -2.6895751953125, -2.52020263671875, -2.350830078125, -2.18145751953125, -2.0120849609375, -1.84271240234375, -1.67333984375, -1.50396728515625, -1.3345947265625, -1.16522216796875, -0.995849609375, -0.82647705078125, -0.6571044921875, -0.48773193359375, -0.318359375, -0.14898681640625, 0.0203857421875, 0.18975830078125, 0.359130859375, 0.52850341796875, 0.6978759765625, 0.86724853515625, 1.03662109375, 1.20599365234375, 1.3753662109375, 1.54473876953125, 1.714111328125, 1.88348388671875, 2.0528564453125, 2.22222900390625, 2.3916015625, 2.56097412109375, 2.7303466796875, 2.89971923828125, 3.069091796875, 3.23846435546875, 3.4078369140625, 3.57720947265625, 3.74658203125, 3.91595458984375, 4.0853271484375, 4.25469970703125, 4.424072265625, 4.59344482421875, 4.7628173828125, 4.93218994140625, 5.1015625]}, "gradients/decoder.model.decoder.layers.3.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 2.0, 1.0, 4.0, 4.0, 7.0, 12.0, 5.0, 8.0, 18.0, 20.0, 28.0, 20.0, 30.0, 35.0, 42.0, 31.0, 54.0, 43.0, 32.0, 48.0, 50.0, 65.0, 53.0, 64.0, 52.0, 28.0, 41.0, 22.0, 35.0, 18.0, 38.0, 18.0, 18.0, 12.0, 11.0, 5.0, 4.0, 7.0, 5.0, 4.0, 3.0, 2.0, 5.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0], "bins": [-2.31640625, -2.251708984375, -2.18701171875, -2.122314453125, -2.0576171875, -1.992919921875, -1.92822265625, -1.863525390625, -1.798828125, -1.734130859375, -1.66943359375, -1.604736328125, -1.5400390625, -1.475341796875, -1.41064453125, -1.345947265625, -1.28125, -1.216552734375, -1.15185546875, -1.087158203125, -1.0224609375, -0.957763671875, -0.89306640625, -0.828369140625, -0.763671875, -0.698974609375, -0.63427734375, -0.569580078125, -0.5048828125, -0.440185546875, -0.37548828125, -0.310791015625, -0.24609375, -0.181396484375, -0.11669921875, -0.052001953125, 0.0126953125, 0.077392578125, 0.14208984375, 0.206787109375, 0.271484375, 0.336181640625, 0.40087890625, 0.465576171875, 0.5302734375, 0.594970703125, 0.65966796875, 0.724365234375, 0.7890625, 
0.853759765625, 0.91845703125, 0.983154296875, 1.0478515625, 1.112548828125, 1.17724609375, 1.241943359375, 1.306640625, 1.371337890625, 1.43603515625, 1.500732421875, 1.5654296875, 1.630126953125, 1.69482421875, 1.759521484375, 1.82421875]}, "gradients/decoder.model.decoder.layers.3.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 4.0, 2.0, 4.0, 4.0, 5.0, 6.0, 9.0, 13.0, 9.0, 32.0, 31.0, 60.0, 95.0, 150.0, 244.0, 520.0, 1265.0, 3054.0, 10343.0, 62622.0, 1880409.0, 2152425.0, 67496.0, 10202.0, 2958.0, 1173.0, 539.0, 244.0, 141.0, 74.0, 53.0, 31.0, 21.0, 18.0, 7.0, 8.0, 5.0, 3.0, 8.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-6.6640625, -6.5015869140625, -6.339111328125, -6.1766357421875, -6.01416015625, -5.8516845703125, -5.689208984375, -5.5267333984375, -5.3642578125, -5.2017822265625, -5.039306640625, -4.8768310546875, -4.71435546875, -4.5518798828125, -4.389404296875, -4.2269287109375, -4.064453125, -3.9019775390625, -3.739501953125, -3.5770263671875, -3.41455078125, -3.2520751953125, -3.089599609375, -2.9271240234375, -2.7646484375, -2.6021728515625, -2.439697265625, -2.2772216796875, -2.11474609375, -1.9522705078125, -1.789794921875, -1.6273193359375, -1.46484375, -1.3023681640625, -1.139892578125, -0.9774169921875, -0.81494140625, -0.6524658203125, -0.489990234375, -0.3275146484375, -0.1650390625, -0.0025634765625, 0.159912109375, 0.3223876953125, 0.48486328125, 0.6473388671875, 0.809814453125, 0.9722900390625, 1.134765625, 1.2972412109375, 1.459716796875, 1.6221923828125, 1.78466796875, 1.9471435546875, 2.109619140625, 2.2720947265625, 2.4345703125, 2.5970458984375, 2.759521484375, 2.9219970703125, 3.08447265625, 3.2469482421875, 3.409423828125, 3.5718994140625, 3.734375]}, "gradients/decoder.model.decoder.layers.3.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 2.0, 2.0, 3.0, 0.0, 8.0, 8.0, 12.0, 22.0, 22.0, 40.0, 37.0, 73.0, 95.0, 160.0, 276.0, 458.0, 656.0, 746.0, 560.0, 298.0, 195.0, 115.0, 91.0, 57.0, 41.0, 31.0, 18.0, 8.0, 9.0, 4.0, 7.0, 6.0, 3.0, 3.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-1.025390625, -0.984710693359375, -0.94403076171875, -0.903350830078125, -0.8626708984375, -0.821990966796875, -0.78131103515625, -0.740631103515625, -0.699951171875, -0.659271240234375, -0.61859130859375, -0.577911376953125, -0.5372314453125, -0.496551513671875, -0.45587158203125, -0.415191650390625, -0.37451171875, -0.333831787109375, -0.29315185546875, -0.252471923828125, -0.2117919921875, -0.171112060546875, -0.13043212890625, -0.089752197265625, -0.049072265625, -0.008392333984375, 0.03228759765625, 0.072967529296875, 0.1136474609375, 0.154327392578125, 0.19500732421875, 0.235687255859375, 0.2763671875, 0.317047119140625, 0.35772705078125, 0.398406982421875, 0.4390869140625, 0.479766845703125, 0.52044677734375, 0.561126708984375, 0.601806640625, 0.642486572265625, 0.68316650390625, 0.723846435546875, 0.7645263671875, 0.805206298828125, 0.84588623046875, 0.886566162109375, 0.92724609375, 0.967926025390625, 1.00860595703125, 1.049285888671875, 1.0899658203125, 1.130645751953125, 1.17132568359375, 1.212005615234375, 1.252685546875, 1.293365478515625, 1.33404541015625, 1.374725341796875, 1.4154052734375, 1.456085205078125, 1.49676513671875, 1.537445068359375, 1.578125]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 6.0, 6.0, 9.0, 23.0, 25.0, 34.0, 52.0, 64.0, 101.0, 88.0, 114.0, 110.0, 82.0, 79.0, 56.0, 48.0, 31.0, 23.0, 20.0, 14.0, 5.0, 6.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-6.406394004821777, -6.252882957458496, -6.099371433258057, -5.945860385894775, -5.792349338531494, -5.638837814331055, -5.485326766967773, -5.331815719604492, -5.178304195404053, -5.0247931480407715, -4.871281623840332, -4.717770576477051, -4.5642595291137695, -4.41074800491333, -4.257236957550049, -4.103725433349609, -3.9502146244049072, -3.796703338623047, -3.6431922912597656, -3.4896810054779053, -3.336169719696045, -3.1826586723327637, -3.0291473865509033, -2.875636100769043, -2.7221250534057617, -2.5686137676239014, -2.41510272026062, -2.2615914344787598, -2.1080801486968994, -1.9545689821243286, -1.8010578155517578, -1.6475465297698975, -1.4940354824066162, -1.3405243158340454, -1.187013030052185, -1.0335018634796143, -0.8799906373023987, -0.7264794111251831, -0.5729682445526123, -0.41945695877075195, -0.26594579219818115, -0.11243458092212677, 0.04107663035392761, 0.1945878267288208, 0.3480990529060364, 0.501610279083252, 0.6551214456558228, 0.8086327314376831, 0.9621438980102539, 1.1156550645828247, 1.269166350364685, 1.4226775169372559, 1.5761888027191162, 1.729699969291687, 1.8832111358642578, 2.036722421646118, 2.1902337074279785, 2.343744993209839, 2.49725604057312, 2.6507673263549805, 2.804278612136841, 2.957789897918701, 3.1113009452819824, 3.2648122310638428, 3.418323278427124]}, "gradients/decoder.model.decoder.layers.3.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 3.0, 1.0, 6.0, 9.0, 10.0, 4.0, 8.0, 12.0, 8.0, 14.0, 11.0, 29.0, 25.0, 32.0, 34.0, 39.0, 47.0, 46.0, 55.0, 49.0, 48.0, 50.0, 50.0, 46.0, 45.0, 50.0, 42.0, 45.0, 36.0, 26.0, 27.0, 24.0, 21.0, 10.0, 10.0, 10.0, 5.0, 8.0, 1.0, 3.0, 1.0, 6.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.8059191703796387, -2.7161920070648193, -2.626465082168579, -2.5367379188537598, -2.4470109939575195, -2.3572838306427, -2.267556667327881, -2.1778297424316406, -2.0881025791168213, -1.9983755350112915, -1.9086484909057617, -1.8189213275909424, -1.7291942834854126, -1.6394672393798828, -1.549740195274353, -1.4600131511688232, -1.3702861070632935, -1.2805590629577637, -1.1908320188522339, -1.101104974746704, -1.0113778114318848, -0.921650767326355, -0.8319237232208252, -0.7421966195106506, -0.6524695754051208, -0.5627425312995911, -0.4730154275894165, -0.3832883834838867, -0.29356130957603455, -0.20383423566818237, -0.11410719156265259, -0.024380087852478027, 0.06534695625305176, 0.15507403016090393, 0.2448010891675949, 0.3345281481742859, 0.42425522208213806, 0.5139822959899902, 0.60370934009552, 0.6934364438056946, 0.7831634879112244, 0.8728905320167542, 0.9626176357269287, 1.0523446798324585, 1.1420717239379883, 1.2317988872528076, 1.3215258121490479, 1.4112529754638672, 1.500980019569397, 1.5907070636749268, 1.6804341077804565, 1.7701611518859863, 1.8598883152008057, 1.9496153593063354, 2.0393424034118652, 2.1290695667266846, 2.218796491622925, 2.308523654937744, 2.3982505798339844, 2.4879777431488037, 2.577704668045044, 2.6674318313598633, 2.7571587562561035, 2.846885919570923, 2.936613082885742]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.weight": {"_type": "histogram", 
"values": [3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 3.0, 2.0, 4.0, 3.0, 7.0, 11.0, 13.0, 23.0, 34.0, 52.0, 79.0, 110.0, 179.0, 288.0, 534.0, 953.0, 1874.0, 3938.0, 9831.0, 27823.0, 108358.0, 558331.0, 256895.0, 51674.0, 15742.0, 6046.0, 2621.0, 1342.0, 708.0, 431.0, 231.0, 143.0, 82.0, 69.0, 37.0, 32.0, 23.0, 9.0, 4.0, 11.0, 2.0, 4.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.56689453125, -0.5478363037109375, -0.528778076171875, -0.5097198486328125, -0.49066162109375, -0.4716033935546875, -0.452545166015625, -0.4334869384765625, -0.4144287109375, -0.3953704833984375, -0.376312255859375, -0.3572540283203125, -0.33819580078125, -0.3191375732421875, -0.300079345703125, -0.2810211181640625, -0.261962890625, -0.2429046630859375, -0.223846435546875, -0.2047882080078125, -0.18572998046875, -0.1666717529296875, -0.147613525390625, -0.1285552978515625, -0.1094970703125, -0.0904388427734375, -0.071380615234375, -0.0523223876953125, -0.03326416015625, -0.0142059326171875, 0.004852294921875, 0.0239105224609375, 0.04296875, 0.0620269775390625, 0.081085205078125, 0.1001434326171875, 0.11920166015625, 0.1382598876953125, 0.157318115234375, 0.1763763427734375, 0.1954345703125, 0.2144927978515625, 0.233551025390625, 0.2526092529296875, 0.27166748046875, 0.2907257080078125, 0.309783935546875, 0.3288421630859375, 0.347900390625, 0.3669586181640625, 0.386016845703125, 0.4050750732421875, 0.42413330078125, 0.4431915283203125, 0.462249755859375, 0.4813079833984375, 0.5003662109375, 0.5194244384765625, 0.538482666015625, 0.5575408935546875, 0.57659912109375, 0.5956573486328125, 0.614715576171875, 0.6337738037109375, 0.65283203125]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 3.0, 2.0, 8.0, 7.0, 13.0, 9.0, 15.0, 22.0, 23.0, 39.0, 41.0, 39.0, 71.0, 70.0, 71.0, 72.0, 78.0, 56.0, 69.0, 61.0, 64.0, 49.0, 32.0, 27.0, 31.0, 12.0, 7.0, 2.0, 5.0, 5.0, 7.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.193359375, -2.11181640625, -2.0302734375, -1.94873046875, -1.8671875, -1.78564453125, -1.7041015625, -1.62255859375, -1.541015625, -1.45947265625, -1.3779296875, -1.29638671875, -1.21484375, -1.13330078125, -1.0517578125, -0.97021484375, -0.888671875, -0.80712890625, -0.7255859375, -0.64404296875, -0.5625, -0.48095703125, -0.3994140625, -0.31787109375, -0.236328125, -0.15478515625, -0.0732421875, 0.00830078125, 0.08984375, 0.17138671875, 0.2529296875, 0.33447265625, 0.416015625, 0.49755859375, 0.5791015625, 0.66064453125, 0.7421875, 0.82373046875, 0.9052734375, 0.98681640625, 1.068359375, 1.14990234375, 1.2314453125, 1.31298828125, 1.39453125, 1.47607421875, 1.5576171875, 1.63916015625, 1.720703125, 1.80224609375, 1.8837890625, 1.96533203125, 2.046875, 2.12841796875, 2.2099609375, 2.29150390625, 2.373046875, 2.45458984375, 2.5361328125, 2.61767578125, 2.69921875, 2.78076171875, 2.8623046875, 2.94384765625, 3.025390625]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 5.0, 2.0, 7.0, 8.0, 18.0, 24.0, 32.0, 62.0, 94.0, 114.0, 209.0, 303.0, 502.0, 803.0, 1412.0, 2597.0, 4914.0, 9577.0, 19225.0, 42599.0, 104091.0, 295398.0, 347028.0, 125189.0, 49455.0, 21983.0, 10636.0, 5319.0, 2884.0, 1597.0, 930.0, 572.0, 329.0, 204.0, 142.0, 102.0, 61.0, 33.0, 34.0, 28.0, 10.0, 8.0, 7.0, 2.0, 1.0, 
2.0, 1.0, 2.0, 2.0, 0.0, 2.0, 3.0], "bins": [-0.256591796875, -0.24906539916992188, -0.24153900146484375, -0.23401260375976562, -0.2264862060546875, -0.21895980834960938, -0.21143341064453125, -0.20390701293945312, -0.196380615234375, -0.18885421752929688, -0.18132781982421875, -0.17380142211914062, -0.1662750244140625, -0.15874862670898438, -0.15122222900390625, -0.14369583129882812, -0.13616943359375, -0.12864303588867188, -0.12111663818359375, -0.11359024047851562, -0.1060638427734375, -0.09853744506835938, -0.09101104736328125, -0.08348464965820312, -0.075958251953125, -0.06843185424804688, -0.06090545654296875, -0.053379058837890625, -0.0458526611328125, -0.038326263427734375, -0.03079986572265625, -0.023273468017578125, -0.0157470703125, -0.008220672607421875, -0.00069427490234375, 0.006832122802734375, 0.0143585205078125, 0.021884918212890625, 0.02941131591796875, 0.036937713623046875, 0.044464111328125, 0.051990509033203125, 0.05951690673828125, 0.06704330444335938, 0.0745697021484375, 0.08209609985351562, 0.08962249755859375, 0.09714889526367188, 0.10467529296875, 0.11220169067382812, 0.11972808837890625, 0.12725448608398438, 0.1347808837890625, 0.14230728149414062, 0.14983367919921875, 0.15736007690429688, 0.164886474609375, 0.17241287231445312, 0.17993927001953125, 0.18746566772460938, 0.1949920654296875, 0.20251846313476562, 0.21004486083984375, 0.21757125854492188, 0.22509765625]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 1.0, 2.0, 2.0, 6.0, 6.0, 7.0, 7.0, 11.0, 9.0, 19.0, 23.0, 17.0, 28.0, 34.0, 37.0, 33.0, 37.0, 45.0, 37.0, 46.0, 62.0, 43.0, 50.0, 42.0, 46.0, 42.0, 41.0, 38.0, 29.0, 31.0, 30.0, 26.0, 19.0, 25.0, 13.0, 10.0, 13.0, 11.0, 9.0, 1.0, 5.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 0.0, 2.0], "bins": [-3.84765625, -3.739593505859375, -3.63153076171875, -3.523468017578125, -3.4154052734375, -3.307342529296875, -3.19927978515625, -3.091217041015625, -2.983154296875, -2.875091552734375, -2.76702880859375, -2.658966064453125, -2.5509033203125, -2.442840576171875, -2.33477783203125, -2.226715087890625, -2.11865234375, -2.010589599609375, -1.90252685546875, -1.794464111328125, -1.6864013671875, -1.578338623046875, -1.47027587890625, -1.362213134765625, -1.254150390625, -1.146087646484375, -1.03802490234375, -0.929962158203125, -0.8218994140625, -0.713836669921875, -0.60577392578125, -0.497711181640625, -0.3896484375, -0.281585693359375, -0.17352294921875, -0.065460205078125, 0.0426025390625, 0.150665283203125, 0.25872802734375, 0.366790771484375, 0.474853515625, 0.582916259765625, 0.69097900390625, 0.799041748046875, 0.9071044921875, 1.015167236328125, 1.12322998046875, 1.231292724609375, 1.33935546875, 1.447418212890625, 1.55548095703125, 1.663543701171875, 1.7716064453125, 1.879669189453125, 1.98773193359375, 2.095794677734375, 2.203857421875, 2.311920166015625, 2.41998291015625, 2.528045654296875, 2.6361083984375, 2.744171142578125, 2.85223388671875, 2.960296630859375, 3.068359375]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 2.0, 2.0, 2.0, 8.0, 10.0, 16.0, 28.0, 38.0, 73.0, 123.0, 249.0, 543.0, 1731.0, 7492.0, 59350.0, 822992.0, 138829.0, 12880.0, 2544.0, 759.0, 353.0, 200.0, 101.0, 74.0, 54.0, 33.0, 23.0, 15.0, 13.0, 6.0, 5.0, 3.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 1.0], "bins": [-0.09820556640625, -0.0951986312866211, -0.09219169616699219, -0.08918476104736328, -0.08617782592773438, -0.08317089080810547, -0.08016395568847656, -0.07715702056884766, -0.07415008544921875, -0.07114315032958984, -0.06813621520996094, -0.06512928009033203, -0.062122344970703125, -0.05911540985107422, -0.05610847473144531, -0.053101539611816406, -0.0500946044921875, -0.047087669372558594, -0.04408073425292969, -0.04107379913330078, -0.038066864013671875, -0.03505992889404297, -0.03205299377441406, -0.029046058654785156, -0.02603912353515625, -0.023032188415527344, -0.020025253295898438, -0.01701831817626953, -0.014011383056640625, -0.011004447937011719, -0.007997512817382812, -0.004990577697753906, -0.001983642578125, 0.0010232925415039062, 0.0040302276611328125, 0.007037162780761719, 0.010044097900390625, 0.013051033020019531, 0.016057968139648438, 0.019064903259277344, 0.02207183837890625, 0.025078773498535156, 0.028085708618164062, 0.03109264373779297, 0.034099578857421875, 0.03710651397705078, 0.04011344909667969, 0.043120384216308594, 0.0461273193359375, 0.049134254455566406, 0.05214118957519531, 0.05514812469482422, 0.058155059814453125, 0.06116199493408203, 0.06416893005371094, 0.06717586517333984, 0.07018280029296875, 0.07318973541259766, 0.07619667053222656, 0.07920360565185547, 0.08221054077148438, 0.08521747589111328, 0.08822441101074219, 0.0912313461303711, 0.09423828125]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0, 0.0, 1.0, 3.0, 6.0, 10.0, 4.0, 8.0, 10.0, 14.0, 15.0, 16.0, 31.0, 23.0, 53.0, 59.0, 73.0, 72.0, 71.0, 77.0, 83.0, 77.0, 69.0, 51.0, 40.0, 31.0, 24.0, 16.0, 17.0, 11.0, 9.0, 7.0, 7.0, 8.0, 5.0, 2.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-3.427267074584961e-05, -3.3280812203884125e-05, -3.228895366191864e-05, -3.1297095119953156e-05, -3.030523657798767e-05, -2.9313378036022186e-05, -2.83215194940567e-05, -2.7329660952091217e-05, -2.6337802410125732e-05, -2.5345943868160248e-05, -2.4354085326194763e-05, -2.336222678422928e-05, -2.2370368242263794e-05, -2.137850970029831e-05, -2.0386651158332825e-05, -1.939479261636734e-05, -1.8402934074401855e-05, -1.741107553243637e-05, -1.6419216990470886e-05, -1.54273584485054e-05, -1.4435499906539917e-05, -1.3443641364574432e-05, -1.2451782822608948e-05, -1.1459924280643463e-05, -1.0468065738677979e-05, -9.476207196712494e-06, -8.48434865474701e-06, -7.492490112781525e-06, -6.50063157081604e-06, -5.508773028850555e-06, -4.516914486885071e-06, -3.525055944919586e-06, -2.5331974029541016e-06, -1.541338860988617e-06, -5.494803190231323e-07, 4.423782229423523e-07, 1.434236764907837e-06, 2.4260953068733215e-06, 3.417953848838806e-06, 4.409812390804291e-06, 5.401670932769775e-06, 6.39352947473526e-06, 7.385388016700745e-06, 8.37724655866623e-06, 9.369105100631714e-06, 1.0360963642597198e-05, 1.1352822184562683e-05, 1.2344680726528168e-05, 1.3336539268493652e-05, 1.4328397810459137e-05, 1.532025635242462e-05, 1.6312114894390106e-05, 1.730397343635559e-05, 1.8295831978321075e-05, 1.928769052028656e-05, 2.0279549062252045e-05, 2.127140760421753e-05, 2.2263266146183014e-05, 2.32551246881485e-05, 2.4246983230113983e-05, 2.5238841772079468e-05, 2.6230700314044952e-05, 2.7222558856010437e-05, 2.821441739797592e-05, 2.9206275939941406e-05]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 
1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 5.0, 8.0, 8.0, 13.0, 18.0, 28.0, 33.0, 65.0, 103.0, 179.0, 299.0, 537.0, 1003.0, 2011.0, 4484.0, 10948.0, 31940.0, 132031.0, 666222.0, 144158.0, 34166.0, 11208.0, 4545.0, 2049.0, 1091.0, 587.0, 314.0, 163.0, 109.0, 70.0, 48.0, 31.0, 24.0, 11.0, 15.0, 5.0, 4.0, 5.0, 4.0, 1.0, 0.0, 2.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.057373046875, -0.05549335479736328, -0.05361366271972656, -0.051733970642089844, -0.049854278564453125, -0.047974586486816406, -0.04609489440917969, -0.04421520233154297, -0.04233551025390625, -0.04045581817626953, -0.03857612609863281, -0.036696434020996094, -0.034816741943359375, -0.032937049865722656, -0.031057357788085938, -0.02917766571044922, -0.0272979736328125, -0.02541828155517578, -0.023538589477539062, -0.021658897399902344, -0.019779205322265625, -0.017899513244628906, -0.016019821166992188, -0.014140129089355469, -0.01226043701171875, -0.010380744934082031, -0.008501052856445312, -0.006621360778808594, -0.004741668701171875, -0.0028619766235351562, -0.0009822845458984375, 0.0008974075317382812, 0.002777099609375, 0.004656791687011719, 0.0065364837646484375, 0.008416175842285156, 0.010295867919921875, 0.012175559997558594, 0.014055252075195312, 0.01593494415283203, 0.01781463623046875, 0.01969432830810547, 0.021574020385742188, 0.023453712463378906, 0.025333404541015625, 0.027213096618652344, 0.029092788696289062, 0.03097248077392578, 0.0328521728515625, 0.03473186492919922, 0.03661155700683594, 0.038491249084472656, 0.040370941162109375, 0.042250633239746094, 0.04413032531738281, 0.04601001739501953, 0.04788970947265625, 0.04976940155029297, 0.05164909362792969, 0.053528785705566406, 0.055408477783203125, 0.057288169860839844, 0.05916786193847656, 0.06104755401611328, 0.06292724609375]}, "gradients/decoder.model.decoder.layers.3.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 2.0, 6.0, 2.0, 5.0, 15.0, 11.0, 9.0, 12.0, 10.0, 12.0, 16.0, 29.0, 39.0, 49.0, 55.0, 87.0, 171.0, 140.0, 94.0, 70.0, 48.0, 22.0, 23.0, 18.0, 17.0, 14.0, 8.0, 8.0, 2.0, 7.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.01386260986328125, -0.013383030891418457, -0.012903451919555664, -0.012423872947692871, -0.011944293975830078, -0.011464715003967285, -0.010985136032104492, -0.0105055570602417, -0.010025978088378906, -0.009546399116516113, -0.00906682014465332, -0.008587241172790527, -0.008107662200927734, -0.007628083229064941, -0.0071485042572021484, -0.0066689252853393555, -0.0061893463134765625, -0.0057097673416137695, -0.0052301883697509766, -0.004750609397888184, -0.004271030426025391, -0.0037914514541625977, -0.0033118724822998047, -0.0028322935104370117, -0.0023527145385742188, -0.0018731355667114258, -0.0013935565948486328, -0.0009139776229858398, -0.0004343986511230469, 4.5180320739746094e-05, 0.0005247592926025391, 0.001004338264465332, 0.001483917236328125, 0.001963496208190918, 0.002443075180053711, 0.002922654151916504, 0.003402233123779297, 0.00388181209564209, 0.004361391067504883, 0.004840970039367676, 0.005320549011230469, 0.005800127983093262, 0.006279706954956055, 0.006759285926818848, 0.007238864898681641, 0.007718443870544434, 0.008198022842407227, 0.00867760181427002, 0.009157180786132812, 0.009636759757995605, 0.010116338729858398, 0.010595917701721191, 0.011075496673583984, 0.011555075645446777, 0.01203465461730957, 0.012514233589172363, 
0.012993812561035156, 0.01347339153289795, 0.013952970504760742, 0.014432549476623535, 0.014912128448486328, 0.015391707420349121, 0.015871286392211914, 0.016350865364074707, 0.0168304443359375]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 6.0, 2.0, 10.0, 15.0, 39.0, 46.0, 77.0, 124.0, 151.0, 132.0, 133.0, 100.0, 63.0, 51.0, 24.0, 20.0, 10.0, 3.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.755821228027344, -4.631500720977783, -4.507180213928223, -4.382859706878662, -4.258539199829102, -4.134218692779541, -4.0098981857299805, -3.88557767868042, -3.7612571716308594, -3.636936664581299, -3.5126161575317383, -3.3882956504821777, -3.263975143432617, -3.1396546363830566, -3.015334129333496, -2.8910136222839355, -2.766693115234375, -2.6423726081848145, -2.518052101135254, -2.3937315940856934, -2.269411087036133, -2.1450905799865723, -2.0207700729370117, -1.8964495658874512, -1.7721290588378906, -1.64780855178833, -1.5234880447387695, -1.399167537689209, -1.2748470306396484, -1.150526523590088, -1.0262060165405273, -0.9018855094909668, -0.7775650024414062, -0.6532444953918457, -0.5289239883422852, -0.4046034812927246, -0.28028297424316406, -0.15596246719360352, -0.03164196014404297, 0.09267854690551758, 0.21699905395507812, 0.34131956100463867, 0.4656400680541992, 0.5899605751037598, 0.7142810821533203, 0.8386015892028809, 0.9629220962524414, 1.087242603302002, 1.2115631103515625, 1.335883617401123, 1.4602041244506836, 1.5845246315002441, 1.7088451385498047, 1.8331656455993652, 1.9574861526489258, 2.0818066596984863, 2.206127166748047, 2.3304476737976074, 2.454768180847168, 2.5790886878967285, 2.703409194946289, 2.8277297019958496, 2.95205020904541, 3.0763707160949707, 3.2006912231445312]}, "gradients/decoder.model.decoder.layers.3.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 11.0, 9.0, 14.0, 14.0, 14.0, 29.0, 38.0, 43.0, 42.0, 68.0, 65.0, 81.0, 68.0, 66.0, 70.0, 64.0, 61.0, 62.0, 44.0, 37.0, 30.0, 23.0, 12.0, 9.0, 7.0, 5.0, 2.0, 7.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9833922386169434, -1.9069877862930298, -1.8305833339691162, -1.7541788816452026, -1.677774429321289, -1.6013699769973755, -1.524965524673462, -1.4485610723495483, -1.3721566200256348, -1.2957521677017212, -1.2193477153778076, -1.142943263053894, -1.0665388107299805, -0.9901343584060669, -0.9137299060821533, -0.8373254537582397, -0.7609210014343262, -0.6845165491104126, -0.608112096786499, -0.5317076444625854, -0.4553031921386719, -0.3788987398147583, -0.3024942874908447, -0.22608983516693115, -0.14968538284301758, -0.073280930519104, 0.0031235218048095703, 0.07952797412872314, 0.15593242645263672, 0.2323368787765503, 0.30874133110046387, 0.38514578342437744, 0.461550235748291, 0.5379546880722046, 0.6143591403961182, 0.6907635927200317, 0.7671680450439453, 0.8435724973678589, 0.9199769496917725, 0.996381402015686, 1.0727858543395996, 1.1491903066635132, 1.2255947589874268, 1.3019992113113403, 1.378403663635254, 1.4548081159591675, 1.531212568283081, 1.6076170206069946, 1.6840214729309082, 1.7604259252548218, 1.8368303775787354, 1.913234829902649, 1.9896392822265625, 2.0660438537597656, 
2.1424481868743896, 2.2188525199890137, 2.295257091522217, 2.37166166305542, 2.448065996170044, 2.524470329284668, 2.600874900817871, 2.677279472351074, 2.7536838054656982, 2.8300881385803223, 2.9064927101135254]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 8.0, 11.0, 18.0, 29.0, 45.0, 58.0, 96.0, 145.0, 288.0, 551.0, 1136.0, 2412.0, 5392.0, 12855.0, 35771.0, 178495.0, 675172.0, 93283.0, 25125.0, 9726.0, 4208.0, 1764.0, 903.0, 450.0, 245.0, 142.0, 83.0, 39.0, 37.0, 26.0, 13.0, 13.0, 5.0, 8.0, 1.0, 4.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.087890625, -2.019073486328125, -1.95025634765625, -1.881439208984375, -1.8126220703125, -1.743804931640625, -1.67498779296875, -1.606170654296875, -1.537353515625, -1.468536376953125, -1.39971923828125, -1.330902099609375, -1.2620849609375, -1.193267822265625, -1.12445068359375, -1.055633544921875, -0.98681640625, -0.917999267578125, -0.84918212890625, -0.780364990234375, -0.7115478515625, -0.642730712890625, -0.57391357421875, -0.505096435546875, -0.436279296875, -0.367462158203125, -0.29864501953125, -0.229827880859375, -0.1610107421875, -0.092193603515625, -0.02337646484375, 0.045440673828125, 0.1142578125, 0.183074951171875, 0.25189208984375, 0.320709228515625, 0.3895263671875, 0.458343505859375, 0.52716064453125, 0.595977783203125, 0.664794921875, 0.733612060546875, 0.80242919921875, 0.871246337890625, 0.9400634765625, 1.008880615234375, 1.07769775390625, 1.146514892578125, 1.21533203125, 1.284149169921875, 1.35296630859375, 1.421783447265625, 1.4906005859375, 1.559417724609375, 1.62823486328125, 1.697052001953125, 1.765869140625, 1.834686279296875, 1.90350341796875, 1.972320556640625, 2.0411376953125, 2.109954833984375, 2.17877197265625, 2.247589111328125, 2.31640625]}, "gradients/decoder.model.decoder.layers.3.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 5.0, 3.0, 8.0, 5.0, 6.0, 10.0, 6.0, 14.0, 15.0, 18.0, 20.0, 28.0, 25.0, 45.0, 38.0, 34.0, 55.0, 39.0, 54.0, 49.0, 48.0, 44.0, 44.0, 46.0, 52.0, 39.0, 49.0, 29.0, 34.0, 28.0, 26.0, 17.0, 15.0, 10.0, 11.0, 7.0, 2.0, 6.0, 3.0, 5.0, 4.0, 0.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-3.423828125, -3.3209228515625, -3.218017578125, -3.1151123046875, -3.01220703125, -2.9093017578125, -2.806396484375, -2.7034912109375, -2.6005859375, -2.4976806640625, -2.394775390625, -2.2918701171875, -2.18896484375, -2.0860595703125, -1.983154296875, -1.8802490234375, -1.77734375, -1.6744384765625, -1.571533203125, -1.4686279296875, -1.36572265625, -1.2628173828125, -1.159912109375, -1.0570068359375, -0.9541015625, -0.8511962890625, -0.748291015625, -0.6453857421875, -0.54248046875, -0.4395751953125, -0.336669921875, -0.2337646484375, -0.130859375, -0.0279541015625, 0.074951171875, 0.1778564453125, 0.28076171875, 0.3836669921875, 0.486572265625, 0.5894775390625, 0.6923828125, 0.7952880859375, 0.898193359375, 1.0010986328125, 1.10400390625, 1.2069091796875, 1.309814453125, 1.4127197265625, 1.515625, 1.6185302734375, 1.721435546875, 1.8243408203125, 1.92724609375, 2.0301513671875, 2.133056640625, 2.2359619140625, 2.3388671875, 2.4417724609375, 2.544677734375, 2.6475830078125, 2.75048828125, 2.8533935546875, 2.956298828125, 3.0592041015625, 3.162109375]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.weight": {"_type": "histogram", "values": 
[1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 3.0, 5.0, 4.0, 4.0, 7.0, 4.0, 7.0, 11.0, 18.0, 9.0, 12.0, 13.0, 35.0, 39.0, 56.0, 79.0, 112.0, 205.0, 402.0, 1117.0, 5549.0, 49055.0, 934554.0, 49502.0, 5547.0, 1143.0, 426.0, 202.0, 115.0, 74.0, 46.0, 49.0, 30.0, 30.0, 18.0, 20.0, 15.0, 4.0, 7.0, 10.0, 7.0, 2.0, 2.0, 5.0, 0.0, 3.0, 2.0, 0.0, 2.0, 0.0, 4.0, 1.0, 0.0, 1.0], "bins": [-5.5625, -5.391357421875, -5.22021484375, -5.049072265625, -4.8779296875, -4.706787109375, -4.53564453125, -4.364501953125, -4.193359375, -4.022216796875, -3.85107421875, -3.679931640625, -3.5087890625, -3.337646484375, -3.16650390625, -2.995361328125, -2.82421875, -2.653076171875, -2.48193359375, -2.310791015625, -2.1396484375, -1.968505859375, -1.79736328125, -1.626220703125, -1.455078125, -1.283935546875, -1.11279296875, -0.941650390625, -0.7705078125, -0.599365234375, -0.42822265625, -0.257080078125, -0.0859375, 0.085205078125, 0.25634765625, 0.427490234375, 0.5986328125, 0.769775390625, 0.94091796875, 1.112060546875, 1.283203125, 1.454345703125, 1.62548828125, 1.796630859375, 1.9677734375, 2.138916015625, 2.31005859375, 2.481201171875, 2.65234375, 2.823486328125, 2.99462890625, 3.165771484375, 3.3369140625, 3.508056640625, 3.67919921875, 3.850341796875, 4.021484375, 4.192626953125, 4.36376953125, 4.534912109375, 4.7060546875, 4.877197265625, 5.04833984375, 5.219482421875, 5.390625]}, "gradients/decoder.model.decoder.layers.3.self_attn.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 4.0, 2.0, 6.0, 6.0, 4.0, 13.0, 6.0, 9.0, 21.0, 14.0, 16.0, 19.0, 24.0, 32.0, 20.0, 43.0, 50.0, 56.0, 48.0, 53.0, 55.0, 62.0, 50.0, 48.0, 46.0, 39.0, 33.0, 30.0, 32.0, 33.0, 28.0, 26.0, 17.0, 16.0, 5.0, 9.0, 7.0, 4.0, 6.0, 5.0, 4.0, 4.0, 3.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.65625, -3.52862548828125, -3.4010009765625, -3.27337646484375, -3.145751953125, -3.01812744140625, -2.8905029296875, -2.76287841796875, -2.63525390625, -2.50762939453125, -2.3800048828125, -2.25238037109375, -2.124755859375, -1.99713134765625, -1.8695068359375, -1.74188232421875, -1.6142578125, -1.48663330078125, -1.3590087890625, -1.23138427734375, -1.103759765625, -0.97613525390625, -0.8485107421875, -0.72088623046875, -0.59326171875, -0.46563720703125, -0.3380126953125, -0.21038818359375, -0.082763671875, 0.04486083984375, 0.1724853515625, 0.30010986328125, 0.427734375, 0.55535888671875, 0.6829833984375, 0.81060791015625, 0.938232421875, 1.06585693359375, 1.1934814453125, 1.32110595703125, 1.44873046875, 1.57635498046875, 1.7039794921875, 1.83160400390625, 1.959228515625, 2.08685302734375, 2.2144775390625, 2.34210205078125, 2.4697265625, 2.59735107421875, 2.7249755859375, 2.85260009765625, 2.980224609375, 3.10784912109375, 3.2354736328125, 3.36309814453125, 3.49072265625, 3.61834716796875, 3.7459716796875, 3.87359619140625, 4.001220703125, 4.12884521484375, 4.2564697265625, 4.38409423828125, 4.51171875]}, "gradients/decoder.model.decoder.layers.3.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 0.0, 3.0, 6.0, 2.0, 7.0, 7.0, 16.0, 19.0, 39.0, 67.0, 130.0, 244.0, 538.0, 1361.0, 4935.0, 23298.0, 369509.0, 615815.0, 24926.0, 5115.0, 1435.0, 518.0, 246.0, 135.0, 70.0, 40.0, 22.0, 21.0, 9.0, 1.0, 3.0, 7.0, 3.0, 3.0, 2.0, 3.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-1.21875, -1.1866912841796875, -1.154632568359375, -1.1225738525390625, -1.09051513671875, 
-1.0584564208984375, -1.026397705078125, -0.9943389892578125, -0.9622802734375, -0.9302215576171875, -0.898162841796875, -0.8661041259765625, -0.83404541015625, -0.8019866943359375, -0.769927978515625, -0.7378692626953125, -0.705810546875, -0.6737518310546875, -0.641693115234375, -0.6096343994140625, -0.57757568359375, -0.5455169677734375, -0.513458251953125, -0.4813995361328125, -0.4493408203125, -0.4172821044921875, -0.385223388671875, -0.3531646728515625, -0.32110595703125, -0.2890472412109375, -0.256988525390625, -0.2249298095703125, -0.19287109375, -0.1608123779296875, -0.128753662109375, -0.0966949462890625, -0.06463623046875, -0.0325775146484375, -0.000518798828125, 0.0315399169921875, 0.0635986328125, 0.0956573486328125, 0.127716064453125, 0.1597747802734375, 0.19183349609375, 0.2238922119140625, 0.255950927734375, 0.2880096435546875, 0.320068359375, 0.3521270751953125, 0.384185791015625, 0.4162445068359375, 0.44830322265625, 0.4803619384765625, 0.512420654296875, 0.5444793701171875, 0.5765380859375, 0.6085968017578125, 0.640655517578125, 0.6727142333984375, 0.70477294921875, 0.7368316650390625, 0.768890380859375, 0.8009490966796875, 0.8330078125]}, "gradients/decoder.model.decoder.layers.3.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 6.0, 5.0, 9.0, 4.0, 11.0, 14.0, 21.0, 33.0, 74.0, 112.0, 315.0, 202.0, 73.0, 42.0, 21.0, 16.0, 23.0, 4.0, 7.0, 8.0, 4.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010716915130615234, -0.00010283850133419037, -9.85078513622284e-05, -9.417720139026642e-05, -8.984655141830444e-05, -8.551590144634247e-05, -8.118525147438049e-05, -7.685460150241852e-05, -7.252395153045654e-05, -6.819330155849457e-05, -6.386265158653259e-05, -5.953200161457062e-05, -5.520135164260864e-05, -5.087070167064667e-05, -4.654005169868469e-05, -4.220940172672272e-05, -3.787875175476074e-05, -3.354810178279877e-05, -2.9217451810836792e-05, -2.4886801838874817e-05, -2.0556151866912842e-05, -1.6225501894950867e-05, -1.1894851922988892e-05, -7.5642019510269165e-06, -3.2335519790649414e-06, 1.0970979928970337e-06, 5.427747964859009e-06, 9.758397936820984e-06, 1.4089047908782959e-05, 1.8419697880744934e-05, 2.275034785270691e-05, 2.7080997824668884e-05, 3.141164779663086e-05, 3.5742297768592834e-05, 4.007294774055481e-05, 4.4403597712516785e-05, 4.873424768447876e-05, 5.3064897656440735e-05, 5.739554762840271e-05, 6.172619760036469e-05, 6.605684757232666e-05, 7.038749754428864e-05, 7.471814751625061e-05, 7.904879748821259e-05, 8.337944746017456e-05, 8.771009743213654e-05, 9.204074740409851e-05, 9.637139737606049e-05, 0.00010070204734802246, 0.00010503269731998444, 0.00010936334729194641, 0.00011369399726390839, 0.00011802464723587036, 0.00012235529720783234, 0.0001266859471797943, 0.0001310165971517563, 0.00013534724712371826, 0.00013967789709568024, 0.0001440085470676422, 0.0001483391970396042, 0.00015266984701156616, 0.00015700049698352814, 0.0001613311469554901, 0.0001656617969274521, 0.00016999244689941406]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 4.0, 9.0, 11.0, 16.0, 34.0, 76.0, 94.0, 217.0, 368.0, 734.0, 1895.0, 6278.0, 42233.0, 898380.0, 83969.0, 9844.0, 2474.0, 936.0, 411.0, 245.0, 135.0, 71.0, 50.0, 30.0, 13.0, 11.0, 8.0, 
4.0, 4.0, 3.0, 0.0, 4.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.3095703125, -1.269439697265625, -1.22930908203125, -1.189178466796875, -1.1490478515625, -1.108917236328125, -1.06878662109375, -1.028656005859375, -0.988525390625, -0.948394775390625, -0.90826416015625, -0.868133544921875, -0.8280029296875, -0.787872314453125, -0.74774169921875, -0.707611083984375, -0.66748046875, -0.627349853515625, -0.58721923828125, -0.547088623046875, -0.5069580078125, -0.466827392578125, -0.42669677734375, -0.386566162109375, -0.346435546875, -0.306304931640625, -0.26617431640625, -0.226043701171875, -0.1859130859375, -0.145782470703125, -0.10565185546875, -0.065521240234375, -0.025390625, 0.014739990234375, 0.05487060546875, 0.095001220703125, 0.1351318359375, 0.175262451171875, 0.21539306640625, 0.255523681640625, 0.295654296875, 0.335784912109375, 0.37591552734375, 0.416046142578125, 0.4561767578125, 0.496307373046875, 0.53643798828125, 0.576568603515625, 0.61669921875, 0.656829833984375, 0.69696044921875, 0.737091064453125, 0.7772216796875, 0.817352294921875, 0.85748291015625, 0.897613525390625, 0.937744140625, 0.977874755859375, 1.01800537109375, 1.058135986328125, 1.0982666015625, 1.138397216796875, 1.17852783203125, 1.218658447265625, 1.2587890625]}, "gradients/decoder.model.decoder.layers.3.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 2.0, 5.0, 6.0, 9.0, 7.0, 15.0, 16.0, 31.0, 39.0, 55.0, 147.0, 428.0, 72.0, 60.0, 25.0, 28.0, 12.0, 8.0, 9.0, 10.0, 3.0, 6.0, 4.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.38330078125, -0.3686981201171875, -0.354095458984375, -0.3394927978515625, -0.32489013671875, -0.3102874755859375, -0.295684814453125, -0.2810821533203125, -0.2664794921875, -0.2518768310546875, -0.237274169921875, -0.2226715087890625, -0.20806884765625, -0.1934661865234375, -0.178863525390625, -0.1642608642578125, -0.149658203125, -0.1350555419921875, -0.120452880859375, -0.1058502197265625, -0.09124755859375, -0.0766448974609375, -0.062042236328125, -0.0474395751953125, -0.0328369140625, -0.0182342529296875, -0.003631591796875, 0.0109710693359375, 0.02557373046875, 0.0401763916015625, 0.054779052734375, 0.0693817138671875, 0.083984375, 0.0985870361328125, 0.113189697265625, 0.1277923583984375, 0.14239501953125, 0.1569976806640625, 0.171600341796875, 0.1862030029296875, 0.2008056640625, 0.2154083251953125, 0.230010986328125, 0.2446136474609375, 0.25921630859375, 0.2738189697265625, 0.288421630859375, 0.3030242919921875, 0.317626953125, 0.3322296142578125, 0.346832275390625, 0.3614349365234375, 0.37603759765625, 0.3906402587890625, 0.405242919921875, 0.4198455810546875, 0.4344482421875, 0.4490509033203125, 0.463653564453125, 0.4782562255859375, 0.49285888671875, 0.5074615478515625, 0.522064208984375, 0.5366668701171875, 0.55126953125]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 7.0, 23.0, 163.0, 474.0, 269.0, 69.0, 9.0, 3.0], "bins": [-51.87593460083008, -51.006404876708984, -50.13687515258789, -49.26734161376953, -48.39781188964844, 
-47.528282165527344, -46.65875244140625, -45.789222717285156, -44.91969299316406, -44.05016326904297, -43.180633544921875, -42.311100006103516, -41.44157028198242, -40.57204055786133, -39.702510833740234, -38.83298110961914, -37.96344757080078, -37.09391784667969, -36.224388122558594, -35.354854583740234, -34.48532485961914, -33.61579513549805, -32.74626541137695, -31.87673568725586, -31.007204055786133, -30.13767433166504, -29.268142700195312, -28.39861297607422, -27.529083251953125, -26.6595516204834, -25.790021896362305, -24.920490264892578, -24.050960540771484, -23.18143081665039, -22.311899185180664, -21.44236946105957, -20.572837829589844, -19.70330810546875, -18.833778381347656, -17.964248657226562, -17.094717025756836, -16.225187301635742, -15.355655670166016, -14.486125946044922, -13.616595268249512, -12.747064590454102, -11.877534866333008, -11.008004188537598, -10.138473510742188, -9.268942832946777, -8.399412155151367, -7.529882431030273, -6.660351753234863, -5.790821075439453, -4.921290874481201, -4.051760673522949, -3.1822304725646973, -2.312700033187866, -1.4431695938110352, -0.5736391544342041, 0.29589128494262695, 1.165421962738037, 2.034952163696289, 2.904482364654541, 3.774013042449951]}, "gradients/decoder.model.decoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 2.0, 2.0, 3.0, 5.0, 3.0, 10.0, 4.0, 13.0, 10.0, 15.0, 14.0, 20.0, 14.0, 30.0, 23.0, 25.0, 22.0, 32.0, 34.0, 37.0, 45.0, 42.0, 33.0, 55.0, 53.0, 42.0, 47.0, 50.0, 42.0, 36.0, 36.0, 33.0, 36.0, 26.0, 20.0, 14.0, 21.0, 13.0, 9.0, 12.0, 7.0, 6.0, 6.0, 3.0, 0.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-6.7066826820373535, -6.516510486602783, -6.326337814331055, -6.136165618896484, -5.945992946624756, -5.7558207511901855, -5.565648078918457, -5.375475883483887, -5.185303688049316, -4.995131492614746, -4.804958820343018, -4.614786624908447, -4.424613952636719, -4.234441757202148, -4.044269561767578, -3.8540968894958496, -3.6639244556427, -3.473752021789551, -3.2835795879364014, -3.093407154083252, -2.9032349586486816, -2.7130625247955322, -2.522890090942383, -2.3327178955078125, -2.142545223236084, -1.9523727893829346, -1.7622004747390747, -1.5720280408859253, -1.3818557262420654, -1.191683292388916, -1.0015108585357666, -0.8113385438919067, -0.6211662292480469, -0.43099385499954224, -0.2408214509487152, -0.050649046897888184, 0.13952332735061646, 0.3296957015991211, 0.5198681354522705, 0.7100404500961304, 0.9002128839492798, 1.0903853178024292, 1.280557632446289, 1.4707300662994385, 1.660902500152588, 1.8510748147964478, 2.0412473678588867, 2.231419563293457, 2.4215919971466064, 2.611764430999756, 2.8019368648529053, 2.9921092987060547, 3.182281494140625, 3.3724539279937744, 3.562626361846924, 3.752798557281494, 3.9429712295532227, 4.133143424987793, 4.3233160972595215, 4.513488292694092, 4.70366096496582, 4.893833160400391, 5.084005355834961, 5.2741780281066895, 5.46435022354126]}, "gradients/decoder.model.decoder.layers.2.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 0.0, 1.0, 4.0, 4.0, 7.0, 11.0, 12.0, 16.0, 13.0, 39.0, 42.0, 61.0, 88.0, 160.0, 292.0, 552.0, 1256.0, 3749.0, 14128.0, 135600.0, 3296241.0, 699325.0, 33450.0, 5904.0, 1699.0, 732.0, 341.0, 182.0, 118.0, 71.0, 68.0, 39.0, 29.0, 20.0, 10.0, 8.0, 5.0, 6.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.4921875, -5.3133544921875, -5.134521484375, -4.9556884765625, 
-4.77685546875, -4.5980224609375, -4.419189453125, -4.2403564453125, -4.0615234375, -3.8826904296875, -3.703857421875, -3.5250244140625, -3.34619140625, -3.1673583984375, -2.988525390625, -2.8096923828125, -2.630859375, -2.4520263671875, -2.273193359375, -2.0943603515625, -1.91552734375, -1.7366943359375, -1.557861328125, -1.3790283203125, -1.2001953125, -1.0213623046875, -0.842529296875, -0.6636962890625, -0.48486328125, -0.3060302734375, -0.127197265625, 0.0516357421875, 0.23046875, 0.4093017578125, 0.588134765625, 0.7669677734375, 0.94580078125, 1.1246337890625, 1.303466796875, 1.4822998046875, 1.6611328125, 1.8399658203125, 2.018798828125, 2.1976318359375, 2.37646484375, 2.5552978515625, 2.734130859375, 2.9129638671875, 3.091796875, 3.2706298828125, 3.449462890625, 3.6282958984375, 3.80712890625, 3.9859619140625, 4.164794921875, 4.3436279296875, 4.5224609375, 4.7012939453125, 4.880126953125, 5.0589599609375, 5.23779296875, 5.4166259765625, 5.595458984375, 5.7742919921875, 5.953125]}, "gradients/decoder.model.decoder.layers.2.fc2.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 3.0, 1.0, 6.0, 7.0, 8.0, 8.0, 5.0, 17.0, 19.0, 18.0, 29.0, 41.0, 51.0, 51.0, 55.0, 65.0, 78.0, 91.0, 72.0, 62.0, 71.0, 44.0, 48.0, 43.0, 34.0, 28.0, 24.0, 8.0, 12.0, 5.0, 1.0, 6.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.591796875, -2.4971923828125, -2.402587890625, -2.3079833984375, -2.21337890625, -2.1187744140625, -2.024169921875, -1.9295654296875, -1.8349609375, -1.7403564453125, -1.645751953125, -1.5511474609375, -1.45654296875, -1.3619384765625, -1.267333984375, -1.1727294921875, -1.078125, -0.9835205078125, -0.888916015625, -0.7943115234375, -0.69970703125, -0.6051025390625, -0.510498046875, -0.4158935546875, -0.3212890625, -0.2266845703125, -0.132080078125, -0.0374755859375, 0.05712890625, 0.1517333984375, 0.246337890625, 0.3409423828125, 0.435546875, 0.5301513671875, 0.624755859375, 0.7193603515625, 0.81396484375, 0.9085693359375, 1.003173828125, 1.0977783203125, 1.1923828125, 1.2869873046875, 1.381591796875, 1.4761962890625, 1.57080078125, 1.6654052734375, 1.760009765625, 1.8546142578125, 1.94921875, 2.0438232421875, 2.138427734375, 2.2330322265625, 2.32763671875, 2.4222412109375, 2.516845703125, 2.6114501953125, 2.7060546875, 2.8006591796875, 2.895263671875, 2.9898681640625, 3.08447265625, 3.1790771484375, 3.273681640625, 3.3682861328125, 3.462890625]}, "gradients/decoder.model.decoder.layers.2.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 3.0, 2.0, 3.0, 5.0, 9.0, 6.0, 10.0, 21.0, 34.0, 57.0, 91.0, 329.0, 761.0, 2530.0, 18735.0, 1054185.0, 3081151.0, 30904.0, 3807.0, 992.0, 318.0, 141.0, 69.0, 49.0, 28.0, 10.0, 8.0, 6.0, 9.0, 8.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-5.30859375, -5.06829833984375, -4.8280029296875, -4.58770751953125, -4.347412109375, -4.10711669921875, -3.8668212890625, -3.62652587890625, -3.38623046875, -3.14593505859375, -2.9056396484375, -2.66534423828125, -2.425048828125, -2.18475341796875, -1.9444580078125, -1.70416259765625, -1.4638671875, -1.22357177734375, -0.9832763671875, -0.74298095703125, -0.502685546875, -0.26239013671875, -0.0220947265625, 0.21820068359375, 0.45849609375, 0.69879150390625, 0.9390869140625, 1.17938232421875, 1.419677734375, 1.65997314453125, 1.9002685546875, 2.14056396484375, 2.380859375, 
2.62115478515625, 2.8614501953125, 3.10174560546875, 3.342041015625, 3.58233642578125, 3.8226318359375, 4.06292724609375, 4.30322265625, 4.54351806640625, 4.7838134765625, 5.02410888671875, 5.264404296875, 5.50469970703125, 5.7449951171875, 5.98529052734375, 6.2255859375, 6.46588134765625, 6.7061767578125, 6.94647216796875, 7.186767578125, 7.42706298828125, 7.6673583984375, 7.90765380859375, 8.14794921875, 8.38824462890625, 8.6285400390625, 8.86883544921875, 9.109130859375, 9.34942626953125, 9.5897216796875, 9.83001708984375, 10.0703125]}, "gradients/decoder.model.decoder.layers.2.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 3.0, 6.0, 10.0, 12.0, 14.0, 32.0, 47.0, 89.0, 129.0, 282.0, 629.0, 967.0, 889.0, 465.0, 230.0, 100.0, 59.0, 34.0, 29.0, 23.0, 18.0, 6.0, 2.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.353515625, -2.298553466796875, -2.24359130859375, -2.188629150390625, -2.1336669921875, -2.078704833984375, -2.02374267578125, -1.968780517578125, -1.913818359375, -1.858856201171875, -1.80389404296875, -1.748931884765625, -1.6939697265625, -1.639007568359375, -1.58404541015625, -1.529083251953125, -1.47412109375, -1.419158935546875, -1.36419677734375, -1.309234619140625, -1.2542724609375, -1.199310302734375, -1.14434814453125, -1.089385986328125, -1.034423828125, -0.979461669921875, -0.92449951171875, -0.869537353515625, -0.8145751953125, -0.759613037109375, -0.70465087890625, -0.649688720703125, -0.5947265625, -0.539764404296875, -0.48480224609375, -0.429840087890625, -0.3748779296875, -0.319915771484375, -0.26495361328125, -0.209991455078125, -0.155029296875, -0.100067138671875, -0.04510498046875, 0.009857177734375, 0.0648193359375, 0.119781494140625, 0.17474365234375, 0.229705810546875, 0.28466796875, 0.339630126953125, 0.39459228515625, 0.449554443359375, 0.5045166015625, 0.559478759765625, 0.61444091796875, 0.669403076171875, 0.724365234375, 0.779327392578125, 0.83428955078125, 0.889251708984375, 0.9442138671875, 0.999176025390625, 1.05413818359375, 1.109100341796875, 1.1640625]}, "gradients/decoder.model.decoder.layers.2.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 4.0, 8.0, 10.0, 19.0, 26.0, 44.0, 64.0, 84.0, 105.0, 113.0, 120.0, 103.0, 97.0, 85.0, 39.0, 22.0, 25.0, 19.0, 8.0, 6.0, 3.0, 3.0, 0.0, 2.0, 2.0, 1.0, 1.0], "bins": [-8.206478118896484, -8.033744812011719, -7.861011981964111, -7.688278675079346, -7.515545845031738, -7.342812538146973, -7.170079708099365, -6.9973464012146, -6.824613571166992, -6.651880264282227, -6.479147434234619, -6.3064141273498535, -6.133681297302246, -5.9609479904174805, -5.788215160369873, -5.615481853485107, -5.4427490234375, -5.270015716552734, -5.097282886505127, -4.924549579620361, -4.751816749572754, -4.579083442687988, -4.406350612640381, -4.233617305755615, -4.06088399887085, -3.888150930404663, -3.7154178619384766, -3.54268479347229, -3.3699517250061035, -3.197218656539917, -3.0244855880737305, -2.851752281188965, -2.6790192127227783, -2.506286144256592, -2.3335530757904053, -2.1608200073242188, -1.9880869388580322, -1.8153538703918457, -1.6426206827163696, -1.469887614250183, -1.2971545457839966, -1.12442147731781, -0.9516884088516235, -0.7789552807807922, 
-0.6062222123146057, -0.4334891438484192, -0.2607560157775879, -0.08802294731140137, 0.08471012115478516, 0.2574431896209717, 0.4301762878894806, 0.6029093861579895, 0.775642454624176, 0.9483755230903625, 1.1211086511611938, 1.2938417196273804, 1.466574788093567, 1.6393078565597534, 1.81204092502594, 1.984774112701416, 2.1575071811676025, 2.330240249633789, 2.5029733180999756, 2.675706386566162, 2.8484394550323486]}, "gradients/decoder.model.decoder.layers.2.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 5.0, 2.0, 2.0, 2.0, 5.0, 8.0, 7.0, 11.0, 15.0, 9.0, 14.0, 13.0, 23.0, 21.0, 21.0, 27.0, 28.0, 46.0, 30.0, 46.0, 45.0, 37.0, 39.0, 54.0, 47.0, 46.0, 40.0, 46.0, 41.0, 38.0, 35.0, 32.0, 34.0, 26.0, 26.0, 18.0, 14.0, 9.0, 8.0, 12.0, 7.0, 5.0, 5.0, 5.0, 1.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-2.432069778442383, -2.3531317710876465, -2.27419376373291, -2.195255756378174, -2.1163179874420166, -2.0373799800872803, -1.958441972732544, -1.8795039653778076, -1.8005659580230713, -1.721627950668335, -1.6426900625228882, -1.5637520551681519, -1.4848140478134155, -1.4058761596679688, -1.3269381523132324, -1.248000144958496, -1.1690622568130493, -1.090124249458313, -1.0111863613128662, -0.9322483539581299, -0.8533103466033936, -0.774372398853302, -0.6954344511032104, -0.6164964437484741, -0.5375584959983826, -0.45862051844596863, -0.3796825408935547, -0.30074459314346313, -0.2218066155910492, -0.14286863803863525, -0.0639306902885437, 0.015007317066192627, 0.09394526481628418, 0.17288324236869812, 0.25182121992111206, 0.3307591676712036, 0.40969714522361755, 0.4886351227760315, 0.567573070526123, 0.6465110778808594, 0.7254490256309509, 0.8043869733810425, 0.8833249807357788, 0.9622629284858704, 1.041200876235962, 1.1201388835906982, 1.1990768909454346, 1.278014898300171, 1.3569527864456177, 1.435890793800354, 1.5148286819458008, 1.593766689300537, 1.6727046966552734, 1.7516427040100098, 1.8305805921554565, 1.9095185995101929, 1.9884564876556396, 2.067394495010376, 2.1463325023651123, 2.2252702713012695, 2.304208278656006, 2.383146286010742, 2.4620842933654785, 2.541022300720215, 2.619960308074951]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 4.0, 3.0, 7.0, 7.0, 17.0, 17.0, 27.0, 31.0, 47.0, 90.0, 132.0, 194.0, 312.0, 494.0, 855.0, 1543.0, 3551.0, 10728.0, 45521.0, 322823.0, 563092.0, 73968.0, 15672.0, 4808.0, 2060.0, 992.0, 548.0, 329.0, 227.0, 147.0, 93.0, 64.0, 49.0, 32.0, 26.0, 21.0, 13.0, 5.0, 6.0, 2.0, 1.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.9775390625, -0.9497222900390625, -0.921905517578125, -0.8940887451171875, -0.86627197265625, -0.8384552001953125, -0.810638427734375, -0.7828216552734375, -0.7550048828125, -0.7271881103515625, -0.699371337890625, -0.6715545654296875, -0.64373779296875, -0.6159210205078125, -0.588104248046875, -0.5602874755859375, -0.532470703125, -0.5046539306640625, -0.476837158203125, -0.4490203857421875, -0.42120361328125, -0.3933868408203125, -0.365570068359375, -0.3377532958984375, -0.3099365234375, -0.2821197509765625, -0.254302978515625, -0.2264862060546875, -0.19866943359375, -0.1708526611328125, -0.143035888671875, -0.1152191162109375, -0.08740234375, -0.0595855712890625, -0.031768798828125, -0.0039520263671875, 0.02386474609375, 0.0516815185546875, 0.079498291015625, 0.1073150634765625, 0.1351318359375, 0.1629486083984375, 
0.190765380859375, 0.2185821533203125, 0.24639892578125, 0.2742156982421875, 0.302032470703125, 0.3298492431640625, 0.357666015625, 0.3854827880859375, 0.413299560546875, 0.4411163330078125, 0.46893310546875, 0.4967498779296875, 0.524566650390625, 0.5523834228515625, 0.5802001953125, 0.6080169677734375, 0.635833740234375, 0.6636505126953125, 0.69146728515625, 0.7192840576171875, 0.747100830078125, 0.7749176025390625, 0.802734375]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 2.0, 1.0, 4.0, 2.0, 5.0, 2.0, 12.0, 6.0, 8.0, 14.0, 16.0, 19.0, 30.0, 27.0, 36.0, 36.0, 37.0, 47.0, 44.0, 65.0, 51.0, 64.0, 53.0, 61.0, 64.0, 42.0, 54.0, 40.0, 33.0, 29.0, 28.0, 14.0, 20.0, 18.0, 6.0, 5.0, 10.0, 6.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.046875, -1.977386474609375, -1.90789794921875, -1.838409423828125, -1.7689208984375, -1.699432373046875, -1.62994384765625, -1.560455322265625, -1.490966796875, -1.421478271484375, -1.35198974609375, -1.282501220703125, -1.2130126953125, -1.143524169921875, -1.07403564453125, -1.004547119140625, -0.93505859375, -0.865570068359375, -0.79608154296875, -0.726593017578125, -0.6571044921875, -0.587615966796875, -0.51812744140625, -0.448638916015625, -0.379150390625, -0.309661865234375, -0.24017333984375, -0.170684814453125, -0.1011962890625, -0.031707763671875, 0.03778076171875, 0.107269287109375, 0.1767578125, 0.246246337890625, 0.31573486328125, 0.385223388671875, 0.4547119140625, 0.524200439453125, 0.59368896484375, 0.663177490234375, 0.732666015625, 0.802154541015625, 0.87164306640625, 0.941131591796875, 1.0106201171875, 1.080108642578125, 1.14959716796875, 1.219085693359375, 1.28857421875, 1.358062744140625, 1.42755126953125, 1.497039794921875, 1.5665283203125, 1.636016845703125, 1.70550537109375, 1.774993896484375, 1.844482421875, 1.913970947265625, 1.98345947265625, 2.052947998046875, 2.1224365234375, 2.191925048828125, 2.26141357421875, 2.330902099609375, 2.400390625]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 2.0, 5.0, 4.0, 3.0, 7.0, 8.0, 11.0, 10.0, 20.0, 21.0, 40.0, 63.0, 84.0, 115.0, 183.0, 299.0, 395.0, 628.0, 1003.0, 1637.0, 2536.0, 4384.0, 7207.0, 13133.0, 24816.0, 50796.0, 113204.0, 285807.0, 304974.0, 122073.0, 53741.0, 27054.0, 14070.0, 8020.0, 4719.0, 2743.0, 1707.0, 1095.0, 671.0, 413.0, 269.0, 187.0, 135.0, 83.0, 61.0, 38.0, 34.0, 14.0, 8.0, 7.0, 11.0, 3.0, 8.0, 1.0, 1.0, 5.0, 0.0, 2.0, 2.0, 1.0], "bins": [-0.230224609375, -0.2230396270751953, -0.21585464477539062, -0.20866966247558594, -0.20148468017578125, -0.19429969787597656, -0.18711471557617188, -0.1799297332763672, -0.1727447509765625, -0.1655597686767578, -0.15837478637695312, -0.15118980407714844, -0.14400482177734375, -0.13681983947753906, -0.12963485717773438, -0.12244987487792969, -0.115264892578125, -0.10807991027832031, -0.10089492797851562, -0.09370994567871094, -0.08652496337890625, -0.07933998107910156, -0.07215499877929688, -0.06497001647949219, -0.0577850341796875, -0.05060005187988281, -0.043415069580078125, -0.03623008728027344, -0.02904510498046875, -0.021860122680664062, -0.014675140380859375, -0.0074901580810546875, -0.00030517578125, 0.0068798065185546875, 0.014064788818359375, 0.021249771118164062, 0.02843475341796875, 0.03561973571777344, 0.042804718017578125, 0.04998970031738281, 0.0571746826171875, 0.06435966491699219, 
0.07154464721679688, 0.07872962951660156, 0.08591461181640625, 0.09309959411621094, 0.10028457641601562, 0.10746955871582031, 0.114654541015625, 0.12183952331542969, 0.12902450561523438, 0.13620948791503906, 0.14339447021484375, 0.15057945251464844, 0.15776443481445312, 0.1649494171142578, 0.1721343994140625, 0.1793193817138672, 0.18650436401367188, 0.19368934631347656, 0.20087432861328125, 0.20805931091308594, 0.21524429321289062, 0.2224292755126953, 0.2296142578125]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 2.0, 0.0, 5.0, 4.0, 3.0, 7.0, 9.0, 6.0, 14.0, 13.0, 5.0, 10.0, 13.0, 17.0, 19.0, 34.0, 21.0, 36.0, 40.0, 40.0, 44.0, 37.0, 43.0, 57.0, 53.0, 41.0, 58.0, 52.0, 50.0, 26.0, 46.0, 36.0, 37.0, 17.0, 27.0, 13.0, 11.0, 13.0, 11.0, 9.0, 6.0, 9.0, 4.0, 5.0, 2.0, 2.0, 2.0, 0.0, 3.0, 1.0, 3.0, 0.0, 1.0, 1.0], "bins": [-4.140625, -4.02056884765625, -3.9005126953125, -3.78045654296875, -3.660400390625, -3.54034423828125, -3.4202880859375, -3.30023193359375, -3.18017578125, -3.06011962890625, -2.9400634765625, -2.82000732421875, -2.699951171875, -2.57989501953125, -2.4598388671875, -2.33978271484375, -2.2197265625, -2.09967041015625, -1.9796142578125, -1.85955810546875, -1.739501953125, -1.61944580078125, -1.4993896484375, -1.37933349609375, -1.25927734375, -1.13922119140625, -1.0191650390625, -0.89910888671875, -0.779052734375, -0.65899658203125, -0.5389404296875, -0.41888427734375, -0.298828125, -0.17877197265625, -0.0587158203125, 0.06134033203125, 0.181396484375, 0.30145263671875, 0.4215087890625, 0.54156494140625, 0.66162109375, 0.78167724609375, 0.9017333984375, 1.02178955078125, 1.141845703125, 1.26190185546875, 1.3819580078125, 1.50201416015625, 1.6220703125, 1.74212646484375, 1.8621826171875, 1.98223876953125, 2.102294921875, 2.22235107421875, 2.3424072265625, 2.46246337890625, 2.58251953125, 2.70257568359375, 2.8226318359375, 2.94268798828125, 3.062744140625, 3.18280029296875, 3.3028564453125, 3.42291259765625, 3.54296875]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 7.0, 10.0, 9.0, 22.0, 25.0, 49.0, 86.0, 143.0, 254.0, 610.0, 1492.0, 5119.0, 31914.0, 585387.0, 391962.0, 24501.0, 4453.0, 1345.0, 529.0, 258.0, 143.0, 85.0, 53.0, 35.0, 22.0, 12.0, 10.0, 11.0, 5.0, 3.0, 2.0, 0.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08428955078125, -0.08137702941894531, -0.07846450805664062, -0.07555198669433594, -0.07263946533203125, -0.06972694396972656, -0.06681442260742188, -0.06390190124511719, -0.0609893798828125, -0.05807685852050781, -0.055164337158203125, -0.05225181579589844, -0.04933929443359375, -0.04642677307128906, -0.043514251708984375, -0.04060173034667969, -0.037689208984375, -0.03477668762207031, -0.031864166259765625, -0.028951644897460938, -0.02603912353515625, -0.023126602172851562, -0.020214080810546875, -0.017301559448242188, -0.0143890380859375, -0.011476516723632812, -0.008563995361328125, -0.0056514739990234375, -0.00273895263671875, 0.0001735687255859375, 0.003086090087890625, 0.0059986114501953125, 0.0089111328125, 0.011823654174804688, 0.014736175537109375, 0.017648696899414062, 0.02056121826171875, 0.023473739624023438, 0.026386260986328125, 0.029298782348632812, 0.0322113037109375, 0.03512382507324219, 0.038036346435546875, 0.04094886779785156, 0.04386138916015625, 
0.04677391052246094, 0.049686431884765625, 0.05259895324707031, 0.055511474609375, 0.05842399597167969, 0.061336517333984375, 0.06424903869628906, 0.06716156005859375, 0.07007408142089844, 0.07298660278320312, 0.07589912414550781, 0.0788116455078125, 0.08172416687011719, 0.08463668823242188, 0.08754920959472656, 0.09046173095703125, 0.09337425231933594, 0.09628677368164062, 0.09919929504394531, 0.10211181640625]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 1.0, 3.0, 2.0, 7.0, 4.0, 4.0, 8.0, 13.0, 11.0, 25.0, 36.0, 50.0, 62.0, 106.0, 109.0, 126.0, 109.0, 94.0, 66.0, 45.0, 34.0, 32.0, 18.0, 15.0, 10.0, 10.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 4.0, 1.0], "bins": [-6.35385513305664e-05, -6.20819628238678e-05, -6.062537431716919e-05, -5.916878581047058e-05, -5.771219730377197e-05, -5.6255608797073364e-05, -5.4799020290374756e-05, -5.334243178367615e-05, -5.188584327697754e-05, -5.042925477027893e-05, -4.897266626358032e-05, -4.7516077756881714e-05, -4.6059489250183105e-05, -4.46029007434845e-05, -4.314631223678589e-05, -4.168972373008728e-05, -4.023313522338867e-05, -3.8776546716690063e-05, -3.7319958209991455e-05, -3.586336970329285e-05, -3.440678119659424e-05, -3.295019268989563e-05, -3.149360418319702e-05, -3.0037015676498413e-05, -2.8580427169799805e-05, -2.7123838663101196e-05, -2.5667250156402588e-05, -2.421066164970398e-05, -2.275407314300537e-05, -2.1297484636306763e-05, -1.9840896129608154e-05, -1.8384307622909546e-05, -1.6927719116210938e-05, -1.547113060951233e-05, -1.401454210281372e-05, -1.2557953596115112e-05, -1.1101365089416504e-05, -9.644776582717896e-06, -8.188188076019287e-06, -6.731599569320679e-06, -5.27501106262207e-06, -3.818422555923462e-06, -2.3618340492248535e-06, -9.052455425262451e-07, 5.513429641723633e-07, 2.0079314708709717e-06, 3.46451997756958e-06, 4.9211084842681885e-06, 6.377696990966797e-06, 7.834285497665405e-06, 9.290874004364014e-06, 1.0747462511062622e-05, 1.220405101776123e-05, 1.3660639524459839e-05, 1.5117228031158447e-05, 1.6573816537857056e-05, 1.8030405044555664e-05, 1.9486993551254272e-05, 2.094358205795288e-05, 2.240017056465149e-05, 2.3856759071350098e-05, 2.5313347578048706e-05, 2.6769936084747314e-05, 2.8226524591445923e-05, 2.968311309814453e-05]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 4.0, 8.0, 5.0, 14.0, 29.0, 39.0, 80.0, 142.0, 286.0, 714.0, 2256.0, 10163.0, 80094.0, 849057.0, 90401.0, 11362.0, 2524.0, 756.0, 322.0, 144.0, 53.0, 38.0, 21.0, 8.0, 9.0, 4.0, 12.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1329345703125, -0.1289691925048828, -0.12500381469726562, -0.12103843688964844, -0.11707305908203125, -0.11310768127441406, -0.10914230346679688, -0.10517692565917969, -0.1012115478515625, -0.09724617004394531, -0.09328079223632812, -0.08931541442871094, -0.08535003662109375, -0.08138465881347656, -0.07741928100585938, -0.07345390319824219, -0.069488525390625, -0.06552314758300781, -0.061557769775390625, -0.05759239196777344, -0.05362701416015625, -0.04966163635253906, -0.045696258544921875, -0.04173088073730469, -0.0377655029296875, -0.03380012512207031, -0.029834747314453125, -0.025869369506835938, -0.02190399169921875, 
-0.017938613891601562, -0.013973236083984375, -0.010007858276367188, -0.00604248046875, -0.0020771026611328125, 0.001888275146484375, 0.0058536529541015625, 0.00981903076171875, 0.013784408569335938, 0.017749786376953125, 0.021715164184570312, 0.0256805419921875, 0.029645919799804688, 0.033611297607421875, 0.03757667541503906, 0.04154205322265625, 0.04550743103027344, 0.049472808837890625, 0.05343818664550781, 0.057403564453125, 0.06136894226074219, 0.06533432006835938, 0.06929969787597656, 0.07326507568359375, 0.07723045349121094, 0.08119583129882812, 0.08516120910644531, 0.0891265869140625, 0.09309196472167969, 0.09705734252929688, 0.10102272033691406, 0.10498809814453125, 0.10895347595214844, 0.11291885375976562, 0.11688423156738281, 0.120849609375]}, "gradients/decoder.model.decoder.layers.2.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 4.0, 0.0, 7.0, 6.0, 6.0, 10.0, 26.0, 30.0, 70.0, 90.0, 202.0, 232.0, 131.0, 73.0, 37.0, 18.0, 17.0, 13.0, 9.0, 5.0, 5.0, 3.0, 3.0, 5.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02728271484375, -0.026459932327270508, -0.025637149810791016, -0.024814367294311523, -0.02399158477783203, -0.02316880226135254, -0.022346019744873047, -0.021523237228393555, -0.020700454711914062, -0.01987767219543457, -0.019054889678955078, -0.018232107162475586, -0.017409324645996094, -0.0165865421295166, -0.01576375961303711, -0.014940977096557617, -0.014118194580078125, -0.013295412063598633, -0.01247262954711914, -0.011649847030639648, -0.010827064514160156, -0.010004281997680664, -0.009181499481201172, -0.00835871696472168, -0.0075359344482421875, -0.006713151931762695, -0.005890369415283203, -0.005067586898803711, -0.004244804382324219, -0.0034220218658447266, -0.0025992393493652344, -0.0017764568328857422, -0.00095367431640625, -0.0001308917999267578, 0.0006918907165527344, 0.0015146732330322266, 0.0023374557495117188, 0.003160238265991211, 0.003983020782470703, 0.004805803298950195, 0.0056285858154296875, 0.00645136833190918, 0.007274150848388672, 0.008096933364868164, 0.008919715881347656, 0.009742498397827148, 0.01056528091430664, 0.011388063430786133, 0.012210845947265625, 0.013033628463745117, 0.01385641098022461, 0.014679193496704102, 0.015501976013183594, 0.016324758529663086, 0.017147541046142578, 0.01797032356262207, 0.018793106079101562, 0.019615888595581055, 0.020438671112060547, 0.02126145362854004, 0.02208423614501953, 0.022907018661499023, 0.023729801177978516, 0.024552583694458008, 0.0253753662109375]}, "gradients/decoder.model.decoder.layers.2.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 5.0, 9.0, 22.0, 54.0, 104.0, 165.0, 212.0, 189.0, 122.0, 61.0, 36.0, 15.0, 10.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.862611770629883, -5.671850681304932, -5.481089115142822, -5.290328025817871, -5.09956693649292, -4.908805847167969, -4.718044281005859, -4.527283191680908, -4.336522102355957, -4.145761013031006, -3.9549996852874756, -3.7642383575439453, -3.573477268218994, -3.382715940475464, -3.1919546127319336, -3.0011935234069824, -2.810431957244873, -2.6196706295013428, -2.4289095401763916, -2.2381482124328613, -2.04738712310791, 
-1.8566257953643799, -1.6658644676208496, -1.4751032590866089, -1.2843420505523682, -1.0935808420181274, -0.9028195738792419, -0.7120583057403564, -0.5212970972061157, -0.330535888671875, -0.13977456092834473, 0.050986647605895996, 0.24174737930297852, 0.4325086176395416, 0.6232698559761047, 0.8140311241149902, 1.004792332649231, 1.1955535411834717, 1.386314868927002, 1.5770760774612427, 1.7678372859954834, 1.9585984945297241, 2.149359703063965, 2.340121030807495, 2.5308823585510254, 2.7216434478759766, 2.912404775619507, 3.103166103363037, 3.2939271926879883, 3.4846885204315186, 3.6754496097564697, 3.8662109375, 4.056972026824951, 4.247733116149902, 4.438494682312012, 4.629255771636963, 4.820016860961914, 5.010777950286865, 5.201539516448975, 5.392300605773926, 5.583061695098877, 5.773822784423828, 5.9645843505859375, 6.155345439910889, 6.346107006072998]}, "gradients/decoder.model.decoder.layers.2.self_attn_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 3.0, 4.0, 2.0, 3.0, 7.0, 3.0, 13.0, 15.0, 20.0, 15.0, 25.0, 33.0, 29.0, 50.0, 49.0, 56.0, 64.0, 50.0, 68.0, 66.0, 67.0, 53.0, 55.0, 55.0, 41.0, 38.0, 32.0, 30.0, 15.0, 8.0, 14.0, 13.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-1.867017388343811, -1.79472017288208, -1.7224228382110596, -1.650125503540039, -1.577828288078308, -1.5055310726165771, -1.4332337379455566, -1.3609364032745361, -1.2886391878128052, -1.2163419723510742, -1.1440446376800537, -1.0717473030090332, -0.9994500875473022, -0.9271528124809265, -0.8548555374145508, -0.782558262348175, -0.7102609872817993, -0.6379637122154236, -0.5656664371490479, -0.4933691620826721, -0.4210718870162964, -0.34877461194992065, -0.2764773368835449, -0.2041800618171692, -0.13188278675079346, -0.059585511684417725, 0.012711763381958008, 0.08500903844833374, 0.15730631351470947, 0.2296035885810852, 0.30190086364746094, 0.37419813871383667, 0.44649529457092285, 0.5187925696372986, 0.5910898447036743, 0.66338711977005, 0.7356843948364258, 0.8079816699028015, 0.8802789449691772, 0.952576220035553, 1.0248734951019287, 1.0971708297729492, 1.1694680452346802, 1.2417652606964111, 1.3140625953674316, 1.3863599300384521, 1.458657145500183, 1.530954360961914, 1.6032516956329346, 1.675549030303955, 1.747846245765686, 1.820143461227417, 1.8924407958984375, 1.964738130569458, 2.0370354652404785, 2.10933256149292, 2.1816298961639404, 2.253927230834961, 2.3262243270874023, 2.398521661758423, 2.4708189964294434, 2.543116331100464, 2.6154136657714844, 2.687710762023926, 2.7600080966949463]}, "gradients/decoder.model.decoder.layers.2.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 3.0, 7.0, 6.0, 9.0, 16.0, 14.0, 28.0, 42.0, 65.0, 117.0, 153.0, 299.0, 587.0, 1108.0, 2681.0, 7492.0, 26927.0, 150235.0, 686589.0, 135074.0, 24952.0, 7142.0, 2609.0, 1066.0, 560.0, 284.0, 204.0, 104.0, 65.0, 39.0, 27.0, 20.0, 12.0, 5.0, 3.0, 7.0, 1.0, 1.0, 5.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.552734375, -2.46612548828125, -2.3795166015625, -2.29290771484375, -2.206298828125, -2.11968994140625, -2.0330810546875, -1.94647216796875, -1.85986328125, -1.77325439453125, -1.6866455078125, -1.60003662109375, -1.513427734375, -1.42681884765625, -1.3402099609375, -1.25360107421875, -1.1669921875, -1.08038330078125, -0.9937744140625, -0.90716552734375, -0.820556640625, -0.73394775390625, 
-0.6473388671875, -0.56072998046875, -0.47412109375, -0.38751220703125, -0.3009033203125, -0.21429443359375, -0.127685546875, -0.04107666015625, 0.0455322265625, 0.13214111328125, 0.21875, 0.30535888671875, 0.3919677734375, 0.47857666015625, 0.565185546875, 0.65179443359375, 0.7384033203125, 0.82501220703125, 0.91162109375, 0.99822998046875, 1.0848388671875, 1.17144775390625, 1.258056640625, 1.34466552734375, 1.4312744140625, 1.51788330078125, 1.6044921875, 1.69110107421875, 1.7777099609375, 1.86431884765625, 1.950927734375, 2.03753662109375, 2.1241455078125, 2.21075439453125, 2.29736328125, 2.38397216796875, 2.4705810546875, 2.55718994140625, 2.643798828125, 2.73040771484375, 2.8170166015625, 2.90362548828125, 2.990234375]}, "gradients/decoder.model.decoder.layers.2.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 2.0, 3.0, 6.0, 8.0, 10.0, 5.0, 17.0, 15.0, 21.0, 34.0, 45.0, 42.0, 39.0, 44.0, 64.0, 66.0, 50.0, 53.0, 58.0, 57.0, 54.0, 45.0, 47.0, 41.0, 44.0, 35.0, 25.0, 22.0, 12.0, 11.0, 8.0, 9.0, 6.0, 4.0, 4.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-4.51171875, -4.381134033203125, -4.25054931640625, -4.119964599609375, -3.9893798828125, -3.858795166015625, -3.72821044921875, -3.597625732421875, -3.467041015625, -3.336456298828125, -3.20587158203125, -3.075286865234375, -2.9447021484375, -2.814117431640625, -2.68353271484375, -2.552947998046875, -2.42236328125, -2.291778564453125, -2.16119384765625, -2.030609130859375, -1.9000244140625, -1.769439697265625, -1.63885498046875, -1.508270263671875, -1.377685546875, -1.247100830078125, -1.11651611328125, -0.985931396484375, -0.8553466796875, -0.724761962890625, -0.59417724609375, -0.463592529296875, -0.3330078125, -0.202423095703125, -0.07183837890625, 0.058746337890625, 0.1893310546875, 0.319915771484375, 0.45050048828125, 0.581085205078125, 0.711669921875, 0.842254638671875, 0.97283935546875, 1.103424072265625, 1.2340087890625, 1.364593505859375, 1.49517822265625, 1.625762939453125, 1.75634765625, 1.886932373046875, 2.01751708984375, 2.148101806640625, 2.2786865234375, 2.409271240234375, 2.53985595703125, 2.670440673828125, 2.801025390625, 2.931610107421875, 3.06219482421875, 3.192779541015625, 3.3233642578125, 3.453948974609375, 3.58453369140625, 3.715118408203125, 3.845703125]}, "gradients/decoder.model.decoder.layers.2.self_attn.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 2.0, 2.0, 6.0, 4.0, 5.0, 5.0, 2.0, 9.0, 12.0, 10.0, 16.0, 13.0, 23.0, 31.0, 44.0, 55.0, 75.0, 132.0, 270.0, 728.0, 3526.0, 50577.0, 953998.0, 34859.0, 2799.0, 682.0, 239.0, 119.0, 80.0, 48.0, 40.0, 36.0, 20.0, 25.0, 17.0, 8.0, 5.0, 14.0, 8.0, 10.0, 1.0, 3.0, 2.0, 5.0, 1.0, 1.0, 2.0, 1.0, 1.0], "bins": [-8.6484375, -8.41748046875, -8.1865234375, -7.95556640625, -7.724609375, -7.49365234375, -7.2626953125, -7.03173828125, -6.80078125, -6.56982421875, -6.3388671875, -6.10791015625, -5.876953125, -5.64599609375, -5.4150390625, -5.18408203125, -4.953125, -4.72216796875, -4.4912109375, -4.26025390625, -4.029296875, -3.79833984375, -3.5673828125, -3.33642578125, -3.10546875, -2.87451171875, -2.6435546875, -2.41259765625, -2.181640625, -1.95068359375, -1.7197265625, -1.48876953125, -1.2578125, -1.02685546875, -0.7958984375, -0.56494140625, -0.333984375, -0.10302734375, 0.1279296875, 0.35888671875, 0.58984375, 0.82080078125, 1.0517578125, 1.28271484375, 1.513671875, 
1.74462890625, 1.9755859375, 2.20654296875, 2.4375, 2.66845703125, 2.8994140625, 3.13037109375, 3.361328125, 3.59228515625, 3.8232421875, 4.05419921875, 4.28515625, 4.51611328125, 4.7470703125, 4.97802734375, 5.208984375, 5.43994140625, 5.6708984375, 5.90185546875, 6.1328125]}, "gradients/decoder.model.decoder.layers.2.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 4.0, 1.0, 6.0, 4.0, 6.0, 9.0, 12.0, 5.0, 9.0, 5.0, 17.0, 16.0, 18.0, 25.0, 26.0, 28.0, 28.0, 34.0, 38.0, 35.0, 49.0, 52.0, 46.0, 45.0, 53.0, 44.0, 47.0, 50.0, 32.0, 38.0, 26.0, 30.0, 35.0, 17.0, 27.0, 15.0, 16.0, 12.0, 7.0, 9.0, 10.0, 7.0, 5.0, 6.0, 4.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.94921875, -3.827880859375, -3.70654296875, -3.585205078125, -3.4638671875, -3.342529296875, -3.22119140625, -3.099853515625, -2.978515625, -2.857177734375, -2.73583984375, -2.614501953125, -2.4931640625, -2.371826171875, -2.25048828125, -2.129150390625, -2.0078125, -1.886474609375, -1.76513671875, -1.643798828125, -1.5224609375, -1.401123046875, -1.27978515625, -1.158447265625, -1.037109375, -0.915771484375, -0.79443359375, -0.673095703125, -0.5517578125, -0.430419921875, -0.30908203125, -0.187744140625, -0.06640625, 0.054931640625, 0.17626953125, 0.297607421875, 0.4189453125, 0.540283203125, 0.66162109375, 0.782958984375, 0.904296875, 1.025634765625, 1.14697265625, 1.268310546875, 1.3896484375, 1.510986328125, 1.63232421875, 1.753662109375, 1.875, 1.996337890625, 2.11767578125, 2.239013671875, 2.3603515625, 2.481689453125, 2.60302734375, 2.724365234375, 2.845703125, 2.967041015625, 3.08837890625, 3.209716796875, 3.3310546875, 3.452392578125, 3.57373046875, 3.695068359375, 3.81640625]}, "gradients/decoder.model.decoder.layers.2.self_attn.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 4.0, 5.0, 4.0, 9.0, 8.0, 12.0, 20.0, 31.0, 47.0, 73.0, 151.0, 297.0, 888.0, 3647.0, 24149.0, 328584.0, 646697.0, 37163.0, 4954.0, 1033.0, 364.0, 185.0, 84.0, 50.0, 34.0, 13.0, 11.0, 16.0, 9.0, 9.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.90087890625, -0.8663558959960938, -0.8318328857421875, -0.7973098754882812, -0.762786865234375, -0.7282638549804688, -0.6937408447265625, -0.6592178344726562, -0.62469482421875, -0.5901718139648438, -0.5556488037109375, -0.5211257934570312, -0.486602783203125, -0.45207977294921875, -0.4175567626953125, -0.38303375244140625, -0.3485107421875, -0.31398773193359375, -0.2794647216796875, -0.24494171142578125, -0.210418701171875, -0.17589569091796875, -0.1413726806640625, -0.10684967041015625, -0.07232666015625, -0.03780364990234375, -0.0032806396484375, 0.03124237060546875, 0.065765380859375, 0.10028839111328125, 0.1348114013671875, 0.16933441162109375, 0.203857421875, 0.23838043212890625, 0.2729034423828125, 0.30742645263671875, 0.341949462890625, 0.37647247314453125, 0.4109954833984375, 0.44551849365234375, 0.48004150390625, 0.5145645141601562, 0.5490875244140625, 0.5836105346679688, 0.618133544921875, 0.6526565551757812, 0.6871795654296875, 0.7217025756835938, 0.7562255859375, 0.7907485961914062, 0.8252716064453125, 0.8597946166992188, 0.894317626953125, 0.9288406372070312, 0.9633636474609375, 0.9978866577148438, 1.03240966796875, 1.0669326782226562, 1.1014556884765625, 1.1359786987304688, 1.170501708984375, 1.2050247192382812, 1.2395477294921875, 1.2740707397460938, 1.30859375]}, 
"gradients/decoder.model.decoder.layers.2.self_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 6.0, 4.0, 10.0, 11.0, 12.0, 11.0, 23.0, 28.0, 33.0, 49.0, 52.0, 79.0, 113.0, 139.0, 126.0, 91.0, 55.0, 52.0, 16.0, 25.0, 12.0, 14.0, 11.0, 7.0, 6.0, 4.0, 4.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010645389556884766, -0.00010307691991329193, -9.96999442577362e-05, -9.632296860218048e-05, -9.294599294662476e-05, -8.956901729106903e-05, -8.61920416355133e-05, -8.281506597995758e-05, -7.943809032440186e-05, -7.606111466884613e-05, -7.26841390132904e-05, -6.930716335773468e-05, -6.593018770217896e-05, -6.255321204662323e-05, -5.9176236391067505e-05, -5.579926073551178e-05, -5.2422285079956055e-05, -4.904530942440033e-05, -4.5668333768844604e-05, -4.229135811328888e-05, -3.8914382457733154e-05, -3.553740680217743e-05, -3.2160431146621704e-05, -2.878345549106598e-05, -2.5406479835510254e-05, -2.202950417995453e-05, -1.8652528524398804e-05, -1.527555286884308e-05, -1.1898577213287354e-05, -8.521601557731628e-06, -5.144625902175903e-06, -1.7676502466201782e-06, 1.6093254089355469e-06, 4.986301064491272e-06, 8.363276720046997e-06, 1.1740252375602722e-05, 1.5117228031158447e-05, 1.8494203686714172e-05, 2.1871179342269897e-05, 2.5248154997825623e-05, 2.8625130653381348e-05, 3.200210630893707e-05, 3.53790819644928e-05, 3.875605762004852e-05, 4.213303327560425e-05, 4.551000893115997e-05, 4.88869845867157e-05, 5.226396024227142e-05, 5.564093589782715e-05, 5.9017911553382874e-05, 6.23948872089386e-05, 6.577186286449432e-05, 6.914883852005005e-05, 7.252581417560577e-05, 7.59027898311615e-05, 7.927976548671722e-05, 8.265674114227295e-05, 8.603371679782867e-05, 8.94106924533844e-05, 9.278766810894012e-05, 9.616464376449585e-05, 9.954161942005157e-05, 0.0001029185950756073, 0.00010629557073116302, 0.00010967254638671875]}, "gradients/decoder.model.decoder.layers.2.self_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 4.0, 10.0, 9.0, 29.0, 56.0, 128.0, 316.0, 759.0, 2037.0, 7328.0, 54202.0, 839349.0, 127767.0, 12058.0, 2872.0, 980.0, 361.0, 147.0, 69.0, 43.0, 18.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.2548828125, -1.207794189453125, -1.16070556640625, -1.113616943359375, -1.0665283203125, -1.019439697265625, -0.97235107421875, -0.925262451171875, -0.878173828125, -0.831085205078125, -0.78399658203125, -0.736907958984375, -0.6898193359375, -0.642730712890625, -0.59564208984375, -0.548553466796875, -0.50146484375, -0.454376220703125, -0.40728759765625, -0.360198974609375, -0.3131103515625, -0.266021728515625, -0.21893310546875, -0.171844482421875, -0.124755859375, -0.077667236328125, -0.03057861328125, 0.016510009765625, 0.0635986328125, 0.110687255859375, 0.15777587890625, 0.204864501953125, 0.251953125, 0.299041748046875, 0.34613037109375, 0.393218994140625, 0.4403076171875, 0.487396240234375, 0.53448486328125, 0.581573486328125, 0.628662109375, 0.675750732421875, 0.72283935546875, 0.769927978515625, 0.8170166015625, 0.864105224609375, 0.91119384765625, 0.958282470703125, 1.00537109375, 1.052459716796875, 1.09954833984375, 1.146636962890625, 1.1937255859375, 1.240814208984375, 1.28790283203125, 1.334991455078125, 1.382080078125, 1.429168701171875, 
1.47625732421875, 1.523345947265625, 1.5704345703125, 1.617523193359375, 1.66461181640625, 1.711700439453125, 1.7587890625]}, "gradients/decoder.model.decoder.layers.2.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 2.0, 3.0, 2.0, 4.0, 5.0, 15.0, 21.0, 26.0, 52.0, 79.0, 122.0, 204.0, 197.0, 93.0, 65.0, 28.0, 33.0, 27.0, 15.0, 7.0, 6.0, 3.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6904296875, -0.6722526550292969, -0.6540756225585938, -0.6358985900878906, -0.6177215576171875, -0.5995445251464844, -0.5813674926757812, -0.5631904602050781, -0.545013427734375, -0.5268363952636719, -0.5086593627929688, -0.4904823303222656, -0.4723052978515625, -0.4541282653808594, -0.43595123291015625, -0.4177742004394531, -0.39959716796875, -0.3814201354980469, -0.36324310302734375, -0.3450660705566406, -0.3268890380859375, -0.3087120056152344, -0.29053497314453125, -0.2723579406738281, -0.254180908203125, -0.23600387573242188, -0.21782684326171875, -0.19964981079101562, -0.1814727783203125, -0.16329574584960938, -0.14511871337890625, -0.12694168090820312, -0.1087646484375, -0.09058761596679688, -0.07241058349609375, -0.054233551025390625, -0.0360565185546875, -0.017879486083984375, 0.00029754638671875, 0.018474578857421875, 0.036651611328125, 0.054828643798828125, 0.07300567626953125, 0.09118270874023438, 0.1093597412109375, 0.12753677368164062, 0.14571380615234375, 0.16389083862304688, 0.18206787109375, 0.20024490356445312, 0.21842193603515625, 0.23659896850585938, 0.2547760009765625, 0.2729530334472656, 0.29113006591796875, 0.3093070983886719, 0.327484130859375, 0.3456611633300781, 0.36383819580078125, 0.3820152282714844, 0.4001922607421875, 0.4183692932128906, 0.43654632568359375, 0.4547233581542969, 0.472900390625]}, "gradients/decoder.model.decoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 5.0, 7.0, 7.0, 9.0, 15.0, 27.0, 45.0, 43.0, 71.0, 81.0, 88.0, 104.0, 109.0, 84.0, 75.0, 74.0, 50.0, 33.0, 23.0, 16.0, 20.0, 6.0, 4.0, 4.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-6.314308166503906, -6.128770351409912, -5.943232536315918, -5.757694721221924, -5.57215690612793, -5.386618614196777, -5.201080799102783, -5.015542984008789, -4.830005168914795, -4.644467353820801, -4.458929538726807, -4.2733917236328125, -4.08785343170166, -3.902315855026245, -3.716777801513672, -3.5312399864196777, -3.3457021713256836, -3.1601643562316895, -2.9746265411376953, -2.789088487625122, -2.603550672531128, -2.418012857437134, -2.2324748039245605, -2.0469369888305664, -1.8613991737365723, -1.6758613586425781, -1.4903234243392944, -1.3047854900360107, -1.1192476749420166, -0.9337098002433777, -0.7481719255447388, -0.5626339912414551, -0.37709569931030273, -0.19155782461166382, -0.006019949913024902, 0.179517924785614, 0.36505579948425293, 0.5505936741828918, 0.7361315488815308, 0.9216694831848145, 1.1072072982788086, 1.2927451133728027, 1.4782830476760864, 1.6638209819793701, 1.8493587970733643, 2.0348966121673584, 2.2204346656799316, 2.405972480773926, 2.59151029586792, 2.777048110961914, 2.962585926055908, 3.1481239795684814, 3.3336617946624756, 3.5191996097564697, 3.704737663269043, 3.890275478363037, 4.075813293457031, 4.261351108551025, 
4.4468889236450195, 4.632426738739014, 4.817964553833008, 5.00350284576416, 5.189040660858154, 5.374578475952148, 5.560116291046143]}, "gradients/decoder.model.decoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 4.0, 6.0, 9.0, 7.0, 4.0, 5.0, 12.0, 10.0, 19.0, 5.0, 14.0, 16.0, 22.0, 23.0, 26.0, 18.0, 29.0, 26.0, 27.0, 33.0, 35.0, 21.0, 34.0, 31.0, 38.0, 46.0, 42.0, 40.0, 29.0, 40.0, 26.0, 31.0, 26.0, 38.0, 25.0, 21.0, 31.0, 13.0, 21.0, 12.0, 22.0, 22.0, 13.0, 10.0, 8.0, 5.0, 3.0, 3.0, 0.0, 3.0, 3.0, 4.0, 1.0, 2.0, 0.0, 1.0], "bins": [-5.3152008056640625, -5.153570175170898, -4.991939544677734, -4.830308437347412, -4.668677806854248, -4.507047176361084, -4.345416069030762, -4.183785438537598, -4.022154808044434, -3.8605241775512695, -3.6988933086395264, -3.537262439727783, -3.375631809234619, -3.214001178741455, -3.052370309829712, -2.8907394409179688, -2.7291088104248047, -2.5674781799316406, -2.4058473110198975, -2.2442164421081543, -2.0825858116149902, -1.9209550619125366, -1.759324312210083, -1.5976935625076294, -1.4360628128051758, -1.2744320631027222, -1.1128013134002686, -0.9511705636978149, -0.7895398139953613, -0.6279090642929077, -0.4662783145904541, -0.3046475648880005, -0.14301681518554688, 0.01861393451690674, 0.18024468421936035, 0.34187543392181396, 0.5035061836242676, 0.6651369333267212, 0.8267676830291748, 0.9883984327316284, 1.150029182434082, 1.3116599321365356, 1.4732906818389893, 1.6349214315414429, 1.7965521812438965, 1.95818293094635, 2.1198136806488037, 2.281444549560547, 2.443075180053711, 2.604705810546875, 2.766336679458618, 2.9279675483703613, 3.0895981788635254, 3.2512288093566895, 3.4128596782684326, 3.574490547180176, 3.73612117767334, 3.897751808166504, 4.059382438659668, 4.22101354598999, 4.382644176483154, 4.544274806976318, 4.705905914306641, 4.867536544799805, 5.029167175292969]}, "gradients/decoder.model.decoder.layers.1.fc2.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 9.0, 5.0, 5.0, 11.0, 24.0, 27.0, 37.0, 43.0, 78.0, 139.0, 206.0, 361.0, 781.0, 1777.0, 5153.0, 17465.0, 129847.0, 2555025.0, 1398102.0, 67743.0, 11553.0, 3272.0, 1259.0, 585.0, 288.0, 179.0, 120.0, 67.0, 41.0, 23.0, 16.0, 15.0, 15.0, 5.0, 5.0, 3.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.97265625, -4.82135009765625, -4.6700439453125, -4.51873779296875, -4.367431640625, -4.21612548828125, -4.0648193359375, -3.91351318359375, -3.76220703125, -3.61090087890625, -3.4595947265625, -3.30828857421875, -3.156982421875, -3.00567626953125, -2.8543701171875, -2.70306396484375, -2.5517578125, -2.40045166015625, -2.2491455078125, -2.09783935546875, -1.946533203125, -1.79522705078125, -1.6439208984375, -1.49261474609375, -1.34130859375, -1.19000244140625, -1.0386962890625, -0.88739013671875, -0.736083984375, -0.58477783203125, -0.4334716796875, -0.28216552734375, -0.130859375, 0.02044677734375, 0.1717529296875, 0.32305908203125, 0.474365234375, 0.62567138671875, 0.7769775390625, 0.92828369140625, 1.07958984375, 1.23089599609375, 1.3822021484375, 1.53350830078125, 1.684814453125, 1.83612060546875, 1.9874267578125, 2.13873291015625, 2.2900390625, 2.44134521484375, 2.5926513671875, 2.74395751953125, 2.895263671875, 3.04656982421875, 3.1978759765625, 3.34918212890625, 3.50048828125, 3.65179443359375, 3.8031005859375, 3.95440673828125, 4.105712890625, 4.25701904296875, 4.4083251953125, 4.55963134765625, 4.7109375]}, 
"gradients/decoder.model.decoder.layers.1.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 4.0, 1.0, 5.0, 5.0, 9.0, 14.0, 16.0, 20.0, 26.0, 36.0, 45.0, 44.0, 39.0, 70.0, 73.0, 72.0, 67.0, 65.0, 64.0, 69.0, 73.0, 56.0, 36.0, 30.0, 11.0, 20.0, 19.0, 8.0, 7.0, 3.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-2.57421875, -2.480010986328125, -2.38580322265625, -2.291595458984375, -2.1973876953125, -2.103179931640625, -2.00897216796875, -1.914764404296875, -1.820556640625, -1.726348876953125, -1.63214111328125, -1.537933349609375, -1.4437255859375, -1.349517822265625, -1.25531005859375, -1.161102294921875, -1.06689453125, -0.972686767578125, -0.87847900390625, -0.784271240234375, -0.6900634765625, -0.595855712890625, -0.50164794921875, -0.407440185546875, -0.313232421875, -0.219024658203125, -0.12481689453125, -0.030609130859375, 0.0635986328125, 0.157806396484375, 0.25201416015625, 0.346221923828125, 0.4404296875, 0.534637451171875, 0.62884521484375, 0.723052978515625, 0.8172607421875, 0.911468505859375, 1.00567626953125, 1.099884033203125, 1.194091796875, 1.288299560546875, 1.38250732421875, 1.476715087890625, 1.5709228515625, 1.665130615234375, 1.75933837890625, 1.853546142578125, 1.94775390625, 2.041961669921875, 2.13616943359375, 2.230377197265625, 2.3245849609375, 2.418792724609375, 2.51300048828125, 2.607208251953125, 2.701416015625, 2.795623779296875, 2.88983154296875, 2.984039306640625, 3.0782470703125, 3.172454833984375, 3.26666259765625, 3.360870361328125, 3.455078125]}, "gradients/decoder.model.decoder.layers.1.fc1.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 9.0, 1.0, 4.0, 4.0, 5.0, 6.0, 5.0, 10.0, 18.0, 28.0, 32.0, 44.0, 89.0, 126.0, 231.0, 532.0, 1234.0, 4094.0, 20644.0, 284796.0, 3737378.0, 126474.0, 13511.0, 2979.0, 1018.0, 433.0, 229.0, 124.0, 73.0, 53.0, 26.0, 22.0, 13.0, 9.0, 15.0, 3.0, 7.0, 2.0, 6.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.015625, -4.8099365234375, -4.604248046875, -4.3985595703125, -4.19287109375, -3.9871826171875, -3.781494140625, -3.5758056640625, -3.3701171875, -3.1644287109375, -2.958740234375, -2.7530517578125, -2.54736328125, -2.3416748046875, -2.135986328125, -1.9302978515625, -1.724609375, -1.5189208984375, -1.313232421875, -1.1075439453125, -0.90185546875, -0.6961669921875, -0.490478515625, -0.2847900390625, -0.0791015625, 0.1265869140625, 0.332275390625, 0.5379638671875, 0.74365234375, 0.9493408203125, 1.155029296875, 1.3607177734375, 1.56640625, 1.7720947265625, 1.977783203125, 2.1834716796875, 2.38916015625, 2.5948486328125, 2.800537109375, 3.0062255859375, 3.2119140625, 3.4176025390625, 3.623291015625, 3.8289794921875, 4.03466796875, 4.2403564453125, 4.446044921875, 4.6517333984375, 4.857421875, 5.0631103515625, 5.268798828125, 5.4744873046875, 5.68017578125, 5.8858642578125, 6.091552734375, 6.2972412109375, 6.5029296875, 6.7086181640625, 6.914306640625, 7.1199951171875, 7.32568359375, 7.5313720703125, 7.737060546875, 7.9427490234375, 8.1484375]}, "gradients/decoder.model.decoder.layers.1.fc1.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0, 4.0, 5.0, 7.0, 5.0, 9.0, 7.0, 19.0, 23.0, 28.0, 57.0, 84.0, 134.0, 277.0, 458.0, 733.0, 832.0, 613.0, 301.0, 166.0, 95.0, 75.0, 41.0, 19.0, 16.0, 24.0, 15.0, 8.0, 6.0, 4.0, 3.0, 
4.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.9560546875, -1.904754638671875, -1.85345458984375, -1.802154541015625, -1.7508544921875, -1.699554443359375, -1.64825439453125, -1.596954345703125, -1.545654296875, -1.494354248046875, -1.44305419921875, -1.391754150390625, -1.3404541015625, -1.289154052734375, -1.23785400390625, -1.186553955078125, -1.13525390625, -1.083953857421875, -1.03265380859375, -0.981353759765625, -0.9300537109375, -0.878753662109375, -0.82745361328125, -0.776153564453125, -0.724853515625, -0.673553466796875, -0.62225341796875, -0.570953369140625, -0.5196533203125, -0.468353271484375, -0.41705322265625, -0.365753173828125, -0.314453125, -0.263153076171875, -0.21185302734375, -0.160552978515625, -0.1092529296875, -0.057952880859375, -0.00665283203125, 0.044647216796875, 0.095947265625, 0.147247314453125, 0.19854736328125, 0.249847412109375, 0.3011474609375, 0.352447509765625, 0.40374755859375, 0.455047607421875, 0.50634765625, 0.557647705078125, 0.60894775390625, 0.660247802734375, 0.7115478515625, 0.762847900390625, 0.81414794921875, 0.865447998046875, 0.916748046875, 0.968048095703125, 1.01934814453125, 1.070648193359375, 1.1219482421875, 1.173248291015625, 1.22454833984375, 1.275848388671875, 1.3271484375]}, "gradients/decoder.model.decoder.layers.1.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 3.0, 10.0, 10.0, 11.0, 40.0, 55.0, 97.0, 101.0, 137.0, 149.0, 135.0, 103.0, 65.0, 38.0, 22.0, 19.0, 7.0, 6.0, 0.0, 6.0, 2.0, 0.0, 1.0, 0.0, 1.0], "bins": [-9.758206367492676, -9.556876182556152, -9.355545997619629, -9.154215812683105, -8.952885627746582, -8.751554489135742, -8.550224304199219, -8.348894119262695, -8.147563934326172, -7.946233749389648, -7.744903564453125, -7.543572902679443, -7.34224271774292, -7.1409125328063965, -6.939582347869873, -6.738251686096191, -6.536921501159668, -6.3355913162231445, -6.134261131286621, -5.9329304695129395, -5.731600284576416, -5.530270099639893, -5.328939914703369, -5.1276092529296875, -4.926279544830322, -4.724949359893799, -4.523619174957275, -4.322288513183594, -4.12095832824707, -3.919628143310547, -3.7182979583740234, -3.516967535018921, -3.3156373500823975, -3.114307165145874, -2.9129767417907715, -2.711646556854248, -2.5103161334991455, -2.308985948562622, -2.1076555252075195, -1.906325340270996, -1.704995036125183, -1.5036647319793701, -1.3023344278335571, -1.1010041236877441, -0.8996738791465759, -0.6983436346054077, -0.4970133304595947, -0.29568302631378174, -0.09435272216796875, 0.10697756707668304, 0.30830785632133484, 0.5096381306648254, 0.7109684348106384, 0.9122986793518066, 1.1136289834976196, 1.3149592876434326, 1.5162895917892456, 1.7176198959350586, 1.9189502000808716, 2.1202805042266846, 2.321610689163208, 2.5229411125183105, 2.724271297454834, 2.9256014823913574, 3.12693190574646]}, "gradients/decoder.model.decoder.layers.1.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 4.0, 5.0, 9.0, 9.0, 14.0, 14.0, 12.0, 12.0, 18.0, 25.0, 17.0, 24.0, 27.0, 34.0, 37.0, 34.0, 34.0, 30.0, 48.0, 34.0, 45.0, 46.0, 36.0, 51.0, 36.0, 42.0, 40.0, 29.0, 23.0, 38.0, 25.0, 18.0, 28.0, 18.0, 12.0, 11.0, 18.0, 15.0, 6.0, 9.0, 8.0, 3.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0], "bins": [-2.59322190284729, 
-2.51908278465271, -2.444943904876709, -2.370804786682129, -2.296665906906128, -2.222526788711548, -2.148387908935547, -2.074248790740967, -2.000109910964966, -1.9259709119796753, -1.8518319129943848, -1.7776929140090942, -1.7035539150238037, -1.6294149160385132, -1.5552759170532227, -1.4811367988586426, -1.406997799873352, -1.3328588008880615, -1.258719801902771, -1.1845808029174805, -1.11044180393219, -1.0363028049468994, -0.9621637463569641, -0.8880247473716736, -0.8138857483863831, -0.7397467494010925, -0.665607750415802, -0.5914686918258667, -0.5173296928405762, -0.44319072365760803, -0.3690516948699951, -0.2949126958847046, -0.22077369689941406, -0.14663469791412354, -0.07249568402767181, 0.0016433298587799072, 0.07578232884407043, 0.14992132782936096, 0.22406035661697388, 0.2981993556022644, 0.37233835458755493, 0.44647735357284546, 0.520616352558136, 0.5947554111480713, 0.6688944101333618, 0.7430334091186523, 0.8171724081039429, 0.8913114070892334, 0.9654504060745239, 1.0395894050598145, 1.113728404045105, 1.1878674030303955, 1.262006402015686, 1.3361454010009766, 1.4102845191955566, 1.4844233989715576, 1.5585625171661377, 1.6327015161514282, 1.7068405151367188, 1.7809795141220093, 1.8551185131072998, 1.9292575120925903, 2.003396511077881, 2.077535629272461, 2.151674509048462]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 8.0, 5.0, 6.0, 15.0, 26.0, 34.0, 45.0, 89.0, 166.0, 298.0, 642.0, 1639.0, 5305.0, 23226.0, 171338.0, 726884.0, 97122.0, 15354.0, 3911.0, 1291.0, 521.0, 275.0, 131.0, 89.0, 45.0, 32.0, 25.0, 15.0, 8.0, 7.0, 3.0, 7.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.86279296875, -0.8312759399414062, -0.7997589111328125, -0.7682418823242188, -0.736724853515625, -0.7052078247070312, -0.6736907958984375, -0.6421737670898438, -0.61065673828125, -0.5791397094726562, -0.5476226806640625, -0.5161056518554688, -0.484588623046875, -0.45307159423828125, -0.4215545654296875, -0.39003753662109375, -0.3585205078125, -0.32700347900390625, -0.2954864501953125, -0.26396942138671875, -0.232452392578125, -0.20093536376953125, -0.1694183349609375, -0.13790130615234375, -0.10638427734375, -0.07486724853515625, -0.0433502197265625, -0.01183319091796875, 0.019683837890625, 0.05120086669921875, 0.0827178955078125, 0.11423492431640625, 0.145751953125, 0.17726898193359375, 0.2087860107421875, 0.24030303955078125, 0.271820068359375, 0.30333709716796875, 0.3348541259765625, 0.36637115478515625, 0.39788818359375, 0.42940521240234375, 0.4609222412109375, 0.49243927001953125, 0.523956298828125, 0.5554733276367188, 0.5869903564453125, 0.6185073852539062, 0.6500244140625, 0.6815414428710938, 0.7130584716796875, 0.7445755004882812, 0.776092529296875, 0.8076095581054688, 0.8391265869140625, 0.8706436157226562, 0.90216064453125, 0.9336776733398438, 0.9651947021484375, 0.9967117309570312, 1.028228759765625, 1.0597457885742188, 1.0912628173828125, 1.1227798461914062, 1.154296875]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 0.0, 10.0, 12.0, 9.0, 17.0, 24.0, 25.0, 38.0, 41.0, 57.0, 52.0, 58.0, 63.0, 66.0, 67.0, 57.0, 66.0, 58.0, 56.0, 44.0, 50.0, 30.0, 24.0, 31.0, 20.0, 14.0, 10.0, 4.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 
1.0], "bins": [-1.6240234375, -1.5547637939453125, -1.485504150390625, -1.4162445068359375, -1.34698486328125, -1.2777252197265625, -1.208465576171875, -1.1392059326171875, -1.0699462890625, -1.0006866455078125, -0.931427001953125, -0.8621673583984375, -0.79290771484375, -0.7236480712890625, -0.654388427734375, -0.5851287841796875, -0.515869140625, -0.4466094970703125, -0.377349853515625, -0.3080902099609375, -0.23883056640625, -0.1695709228515625, -0.100311279296875, -0.0310516357421875, 0.0382080078125, 0.1074676513671875, 0.176727294921875, 0.2459869384765625, 0.31524658203125, 0.3845062255859375, 0.453765869140625, 0.5230255126953125, 0.59228515625, 0.6615447998046875, 0.730804443359375, 0.8000640869140625, 0.86932373046875, 0.9385833740234375, 1.007843017578125, 1.0771026611328125, 1.1463623046875, 1.2156219482421875, 1.284881591796875, 1.3541412353515625, 1.42340087890625, 1.4926605224609375, 1.561920166015625, 1.6311798095703125, 1.700439453125, 1.7696990966796875, 1.838958740234375, 1.9082183837890625, 1.97747802734375, 2.0467376708984375, 2.115997314453125, 2.1852569580078125, 2.2545166015625, 2.3237762451171875, 2.393035888671875, 2.4622955322265625, 2.53155517578125, 2.6008148193359375, 2.670074462890625, 2.7393341064453125, 2.80859375]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 3.0, 7.0, 4.0, 4.0, 6.0, 5.0, 12.0, 18.0, 28.0, 43.0, 49.0, 71.0, 120.0, 160.0, 255.0, 371.0, 617.0, 969.0, 1562.0, 2654.0, 4455.0, 7602.0, 13430.0, 24891.0, 48346.0, 105628.0, 260796.0, 318582.0, 133145.0, 58588.0, 28957.0, 15459.0, 8743.0, 5159.0, 2916.0, 1767.0, 1149.0, 680.0, 431.0, 291.0, 188.0, 103.0, 82.0, 68.0, 41.0, 36.0, 24.0, 14.0, 12.0, 5.0, 1.0, 7.0, 4.0, 5.0, 2.0, 0.0, 3.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.187255859375, -0.18104171752929688, -0.17482757568359375, -0.16861343383789062, -0.1623992919921875, -0.15618515014648438, -0.14997100830078125, -0.14375686645507812, -0.137542724609375, -0.13132858276367188, -0.12511444091796875, -0.11890029907226562, -0.1126861572265625, -0.10647201538085938, -0.10025787353515625, -0.09404373168945312, -0.08782958984375, -0.08161544799804688, -0.07540130615234375, -0.06918716430664062, -0.0629730224609375, -0.056758880615234375, -0.05054473876953125, -0.044330596923828125, -0.038116455078125, -0.031902313232421875, -0.02568817138671875, -0.019474029541015625, -0.0132598876953125, -0.007045745849609375, -0.00083160400390625, 0.005382537841796875, 0.0115966796875, 0.017810821533203125, 0.02402496337890625, 0.030239105224609375, 0.0364532470703125, 0.042667388916015625, 0.04888153076171875, 0.055095672607421875, 0.061309814453125, 0.06752395629882812, 0.07373809814453125, 0.07995223999023438, 0.0861663818359375, 0.09238052368164062, 0.09859466552734375, 0.10480880737304688, 0.11102294921875, 0.11723709106445312, 0.12345123291015625, 0.12966537475585938, 0.1358795166015625, 0.14209365844726562, 0.14830780029296875, 0.15452194213867188, 0.160736083984375, 0.16695022583007812, 0.17316436767578125, 0.17937850952148438, 0.1855926513671875, 0.19180679321289062, 0.19802093505859375, 0.20423507690429688, 0.21044921875]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 5.0, 5.0, 7.0, 9.0, 18.0, 10.0, 13.0, 15.0, 18.0, 20.0, 27.0, 24.0, 37.0, 34.0, 49.0, 45.0, 56.0, 59.0, 54.0, 53.0, 45.0, 41.0, 46.0, 35.0, 40.0, 41.0, 22.0, 23.0, 17.0, 14.0, 19.0, 25.0, 18.0, 
12.0, 9.0, 6.0, 6.0, 7.0, 4.0, 8.0, 4.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-3.427734375, -3.326934814453125, -3.22613525390625, -3.125335693359375, -3.0245361328125, -2.923736572265625, -2.82293701171875, -2.722137451171875, -2.621337890625, -2.520538330078125, -2.41973876953125, -2.318939208984375, -2.2181396484375, -2.117340087890625, -2.01654052734375, -1.915740966796875, -1.81494140625, -1.714141845703125, -1.61334228515625, -1.512542724609375, -1.4117431640625, -1.310943603515625, -1.21014404296875, -1.109344482421875, -1.008544921875, -0.907745361328125, -0.80694580078125, -0.706146240234375, -0.6053466796875, -0.504547119140625, -0.40374755859375, -0.302947998046875, -0.2021484375, -0.101348876953125, -0.00054931640625, 0.100250244140625, 0.2010498046875, 0.301849365234375, 0.40264892578125, 0.503448486328125, 0.604248046875, 0.705047607421875, 0.80584716796875, 0.906646728515625, 1.0074462890625, 1.108245849609375, 1.20904541015625, 1.309844970703125, 1.41064453125, 1.511444091796875, 1.61224365234375, 1.713043212890625, 1.8138427734375, 1.914642333984375, 2.01544189453125, 2.116241455078125, 2.217041015625, 2.317840576171875, 2.41864013671875, 2.519439697265625, 2.6202392578125, 2.721038818359375, 2.82183837890625, 2.922637939453125, 3.0234375]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 4.0, 3.0, 3.0, 5.0, 5.0, 7.0, 12.0, 17.0, 31.0, 60.0, 65.0, 119.0, 193.0, 375.0, 891.0, 3486.0, 22819.0, 574789.0, 420978.0, 20051.0, 3079.0, 783.0, 329.0, 162.0, 102.0, 71.0, 50.0, 27.0, 14.0, 15.0, 7.0, 4.0, 2.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0], "bins": [-0.1370849609375, -0.1331644058227539, -0.1292438507080078, -0.12532329559326172, -0.12140274047851562, -0.11748218536376953, -0.11356163024902344, -0.10964107513427734, -0.10572052001953125, -0.10179996490478516, -0.09787940979003906, -0.09395885467529297, -0.09003829956054688, -0.08611774444580078, -0.08219718933105469, -0.0782766342163086, -0.0743560791015625, -0.0704355239868164, -0.06651496887207031, -0.06259441375732422, -0.058673858642578125, -0.05475330352783203, -0.05083274841308594, -0.046912193298339844, -0.04299163818359375, -0.039071083068847656, -0.03515052795410156, -0.03122997283935547, -0.027309417724609375, -0.02338886260986328, -0.019468307495117188, -0.015547752380371094, -0.011627197265625, -0.007706642150878906, -0.0037860870361328125, 0.00013446807861328125, 0.004055023193359375, 0.007975578308105469, 0.011896133422851562, 0.015816688537597656, 0.01973724365234375, 0.023657798767089844, 0.027578353881835938, 0.03149890899658203, 0.035419464111328125, 0.03934001922607422, 0.04326057434082031, 0.047181129455566406, 0.0511016845703125, 0.055022239685058594, 0.05894279479980469, 0.06286334991455078, 0.06678390502929688, 0.07070446014404297, 0.07462501525878906, 0.07854557037353516, 0.08246612548828125, 0.08638668060302734, 0.09030723571777344, 0.09422779083251953, 0.09814834594726562, 0.10206890106201172, 0.10598945617675781, 0.1099100112915039, 0.11383056640625]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 7.0, 2.0, 3.0, 6.0, 9.0, 15.0, 33.0, 29.0, 51.0, 61.0, 84.0, 121.0, 118.0, 113.0, 116.0, 64.0, 57.0, 41.0, 32.0, 13.0, 12.0, 9.0, 1.0, 6.0, 3.0, 1.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-4.1425228118896484e-05, -3.9662234485149384e-05, -3.789924085140228e-05, -3.613624721765518e-05, -3.437325358390808e-05, -3.261025995016098e-05, -3.084726631641388e-05, -2.908427268266678e-05, -2.7321279048919678e-05, -2.5558285415172577e-05, -2.3795291781425476e-05, -2.2032298147678375e-05, -2.0269304513931274e-05, -1.8506310880184174e-05, -1.6743317246437073e-05, -1.4980323612689972e-05, -1.3217329978942871e-05, -1.145433634519577e-05, -9.69134271144867e-06, -7.928349077701569e-06, -6.165355443954468e-06, -4.402361810207367e-06, -2.639368176460266e-06, -8.763745427131653e-07, 8.866190910339355e-07, 2.6496127247810364e-06, 4.412606358528137e-06, 6.175599992275238e-06, 7.938593626022339e-06, 9.70158725976944e-06, 1.146458089351654e-05, 1.3227574527263641e-05, 1.4990568161010742e-05, 1.6753561794757843e-05, 1.8516555428504944e-05, 2.0279549062252045e-05, 2.2042542695999146e-05, 2.3805536329746246e-05, 2.5568529963493347e-05, 2.7331523597240448e-05, 2.909451723098755e-05, 3.085751086473465e-05, 3.262050449848175e-05, 3.438349813222885e-05, 3.614649176597595e-05, 3.790948539972305e-05, 3.9672479033470154e-05, 4.1435472667217255e-05, 4.3198466300964355e-05, 4.4961459934711456e-05, 4.672445356845856e-05, 4.848744720220566e-05, 5.025044083595276e-05, 5.201343446969986e-05, 5.377642810344696e-05, 5.553942173719406e-05, 5.730241537094116e-05, 5.906540900468826e-05, 6.0828402638435364e-05, 6.259139627218246e-05, 6.435438990592957e-05, 6.611738353967667e-05, 6.788037717342377e-05, 6.964337080717087e-05, 7.140636444091797e-05]}, "gradients/decoder.model.decoder.layers.1.encoder_attn.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 4.0, 8.0, 4.0, 11.0, 15.0, 37.0, 63.0, 117.0, 255.0, 580.0, 1667.0, 5355.0, 27869.0, 645080.0, 339502.0, 21204.0, 4391.0, 1408.0, 542.0, 221.0, 96.0, 59.0, 22.0, 20.0, 11.0, 1.0, 5.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1578369140625, -0.1531848907470703, -0.14853286743164062, -0.14388084411621094, -0.13922882080078125, -0.13457679748535156, -0.12992477416992188, -0.1252727508544922, -0.1206207275390625, -0.11596870422363281, -0.11131668090820312, -0.10666465759277344, -0.10201263427734375, -0.09736061096191406, -0.09270858764648438, -0.08805656433105469, -0.083404541015625, -0.07875251770019531, -0.07410049438476562, -0.06944847106933594, -0.06479644775390625, -0.06014442443847656, -0.055492401123046875, -0.05084037780761719, -0.0461883544921875, -0.04153633117675781, -0.036884307861328125, -0.03223228454589844, -0.02758026123046875, -0.022928237915039062, -0.018276214599609375, -0.013624191284179688, -0.00897216796875, -0.0043201446533203125, 0.000331878662109375, 0.0049839019775390625, 0.00963592529296875, 0.014287948608398438, 0.018939971923828125, 0.023591995239257812, 0.0282440185546875, 0.03289604187011719, 0.037548065185546875, 0.04220008850097656, 0.04685211181640625, 0.05150413513183594, 0.056156158447265625, 0.06080818176269531, 0.065460205078125, 0.07011222839355469, 0.07476425170898438, 0.07941627502441406, 0.08406829833984375, 0.08872032165527344, 0.09337234497070312, 0.09802436828613281, 0.1026763916015625, 0.10732841491699219, 0.11198043823242188, 0.11663246154785156, 0.12128448486328125, 0.12593650817871094, 0.13058853149414062, 0.1352405548095703, 0.139892578125]}, 
"gradients/decoder.model.decoder.layers.1.encoder_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 9.0, 10.0, 10.0, 24.0, 41.0, 115.0, 326.0, 258.0, 81.0, 41.0, 26.0, 14.0, 11.0, 7.0, 5.0, 6.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.03790283203125, -0.03695273399353027, -0.03600263595581055, -0.03505253791809082, -0.034102439880371094, -0.03315234184265137, -0.03220224380493164, -0.031252145767211914, -0.030302047729492188, -0.02935194969177246, -0.028401851654052734, -0.027451753616333008, -0.02650165557861328, -0.025551557540893555, -0.024601459503173828, -0.0236513614654541, -0.022701263427734375, -0.02175116539001465, -0.020801067352294922, -0.019850969314575195, -0.01890087127685547, -0.017950773239135742, -0.017000675201416016, -0.01605057716369629, -0.015100479125976562, -0.014150381088256836, -0.01320028305053711, -0.012250185012817383, -0.011300086975097656, -0.01034998893737793, -0.009399890899658203, -0.008449792861938477, -0.00749969482421875, -0.0065495967864990234, -0.005599498748779297, -0.00464940071105957, -0.0036993026733398438, -0.002749204635620117, -0.0017991065979003906, -0.0008490085601806641, 0.0001010894775390625, 0.001051187515258789, 0.0020012855529785156, 0.002951383590698242, 0.0039014816284179688, 0.004851579666137695, 0.005801677703857422, 0.0067517757415771484, 0.007701873779296875, 0.008651971817016602, 0.009602069854736328, 0.010552167892456055, 0.011502265930175781, 0.012452363967895508, 0.013402462005615234, 0.014352560043334961, 0.015302658081054688, 0.016252756118774414, 0.01720285415649414, 0.018152952194213867, 0.019103050231933594, 0.02005314826965332, 0.021003246307373047, 0.021953344345092773, 0.0229034423828125]}, "gradients/decoder.model.decoder.layers.1.self_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 5.0, 4.0, 14.0, 20.0, 36.0, 52.0, 78.0, 103.0, 121.0, 134.0, 130.0, 96.0, 73.0, 61.0, 33.0, 13.0, 14.0, 10.0, 5.0, 2.0, 3.0, 2.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-4.1637420654296875, -4.063394069671631, -3.9630463123321533, -3.862698554992676, -3.762350559234619, -3.6620028018951416, -3.561655044555664, -3.4613070487976074, -3.360959053039551, -3.2606112957000732, -3.1602632999420166, -3.059915542602539, -2.9595675468444824, -2.859219789505005, -2.7588720321655273, -2.6585240364074707, -2.558176279067993, -2.4578285217285156, -2.357480525970459, -2.2571327686309814, -2.156784772872925, -2.0564370155334473, -1.9560891389846802, -1.855741262435913, -1.755393385887146, -1.655045509338379, -1.5546976327896118, -1.4543497562408447, -1.3540019989013672, -1.2536540031433105, -1.153306245803833, -1.052958369255066, -0.9526102542877197, -0.8522623777389526, -0.7519145011901855, -0.6515666842460632, -0.5512188076972961, -0.45087093114852905, -0.35052311420440674, -0.25017523765563965, -0.14982736110687256, -0.04947949945926666, 0.05086836218833923, 0.15121620893478394, 0.251564085483551, 0.3519119620323181, 0.45225977897644043, 0.5526076555252075, 0.6529555320739746, 0.7533034086227417, 0.8536512851715088, 0.9539991021156311, 1.054347038269043, 1.1546947956085205, 1.2550426721572876, 1.3553905487060547, 1.4557384252548218, 1.5560863018035889, 
1.656434178352356, 1.756782054901123, 1.8571298122406006, 1.9574778079986572, 2.0578255653381348, 2.1581735610961914, 2.258521318435669]}, "gradients/decoder.model.decoder.layers.1.self_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 1.0, 2.0, 3.0, 6.0, 8.0, 12.0, 11.0, 19.0, 12.0, 26.0, 21.0, 24.0, 27.0, 39.0, 37.0, 37.0, 44.0, 40.0, 48.0, 51.0, 46.0, 50.0, 53.0, 47.0, 53.0, 35.0, 34.0, 38.0, 30.0, 25.0, 24.0, 25.0, 12.0, 16.0, 15.0, 12.0, 10.0, 5.0, 3.0, 2.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-1.6357100009918213, -1.5870754718780518, -1.5384409427642822, -1.4898064136505127, -1.4411718845367432, -1.3925373554229736, -1.343902826309204, -1.2952682971954346, -1.246633768081665, -1.1979992389678955, -1.149364709854126, -1.1007301807403564, -1.052095651626587, -1.0034611225128174, -0.9548265933990479, -0.9061920642852783, -0.8575575351715088, -0.8089230060577393, -0.7602884769439697, -0.7116539478302002, -0.6630194187164307, -0.6143848896026611, -0.5657503604888916, -0.5171158313751221, -0.46848130226135254, -0.419846773147583, -0.3712122440338135, -0.32257771492004395, -0.2739431858062744, -0.22530865669250488, -0.17667412757873535, -0.12803959846496582, -0.07940518856048584, -0.03077065944671631, 0.017863869667053223, 0.06649839878082275, 0.11513292789459229, 0.16376745700836182, 0.21240198612213135, 0.2610365152359009, 0.3096710443496704, 0.35830557346343994, 0.4069401025772095, 0.455574631690979, 0.5042091608047485, 0.5528436899185181, 0.6014782190322876, 0.6501127481460571, 0.6987472772598267, 0.7473818063735962, 0.7960163354873657, 0.8446508646011353, 0.8932853937149048, 0.9419199228286743, 0.9905544519424438, 1.0391889810562134, 1.087823510169983, 1.1364580392837524, 1.185092568397522, 1.2337270975112915, 1.282361626625061, 1.3309961557388306, 1.3796306848526, 1.4282652139663696, 1.4768997430801392]}, "gradients/decoder.model.decoder.layers.1.self_attn.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 4.0, 4.0, 9.0, 10.0, 21.0, 25.0, 30.0, 50.0, 100.0, 177.0, 402.0, 875.0, 2080.0, 5508.0, 17627.0, 62799.0, 285379.0, 505220.0, 122474.0, 30921.0, 9306.0, 3132.0, 1237.0, 531.0, 269.0, 132.0, 93.0, 46.0, 34.0, 29.0, 10.0, 6.0, 7.0, 4.0, 5.0, 1.0, 3.0, 1.0, 1.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.419921875, -2.342437744140625, -2.26495361328125, -2.187469482421875, -2.1099853515625, -2.032501220703125, -1.95501708984375, -1.877532958984375, -1.800048828125, -1.722564697265625, -1.64508056640625, -1.567596435546875, -1.4901123046875, -1.412628173828125, -1.33514404296875, -1.257659912109375, -1.18017578125, -1.102691650390625, -1.02520751953125, -0.947723388671875, -0.8702392578125, -0.792755126953125, -0.71527099609375, -0.637786865234375, -0.560302734375, -0.482818603515625, -0.40533447265625, -0.327850341796875, -0.2503662109375, -0.172882080078125, -0.09539794921875, -0.017913818359375, 0.0595703125, 0.137054443359375, 0.21453857421875, 0.292022705078125, 0.3695068359375, 0.446990966796875, 0.52447509765625, 0.601959228515625, 0.679443359375, 0.756927490234375, 0.83441162109375, 0.911895751953125, 0.9893798828125, 1.066864013671875, 1.14434814453125, 1.221832275390625, 1.29931640625, 1.376800537109375, 1.45428466796875, 1.531768798828125, 1.6092529296875, 1.686737060546875, 1.76422119140625, 1.841705322265625, 1.919189453125, 1.996673583984375, 2.07415771484375, 2.151641845703125, 
2.2291259765625, 2.306610107421875, 2.38409423828125, 2.461578369140625, 2.5390625]}, "gradients/decoder.model.decoder.layers.1.self_attn.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 2.0, 5.0, 2.0, 8.0, 12.0, 15.0, 22.0, 16.0, 40.0, 29.0, 40.0, 44.0, 44.0, 55.0, 57.0, 65.0, 56.0, 89.0, 55.0, 64.0, 64.0, 39.0, 32.0, 43.0, 24.0, 24.0, 20.0, 18.0, 13.0, 9.0, 6.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0], "bins": [-6.4453125, -6.294158935546875, -6.14300537109375, -5.991851806640625, -5.8406982421875, -5.689544677734375, -5.53839111328125, -5.387237548828125, -5.236083984375, -5.084930419921875, -4.93377685546875, -4.782623291015625, -4.6314697265625, -4.480316162109375, -4.32916259765625, -4.178009033203125, -4.02685546875, -3.875701904296875, -3.72454833984375, -3.573394775390625, -3.4222412109375, -3.271087646484375, -3.11993408203125, -2.968780517578125, -2.817626953125, -2.666473388671875, -2.51531982421875, -2.364166259765625, -2.2130126953125, -2.061859130859375, -1.91070556640625, -1.759552001953125, -1.6083984375, -1.457244873046875, -1.30609130859375, -1.154937744140625, -1.0037841796875, -0.852630615234375, -0.70147705078125, -0.550323486328125, -0.399169921875, -0.248016357421875, -0.09686279296875, 0.054290771484375, 0.2054443359375, 0.356597900390625, 0.50775146484375, 0.658905029296875, 0.81005859375, 0.961212158203125, 1.11236572265625, 1.263519287109375, 1.4146728515625, 1.565826416015625, 1.71697998046875, 1.868133544921875, 2.019287109375, 2.170440673828125, 2.32159423828125, 2.472747802734375, 2.6239013671875, 2.775054931640625, 2.92620849609375, 3.077362060546875, 3.228515625]}, "gradients/decoder.model.decoder.layers.1.self_attn.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 4.0, 1.0, 3.0, 4.0, 4.0, 6.0, 8.0, 19.0, 16.0, 17.0, 34.0, 46.0, 60.0, 81.0, 125.0, 207.0, 494.0, 1500.0, 6261.0, 36205.0, 327484.0, 597034.0, 65173.0, 10281.0, 2109.0, 630.0, 248.0, 166.0, 99.0, 58.0, 59.0, 36.0, 21.0, 18.0, 20.0, 7.0, 3.0, 4.0, 3.0, 2.0, 4.0, 2.0, 5.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.32421875, -3.20611572265625, -3.0880126953125, -2.96990966796875, -2.851806640625, -2.73370361328125, -2.6156005859375, -2.49749755859375, -2.37939453125, -2.26129150390625, -2.1431884765625, -2.02508544921875, -1.906982421875, -1.78887939453125, -1.6707763671875, -1.55267333984375, -1.4345703125, -1.31646728515625, -1.1983642578125, -1.08026123046875, -0.962158203125, -0.84405517578125, -0.7259521484375, -0.60784912109375, -0.48974609375, -0.37164306640625, -0.2535400390625, -0.13543701171875, -0.017333984375, 0.10076904296875, 0.2188720703125, 0.33697509765625, 0.455078125, 0.57318115234375, 0.6912841796875, 0.80938720703125, 0.927490234375, 1.04559326171875, 1.1636962890625, 1.28179931640625, 1.39990234375, 1.51800537109375, 1.6361083984375, 1.75421142578125, 1.872314453125, 1.99041748046875, 2.1085205078125, 2.22662353515625, 2.3447265625, 2.46282958984375, 2.5809326171875, 2.69903564453125, 2.817138671875, 2.93524169921875, 3.0533447265625, 3.17144775390625, 3.28955078125, 3.40765380859375, 3.5257568359375, 3.64385986328125, 3.761962890625, 3.88006591796875, 3.9981689453125, 4.11627197265625, 4.234375]}, "gradients/decoder.model.decoder.layers.1.self_attn.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0, 
4.0, 5.0, 9.0, 15.0, 12.0, 25.0, 19.0, 32.0, 24.0, 33.0, 63.0, 59.0, 63.0, 71.0, 54.0, 79.0, 63.0, 65.0, 58.0, 38.0, 55.0, 32.0, 25.0, 27.0, 22.0, 15.0, 10.0, 10.0, 7.0, 3.0, 2.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.51171875, -5.3355712890625, -5.159423828125, -4.9832763671875, -4.80712890625, -4.6309814453125, -4.454833984375, -4.2786865234375, -4.1025390625, -3.9263916015625, -3.750244140625, -3.5740966796875, -3.39794921875, -3.2218017578125, -3.045654296875, -2.8695068359375, -2.693359375, -2.5172119140625, -2.341064453125, -2.1649169921875, -1.98876953125, -1.8126220703125, -1.636474609375, -1.4603271484375, -1.2841796875, -1.1080322265625, -0.931884765625, -0.7557373046875, -0.57958984375, -0.4034423828125, -0.227294921875, -0.0511474609375, 0.125, 0.3011474609375, 0.477294921875, 0.6534423828125, 0.82958984375, 1.0057373046875, 1.181884765625, 1.3580322265625, 1.5341796875, 1.7103271484375, 1.886474609375, 2.0626220703125, 2.23876953125, 2.4149169921875, 2.591064453125, 2.7672119140625, 2.943359375, 3.1195068359375, 3.295654296875, 3.4718017578125, 3.64794921875, 3.8240966796875, 4.000244140625, 4.1763916015625, 4.3525390625, 4.5286865234375, 4.704833984375, 4.8809814453125, 5.05712890625, 5.2332763671875, 5.409423828125, 5.5855712890625, 5.76171875]}, "gradients/decoder.model.decoder.layers.1.self_attn.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 4.0, 4.0, 3.0, 6.0, 2.0, 6.0, 9.0, 14.0, 12.0, 24.0, 34.0, 56.0, 104.0, 169.0, 411.0, 1029.0, 3528.0, 17038.0, 152775.0, 751230.0, 104449.0, 13117.0, 2883.0, 884.0, 352.0, 155.0, 94.0, 46.0, 32.0, 32.0, 16.0, 9.0, 7.0, 5.0, 3.0, 5.0, 6.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0, -0.971771240234375, -0.94354248046875, -0.915313720703125, -0.8870849609375, -0.858856201171875, -0.83062744140625, -0.802398681640625, -0.774169921875, -0.745941162109375, -0.71771240234375, -0.689483642578125, -0.6612548828125, -0.633026123046875, -0.60479736328125, -0.576568603515625, -0.54833984375, -0.520111083984375, -0.49188232421875, -0.463653564453125, -0.4354248046875, -0.407196044921875, -0.37896728515625, -0.350738525390625, -0.322509765625, -0.294281005859375, -0.26605224609375, -0.237823486328125, -0.2095947265625, -0.181365966796875, -0.15313720703125, -0.124908447265625, -0.0966796875, -0.068450927734375, -0.04022216796875, -0.011993408203125, 0.0162353515625, 0.044464111328125, 0.07269287109375, 0.100921630859375, 0.129150390625, 0.157379150390625, 0.18560791015625, 0.213836669921875, 0.2420654296875, 0.270294189453125, 0.29852294921875, 0.326751708984375, 0.35498046875, 0.383209228515625, 0.41143798828125, 0.439666748046875, 0.4678955078125, 0.496124267578125, 0.52435302734375, 0.552581787109375, 0.580810546875, 0.609039306640625, 0.63726806640625, 0.665496826171875, 0.6937255859375, 0.721954345703125, 0.75018310546875, 0.778411865234375, 0.806640625]}, "gradients/decoder.model.decoder.layers.1.self_attn.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 3.0, 6.0, 3.0, 6.0, 12.0, 15.0, 24.0, 34.0, 41.0, 52.0, 80.0, 91.0, 109.0, 146.0, 107.0, 68.0, 51.0, 44.0, 29.0, 23.0, 13.0, 11.0, 12.0, 7.0, 9.0, 3.0, 4.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00010287761688232422, -9.935349225997925e-05, -9.582936763763428e-05, 
-9.23052430152893e-05, -8.878111839294434e-05, -8.525699377059937e-05, -8.17328691482544e-05, -7.820874452590942e-05, -7.468461990356445e-05, -7.116049528121948e-05, -6.763637065887451e-05, -6.411224603652954e-05, -6.058812141418457e-05, -5.70639967918396e-05, -5.353987216949463e-05, -5.001574754714966e-05, -4.649162292480469e-05, -4.296749830245972e-05, -3.9443373680114746e-05, -3.5919249057769775e-05, -3.2395124435424805e-05, -2.8870999813079834e-05, -2.5346875190734863e-05, -2.1822750568389893e-05, -1.8298625946044922e-05, -1.4774501323699951e-05, -1.125037670135498e-05, -7.72625207901001e-06, -4.202127456665039e-06, -6.780028343200684e-07, 2.8461217880249023e-06, 6.370246410369873e-06, 9.894371032714844e-06, 1.3418495655059814e-05, 1.6942620277404785e-05, 2.0466744899749756e-05, 2.3990869522094727e-05, 2.7514994144439697e-05, 3.103911876678467e-05, 3.456324338912964e-05, 3.808736801147461e-05, 4.161149263381958e-05, 4.513561725616455e-05, 4.865974187850952e-05, 5.218386650085449e-05, 5.570799112319946e-05, 5.9232115745544434e-05, 6.27562403678894e-05, 6.628036499023438e-05, 6.980448961257935e-05, 7.332861423492432e-05, 7.685273885726929e-05, 8.037686347961426e-05, 8.390098810195923e-05, 8.74251127243042e-05, 9.094923734664917e-05, 9.447336196899414e-05, 9.799748659133911e-05, 0.00010152161121368408, 0.00010504573583602905, 0.00010856986045837402, 0.000112093985080719, 0.00011561810970306396, 0.00011914223432540894, 0.0001226663589477539]}, "gradients/decoder.model.decoder.layers.1.self_attn.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 6.0, 0.0, 2.0, 7.0, 17.0, 19.0, 26.0, 61.0, 92.0, 152.0, 249.0, 446.0, 810.0, 1668.0, 3966.0, 13729.0, 91686.0, 734591.0, 171248.0, 20375.0, 5220.0, 1964.0, 944.0, 504.0, 300.0, 178.0, 121.0, 72.0, 39.0, 31.0, 10.0, 11.0, 10.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-1.1591796875, -1.125701904296875, -1.09222412109375, -1.058746337890625, -1.0252685546875, -0.991790771484375, -0.95831298828125, -0.924835205078125, -0.891357421875, -0.857879638671875, -0.82440185546875, -0.790924072265625, -0.7574462890625, -0.723968505859375, -0.69049072265625, -0.657012939453125, -0.62353515625, -0.590057373046875, -0.55657958984375, -0.523101806640625, -0.4896240234375, -0.456146240234375, -0.42266845703125, -0.389190673828125, -0.355712890625, -0.322235107421875, -0.28875732421875, -0.255279541015625, -0.2218017578125, -0.188323974609375, -0.15484619140625, -0.121368408203125, -0.087890625, -0.054412841796875, -0.02093505859375, 0.012542724609375, 0.0460205078125, 0.079498291015625, 0.11297607421875, 0.146453857421875, 0.179931640625, 0.213409423828125, 0.24688720703125, 0.280364990234375, 0.3138427734375, 0.347320556640625, 0.38079833984375, 0.414276123046875, 0.44775390625, 0.481231689453125, 0.51470947265625, 0.548187255859375, 0.5816650390625, 0.615142822265625, 0.64862060546875, 0.682098388671875, 0.715576171875, 0.749053955078125, 0.78253173828125, 0.816009521484375, 0.8494873046875, 0.882965087890625, 0.91644287109375, 0.949920654296875, 0.9833984375]}, "gradients/decoder.model.decoder.layers.1.self_attn.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 7.0, 16.0, 20.0, 24.0, 35.0, 67.0, 79.0, 113.0, 175.0, 144.0, 90.0, 80.0, 70.0, 26.0, 19.0, 16.0, 8.0, 7.0, 5.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5205078125, -0.5036087036132812, -0.4867095947265625, -0.46981048583984375, -0.452911376953125, -0.43601226806640625, -0.4191131591796875, -0.40221405029296875, -0.38531494140625, -0.36841583251953125, -0.3515167236328125, -0.33461761474609375, -0.317718505859375, -0.30081939697265625, -0.2839202880859375, -0.26702117919921875, -0.2501220703125, -0.23322296142578125, -0.2163238525390625, -0.19942474365234375, -0.182525634765625, -0.16562652587890625, -0.1487274169921875, -0.13182830810546875, -0.11492919921875, -0.09803009033203125, -0.0811309814453125, -0.06423187255859375, -0.047332763671875, -0.03043365478515625, -0.0135345458984375, 0.00336456298828125, 0.020263671875, 0.03716278076171875, 0.0540618896484375, 0.07096099853515625, 0.087860107421875, 0.10475921630859375, 0.1216583251953125, 0.13855743408203125, 0.15545654296875, 0.17235565185546875, 0.1892547607421875, 0.20615386962890625, 0.223052978515625, 0.23995208740234375, 0.2568511962890625, 0.27375030517578125, 0.2906494140625, 0.30754852294921875, 0.3244476318359375, 0.34134674072265625, 0.358245849609375, 0.37514495849609375, 0.3920440673828125, 0.40894317626953125, 0.42584228515625, 0.44274139404296875, 0.4596405029296875, 0.47653961181640625, 0.493438720703125, 0.5103378295898438, 0.5272369384765625, 0.5441360473632812, 0.56103515625]}, "gradients/decoder.model.decoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 10.0, 38.0, 106.0, 278.0, 310.0, 186.0, 51.0, 24.0, 8.0, 5.0, 1.0, 1.0], "bins": [-30.301959991455078, -29.763917922973633, -29.225875854492188, -28.687835693359375, -28.14979362487793, -27.611751556396484, -27.07370948791504, -26.535667419433594, -25.99762725830078, -25.459585189819336, -24.92154312133789, -24.383502960205078, -23.845460891723633, -23.307418823242188, -22.769376754760742, -22.231334686279297, -21.69329261779785, -21.155250549316406, -20.61720848083496, -20.07916831970215, -19.541126251220703, -19.003084182739258, -18.465042114257812, -17.927000045776367, -17.388957977294922, -16.850915908813477, -16.31287384033203, -15.774832725524902, -15.236791610717773, -14.698749542236328, -14.160707473754883, -13.622665405273438, -13.084625244140625, -12.54658317565918, -12.00854206085205, -11.470499992370605, -10.932458877563477, -10.394416809082031, -9.856374740600586, -9.31833267211914, -8.780291557312012, -8.242249488830566, -7.7042083740234375, -7.166166305541992, -6.628124713897705, -6.090083122253418, -5.552041053771973, -5.0139994621276855, -4.475957870483398, -3.9379162788391113, -3.399874448776245, -2.861832618713379, -2.323791027069092, -1.7857494354248047, -1.2477076053619385, -0.7096657752990723, -0.17162418365478516, 0.3664175271987915, 0.9044592380523682, 1.4425009489059448, 1.9805426597595215, 2.5185842514038086, 3.056626081466675, 3.594667911529541, 4.132709503173828]}, "gradients/decoder.model.decoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, 1.0, 4.0, 4.0, 6.0, 5.0, 9.0, 8.0, 9.0, 7.0, 5.0, 16.0, 17.0, 18.0, 19.0, 24.0, 31.0, 31.0, 33.0, 32.0, 32.0, 34.0, 41.0, 44.0, 40.0, 29.0, 34.0, 27.0, 34.0, 41.0, 43.0, 38.0, 28.0, 34.0, 30.0, 35.0, 22.0, 17.0, 17.0, 17.0, 22.0, 
15.0, 7.0, 10.0, 4.0, 7.0, 7.0, 8.0, 4.0, 3.0, 0.0, 1.0, 4.0], "bins": [-6.742868900299072, -6.559817314147949, -6.376765727996826, -6.193714141845703, -6.01066255569458, -5.827610969543457, -5.644558906555176, -5.461507797241211, -5.27845573425293, -5.095404148101807, -4.912352561950684, -4.7293009757995605, -4.5462493896484375, -4.3631978034973145, -4.180146217346191, -3.9970943927764893, -3.8140430450439453, -3.6309914588928223, -3.447939872741699, -3.264888286590576, -3.081836700439453, -2.89878511428833, -2.715733289718628, -2.532681703567505, -2.349630117416382, -2.166578531265259, -1.9835269451141357, -1.8004752397537231, -1.6174236536026, -1.434372067451477, -1.2513203620910645, -1.0682687759399414, -0.8852167129516602, -0.7021651268005371, -0.5191134810447693, -0.33606183528900146, -0.15301024913787842, 0.03004133701324463, 0.21309304237365723, 0.3961446285247803, 0.5791962146759033, 0.7622478008270264, 0.9452994465827942, 1.128351092338562, 1.311402678489685, 1.494454264640808, 1.6775059700012207, 1.8605575561523438, 2.043609142303467, 2.22666072845459, 2.409712314605713, 2.592763900756836, 2.775815486907959, 2.958867073059082, 3.141918897628784, 3.3249704837799072, 3.5080220699310303, 3.6910736560821533, 3.8741252422332764, 4.0571770668029785, 4.240228652954102, 4.423280239105225, 4.606331825256348, 4.789383411407471, 4.972434997558594]}, "gradients/decoder.model.decoder.layers.0.fc2.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 5.0, 5.0, 10.0, 9.0, 12.0, 17.0, 24.0, 35.0, 44.0, 66.0, 85.0, 182.0, 357.0, 869.0, 2715.0, 12493.0, 184248.0, 3791008.0, 184252.0, 13212.0, 2847.0, 948.0, 370.0, 168.0, 100.0, 63.0, 37.0, 35.0, 23.0, 13.0, 9.0, 9.0, 5.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-5.7265625, -5.510498046875, -5.29443359375, -5.078369140625, -4.8623046875, -4.646240234375, -4.43017578125, -4.214111328125, -3.998046875, -3.781982421875, -3.56591796875, -3.349853515625, -3.1337890625, -2.917724609375, -2.70166015625, -2.485595703125, -2.26953125, -2.053466796875, -1.83740234375, -1.621337890625, -1.4052734375, -1.189208984375, -0.97314453125, -0.757080078125, -0.541015625, -0.324951171875, -0.10888671875, 0.107177734375, 0.3232421875, 0.539306640625, 0.75537109375, 0.971435546875, 1.1875, 1.403564453125, 1.61962890625, 1.835693359375, 2.0517578125, 2.267822265625, 2.48388671875, 2.699951171875, 2.916015625, 3.132080078125, 3.34814453125, 3.564208984375, 3.7802734375, 3.996337890625, 4.21240234375, 4.428466796875, 4.64453125, 4.860595703125, 5.07666015625, 5.292724609375, 5.5087890625, 5.724853515625, 5.94091796875, 6.156982421875, 6.373046875, 6.589111328125, 6.80517578125, 7.021240234375, 7.2373046875, 7.453369140625, 7.66943359375, 7.885498046875, 8.1015625]}, "gradients/decoder.model.decoder.layers.0.fc2.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 6.0, 6.0, 7.0, 8.0, 17.0, 16.0, 25.0, 16.0, 38.0, 46.0, 44.0, 61.0, 73.0, 72.0, 73.0, 70.0, 79.0, 67.0, 45.0, 41.0, 47.0, 33.0, 30.0, 20.0, 29.0, 17.0, 9.0, 5.0, 6.0, 5.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-3.244140625, -3.162841796875, -3.08154296875, -3.000244140625, -2.9189453125, -2.837646484375, -2.75634765625, -2.675048828125, -2.59375, -2.512451171875, -2.43115234375, -2.349853515625, -2.2685546875, -2.187255859375, -2.10595703125, -2.024658203125, -1.943359375, 
-1.862060546875, -1.78076171875, -1.699462890625, -1.6181640625, -1.536865234375, -1.45556640625, -1.374267578125, -1.29296875, -1.211669921875, -1.13037109375, -1.049072265625, -0.9677734375, -0.886474609375, -0.80517578125, -0.723876953125, -0.642578125, -0.561279296875, -0.47998046875, -0.398681640625, -0.3173828125, -0.236083984375, -0.15478515625, -0.073486328125, 0.0078125, 0.089111328125, 0.17041015625, 0.251708984375, 0.3330078125, 0.414306640625, 0.49560546875, 0.576904296875, 0.658203125, 0.739501953125, 0.82080078125, 0.902099609375, 0.9833984375, 1.064697265625, 1.14599609375, 1.227294921875, 1.30859375, 1.389892578125, 1.47119140625, 1.552490234375, 1.6337890625, 1.715087890625, 1.79638671875, 1.877685546875, 1.958984375]}, "gradients/decoder.model.decoder.layers.0.fc1.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 2.0, 5.0, 22.0, 25.0, 53.0, 83.0, 186.0, 403.0, 983.0, 3750.0, 37607.0, 3871668.0, 267774.0, 8891.0, 1716.0, 575.0, 259.0, 116.0, 81.0, 36.0, 20.0, 12.0, 7.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-10.875, -10.60125732421875, -10.3275146484375, -10.05377197265625, -9.780029296875, -9.50628662109375, -9.2325439453125, -8.95880126953125, -8.68505859375, -8.41131591796875, -8.1375732421875, -7.86383056640625, -7.590087890625, -7.31634521484375, -7.0426025390625, -6.76885986328125, -6.4951171875, -6.22137451171875, -5.9476318359375, -5.67388916015625, -5.400146484375, -5.12640380859375, -4.8526611328125, -4.57891845703125, -4.30517578125, -4.03143310546875, -3.7576904296875, -3.48394775390625, -3.210205078125, -2.93646240234375, -2.6627197265625, -2.38897705078125, -2.115234375, -1.84149169921875, -1.5677490234375, -1.29400634765625, -1.020263671875, -0.74652099609375, -0.4727783203125, -0.19903564453125, 0.07470703125, 0.34844970703125, 0.6221923828125, 0.89593505859375, 1.169677734375, 1.44342041015625, 1.7171630859375, 1.99090576171875, 2.2646484375, 2.53839111328125, 2.8121337890625, 3.08587646484375, 3.359619140625, 3.63336181640625, 3.9071044921875, 4.18084716796875, 4.45458984375, 4.72833251953125, 5.0020751953125, 5.27581787109375, 5.549560546875, 5.82330322265625, 6.0970458984375, 6.37078857421875, 6.64453125]}, "gradients/decoder.model.decoder.layers.0.fc1.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 5.0, 6.0, 10.0, 18.0, 23.0, 46.0, 85.0, 147.0, 345.0, 821.0, 1138.0, 790.0, 319.0, 166.0, 75.0, 40.0, 18.0, 11.0, 7.0, 7.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.5361328125, -1.4706878662109375, -1.405242919921875, -1.3397979736328125, -1.27435302734375, -1.2089080810546875, -1.143463134765625, -1.0780181884765625, -1.0125732421875, -0.9471282958984375, -0.881683349609375, -0.8162384033203125, -0.75079345703125, -0.6853485107421875, -0.619903564453125, -0.5544586181640625, -0.489013671875, -0.4235687255859375, -0.358123779296875, -0.2926788330078125, -0.22723388671875, -0.1617889404296875, -0.096343994140625, -0.0308990478515625, 0.0345458984375, 0.0999908447265625, 0.165435791015625, 0.2308807373046875, 0.29632568359375, 0.3617706298828125, 0.427215576171875, 0.4926605224609375, 0.55810546875, 0.6235504150390625, 0.688995361328125, 0.7544403076171875, 0.81988525390625, 0.8853302001953125, 
0.950775146484375, 1.0162200927734375, 1.0816650390625, 1.1471099853515625, 1.212554931640625, 1.2779998779296875, 1.34344482421875, 1.4088897705078125, 1.474334716796875, 1.5397796630859375, 1.605224609375, 1.6706695556640625, 1.736114501953125, 1.8015594482421875, 1.86700439453125, 1.9324493408203125, 1.997894287109375, 2.0633392333984375, 2.1287841796875, 2.1942291259765625, 2.259674072265625, 2.3251190185546875, 2.39056396484375, 2.4560089111328125, 2.521453857421875, 2.5868988037109375, 2.65234375]}, "gradients/decoder.model.decoder.layers.0.encoder_attn_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 7.0, 3.0, 8.0, 9.0, 11.0, 19.0, 7.0, 30.0, 26.0, 42.0, 42.0, 50.0, 52.0, 68.0, 70.0, 66.0, 72.0, 72.0, 63.0, 56.0, 53.0, 34.0, 34.0, 28.0, 18.0, 22.0, 17.0, 8.0, 5.0, 7.0, 5.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0], "bins": [-4.512925148010254, -4.408871173858643, -4.304817199707031, -4.20076322555542, -4.096709251403809, -3.9926552772521973, -3.888601303100586, -3.7845473289489746, -3.6804933547973633, -3.576439380645752, -3.4723854064941406, -3.3683314323425293, -3.264277458190918, -3.1602234840393066, -3.0561695098876953, -2.952115535736084, -2.8480613231658936, -2.7440073490142822, -2.639953374862671, -2.5358994007110596, -2.4318454265594482, -2.327791452407837, -2.2237372398376465, -2.119683265686035, -2.015629291534424, -1.9115753173828125, -1.8075213432312012, -1.7034673690795898, -1.5994133949279785, -1.4953594207763672, -1.3913053274154663, -1.287251353263855, -1.1831974983215332, -1.0791435241699219, -0.9750895500183105, -0.8710355162620544, -0.7669815421104431, -0.6629275679588318, -0.5588735342025757, -0.45481956005096436, -0.350765585899353, -0.2467115968465805, -0.14265760779380798, -0.03860360383987427, 0.06545037031173706, 0.1695043444633484, 0.2735583782196045, 0.3776123523712158, 0.48166632652282715, 0.5857203006744385, 0.6897742748260498, 0.7938283085823059, 0.8978822827339172, 1.0019361972808838, 1.1059902906417847, 1.210044264793396, 1.3140982389450073, 1.4181522130966187, 1.52220618724823, 1.6262602806091309, 1.7303142547607422, 1.8343682289123535, 1.9384222030639648, 2.042476177215576, 2.1465301513671875]}, "gradients/decoder.model.decoder.layers.0.encoder_attn_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0, 6.0, 2.0, 2.0, 4.0, 5.0, 11.0, 9.0, 14.0, 7.0, 15.0, 15.0, 22.0, 22.0, 23.0, 27.0, 35.0, 27.0, 33.0, 37.0, 38.0, 45.0, 45.0, 46.0, 47.0, 37.0, 34.0, 33.0, 40.0, 36.0, 34.0, 29.0, 27.0, 25.0, 29.0, 26.0, 26.0, 21.0, 11.0, 13.0, 7.0, 12.0, 11.0, 3.0, 6.0, 4.0, 3.0, 1.0, 2.0, 1.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-2.2501275539398193, -2.178273916244507, -2.1064205169677734, -2.034566879272461, -1.962713360786438, -1.890859842300415, -1.8190062046051025, -1.7471526861190796, -1.6752991676330566, -1.6034456491470337, -1.5315921306610107, -1.4597384929656982, -1.3878849744796753, -1.3160314559936523, -1.2441778182983398, -1.172324299812317, -1.100470781326294, -1.028617262840271, -0.9567636847496033, -0.8849101066589355, -0.8130565881729126, -0.7412030696868896, -0.6693494915962219, -0.5974959135055542, -0.5256423950195312, -0.4537888467311859, -0.3819352984428406, -0.31008175015449524, -0.2382282018661499, -0.16637465357780457, -0.09452110528945923, -0.022667527198791504, 0.04918622970581055, 0.12103977799415588, 0.19289332628250122, 0.26474687457084656, 0.3366004228591919, 
[W&B gradient-histogram entries elided. Each logged entry has the form `{"_type": "histogram", "values": [per-bin counts], "bins": [bin edges, one longer than values]}`. The entries in this portion of the summary cover: `gradients/decoder.model.decoder.layers.0.encoder_attn.{q,k,v,out}_proj.{weight,bias}`, `...layers.0.self_attn_layer_norm.{weight,bias}`, `...layers.0.self_attn.{q,k,v,out}_proj.{weight,bias}`, `...layernorm_embedding.{weight,bias}`, `...embed_positions.weight`, `...embed_tokens.weight`, `gradients/encoder.adapter.layers.{0,1,2}.conv.{weight,bias}`, `gradients/encoder.encoder.layer_norm.{weight,bias}`, and `gradients/encoder.encoder.layers.23.{feed_forward.intermediate_dense, feed_forward.output_dense, final_layer_norm, attention.{q,k,v,out}_proj}.{weight,bias}` (the `attention.q_proj.weight` entry is truncated here).]
4.0, 11.0, 9.0, 26.0, 34.0, 77.0, 223.0, 581.0, 1736.0, 6179.0, 29638.0, 950591.0, 47372.0, 8553.0, 2286.0, 715.0, 278.0, 106.0, 50.0, 15.0, 19.0, 10.0, 5.0, 4.0, 2.0, 4.0, 4.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0], "bins": [-0.53564453125, -0.5206031799316406, -0.5055618286132812, -0.4905204772949219, -0.4754791259765625, -0.4604377746582031, -0.44539642333984375, -0.4303550720214844, -0.415313720703125, -0.4002723693847656, -0.38523101806640625, -0.3701896667480469, -0.3551483154296875, -0.3401069641113281, -0.32506561279296875, -0.3100242614746094, -0.29498291015625, -0.2799415588378906, -0.26490020751953125, -0.24985885620117188, -0.2348175048828125, -0.21977615356445312, -0.20473480224609375, -0.18969345092773438, -0.174652099609375, -0.15961074829101562, -0.14456939697265625, -0.12952804565429688, -0.1144866943359375, -0.09944534301757812, -0.08440399169921875, -0.06936264038085938, -0.0543212890625, -0.039279937744140625, -0.02423858642578125, -0.009197235107421875, 0.0058441162109375, 0.020885467529296875, 0.03592681884765625, 0.050968170166015625, 0.066009521484375, 0.08105087280273438, 0.09609222412109375, 0.11113357543945312, 0.1261749267578125, 0.14121627807617188, 0.15625762939453125, 0.17129898071289062, 0.18634033203125, 0.20138168334960938, 0.21642303466796875, 0.23146438598632812, 0.2465057373046875, 0.2615470886230469, 0.27658843994140625, 0.2916297912597656, 0.306671142578125, 0.3217124938964844, 0.33675384521484375, 0.3517951965332031, 0.3668365478515625, 0.3818778991699219, 0.39691925048828125, 0.4119606018066406, 0.427001953125]}, "gradients/encoder.encoder.layers.23.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 2.0, 1.0, 6.0, 4.0, 5.0, 3.0, 4.0, 10.0, 11.0, 6.0, 6.0, 7.0, 15.0, 23.0, 31.0, 115.0, 420.0, 172.0, 48.0, 25.0, 10.0, 12.0, 10.0, 4.0, 8.0, 10.0, 4.0, 7.0, 8.0, 6.0, 1.0, 2.0, 4.0, 0.0, 1.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.307861328125, -0.297821044921875, -0.28778076171875, -0.277740478515625, -0.2677001953125, -0.257659912109375, -0.24761962890625, -0.237579345703125, -0.2275390625, -0.217498779296875, -0.20745849609375, -0.197418212890625, -0.1873779296875, -0.177337646484375, -0.16729736328125, -0.157257080078125, -0.147216796875, -0.137176513671875, -0.12713623046875, -0.117095947265625, -0.1070556640625, -0.097015380859375, -0.08697509765625, -0.076934814453125, -0.06689453125, -0.056854248046875, -0.04681396484375, -0.036773681640625, -0.0267333984375, -0.016693115234375, -0.00665283203125, 0.003387451171875, 0.013427734375, 0.023468017578125, 0.03350830078125, 0.043548583984375, 0.0535888671875, 0.063629150390625, 0.07366943359375, 0.083709716796875, 0.09375, 0.103790283203125, 0.11383056640625, 0.123870849609375, 0.1339111328125, 0.143951416015625, 0.15399169921875, 0.164031982421875, 0.174072265625, 0.184112548828125, 0.19415283203125, 0.204193115234375, 0.2142333984375, 0.224273681640625, 0.23431396484375, 0.244354248046875, 0.25439453125, 0.264434814453125, 0.27447509765625, 0.284515380859375, 0.2945556640625, 0.304595947265625, 0.31463623046875, 0.324676513671875, 0.334716796875]}, "gradients/encoder.encoder.layers.23.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 9.0, 18.0, 75.0, 472.0, 361.0, 67.0, 10.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.8925241231918335, -1.7027668952941895, -1.5130096673965454, -1.3232524394989014, -1.1334953308105469, -0.9437380433082581, -0.7539808750152588, -0.5642236471176147, -0.3744664192199707, -0.18470920622348785, 0.005048006772994995, 0.19480520486831665, 0.3845624327659607, 0.5743196606636047, 0.764076828956604, 0.953834056854248, 1.143591284751892, 1.3333485126495361, 1.5231057405471802, 1.7128629684448242, 1.9026200771331787, 2.0923774242401123, 2.282134532928467, 2.4718918800354004, 2.661648988723755, 2.8514060974121094, 3.041163444519043, 3.2309205532073975, 3.420677900314331, 3.6104350090026855, 3.800192356109619, 3.9899494647979736, 4.179707050323486, 4.36946439743042, 4.559221267700195, 4.748978614807129, 4.9387359619140625, 5.128493309020996, 5.3182501792907715, 5.508007526397705, 5.697764873504639, 5.887522220611572, 6.077279090881348, 6.267036437988281, 6.456793785095215, 6.646551132202148, 6.836308002471924, 7.026065349578857, 7.215822219848633, 7.405579566955566, 7.595336437225342, 7.785093784332275, 7.974851131439209, 8.164608001708984, 8.354365348815918, 8.544122695922852, 8.733880043029785, 8.923637390136719, 9.113394737243652, 9.303152084350586, 9.492908477783203, 9.682665824890137, 9.87242317199707, 10.062180519104004, 10.251937866210938]}, "gradients/encoder.encoder.layers.23.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 3.0, 4.0, 10.0, 6.0, 8.0, 15.0, 12.0, 11.0, 13.0, 21.0, 18.0, 30.0, 26.0, 35.0, 37.0, 39.0, 48.0, 46.0, 43.0, 47.0, 60.0, 61.0, 51.0, 45.0, 39.0, 43.0, 37.0, 37.0, 26.0, 29.0, 17.0, 23.0, 13.0, 18.0, 10.0, 7.0, 5.0, 5.0, 7.0, 4.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.8509693145751953, -0.8158197402954102, -0.7806701064109802, -0.7455205321311951, -0.7103708982467651, -0.67522132396698, -0.6400717496871948, -0.6049221754074097, -0.5697725415229797, -0.5346229672431946, -0.49947333335876465, -0.4643237590789795, -0.42917415499687195, -0.3940245509147644, -0.35887497663497925, -0.3237253725528717, -0.28857576847076416, -0.2534261643886566, -0.21827657520771027, -0.18312698602676392, -0.14797738194465637, -0.11282777786254883, -0.07767818868160248, -0.04252859950065613, -0.007378995418548584, 0.027770601212978363, 0.06292019784450531, 0.09806979447603226, 0.1332193911075592, 0.16836899518966675, 0.2035185843706131, 0.23866817355155945, 0.27381789684295654, 0.3089675009250641, 0.34411710500717163, 0.3792666792869568, 0.41441628336906433, 0.4495658874511719, 0.48471546173095703, 0.5198650360107422, 0.5550146698951721, 0.5901642441749573, 0.6253138780593872, 0.6604634523391724, 0.6956130266189575, 0.7307626605033875, 0.7659122347831726, 0.8010618686676025, 0.8362114429473877, 0.8713610172271729, 0.9065106511116028, 0.9416602253913879, 0.9768098592758179, 1.011959433555603, 1.0471090078353882, 1.0822585821151733, 1.117408275604248, 1.1525578498840332, 1.1877074241638184, 1.222857117652893, 1.2580066919326782, 1.2931562662124634, 1.3283058404922485, 1.3634554147720337, 1.3986049890518188]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 8.0, 9.0, 12.0, 11.0, 12.0, 11.0, 19.0, 33.0, 56.0, 59.0, 100.0, 129.0, 211.0, 360.0, 674.0, 1549.0, 3857.0, 14757.0, 140030.0, 3978899.0, 40990.0, 7699.0, 2391.0, 1021.0, 539.0, 296.0, 175.0, 104.0, 96.0, 
58.0, 27.0, 25.0, 14.0, 11.0, 10.0, 10.0, 13.0, 1.0, 7.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.56640625, -0.5463790893554688, -0.5263519287109375, -0.5063247680664062, -0.486297607421875, -0.46627044677734375, -0.4462432861328125, -0.42621612548828125, -0.40618896484375, -0.38616180419921875, -0.3661346435546875, -0.34610748291015625, -0.326080322265625, -0.30605316162109375, -0.2860260009765625, -0.26599884033203125, -0.2459716796875, -0.22594451904296875, -0.2059173583984375, -0.18589019775390625, -0.165863037109375, -0.14583587646484375, -0.1258087158203125, -0.10578155517578125, -0.08575439453125, -0.06572723388671875, -0.0457000732421875, -0.02567291259765625, -0.005645751953125, 0.01438140869140625, 0.0344085693359375, 0.05443572998046875, 0.074462890625, 0.09449005126953125, 0.1145172119140625, 0.13454437255859375, 0.154571533203125, 0.17459869384765625, 0.1946258544921875, 0.21465301513671875, 0.23468017578125, 0.25470733642578125, 0.2747344970703125, 0.29476165771484375, 0.314788818359375, 0.33481597900390625, 0.3548431396484375, 0.37487030029296875, 0.3948974609375, 0.41492462158203125, 0.4349517822265625, 0.45497894287109375, 0.475006103515625, 0.49503326416015625, 0.5150604248046875, 0.5350875854492188, 0.55511474609375, 0.5751419067382812, 0.5951690673828125, 0.6151962280273438, 0.635223388671875, 0.6552505493164062, 0.6752777099609375, 0.6953048706054688, 0.71533203125]}, "gradients/encoder.encoder.layers.22.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 4.0, 1.0, 5.0, 1.0, 1.0, 1.0, 3.0, 2.0, 4.0, 6.0, 1.0, 8.0, 5.0, 13.0, 11.0, 13.0, 17.0, 18.0, 20.0, 35.0, 29.0, 37.0, 48.0, 42.0, 50.0, 49.0, 63.0, 61.0, 55.0, 52.0, 56.0, 39.0, 31.0, 29.0, 40.0, 30.0, 16.0, 26.0, 15.0, 15.0, 18.0, 9.0, 7.0, 11.0, 3.0, 2.0, 4.0, 3.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.06427001953125, -0.062091827392578125, -0.05991363525390625, -0.057735443115234375, -0.0555572509765625, -0.053379058837890625, -0.05120086669921875, -0.049022674560546875, -0.046844482421875, -0.044666290283203125, -0.04248809814453125, -0.040309906005859375, -0.0381317138671875, -0.035953521728515625, -0.03377532958984375, -0.031597137451171875, -0.0294189453125, -0.027240753173828125, -0.02506256103515625, -0.022884368896484375, -0.0207061767578125, -0.018527984619140625, -0.01634979248046875, -0.014171600341796875, -0.011993408203125, -0.009815216064453125, -0.00763702392578125, -0.005458831787109375, -0.0032806396484375, -0.001102447509765625, 0.00107574462890625, 0.003253936767578125, 0.00543212890625, 0.007610321044921875, 0.00978851318359375, 0.011966705322265625, 0.0141448974609375, 0.016323089599609375, 0.01850128173828125, 0.020679473876953125, 0.022857666015625, 0.025035858154296875, 0.02721405029296875, 0.029392242431640625, 0.0315704345703125, 0.033748626708984375, 0.03592681884765625, 0.038105010986328125, 0.040283203125, 0.042461395263671875, 0.04463958740234375, 0.046817779541015625, 0.0489959716796875, 0.051174163818359375, 0.05335235595703125, 0.055530548095703125, 0.057708740234375, 0.059886932373046875, 0.06206512451171875, 0.06424331665039062, 0.0664215087890625, 0.06859970092773438, 0.07077789306640625, 0.07295608520507812, 0.07513427734375]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 10.0, 10.0, 
5.0, 13.0, 26.0, 33.0, 110.0, 220.0, 662.0, 2828.0, 25719.0, 3965466.0, 188124.0, 8961.0, 1407.0, 389.0, 134.0, 66.0, 33.0, 21.0, 14.0, 11.0, 8.0, 4.0, 5.0, 5.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.841796875, -0.8168792724609375, -0.791961669921875, -0.7670440673828125, -0.74212646484375, -0.7172088623046875, -0.692291259765625, -0.6673736572265625, -0.6424560546875, -0.6175384521484375, -0.592620849609375, -0.5677032470703125, -0.54278564453125, -0.5178680419921875, -0.492950439453125, -0.4680328369140625, -0.443115234375, -0.4181976318359375, -0.393280029296875, -0.3683624267578125, -0.34344482421875, -0.3185272216796875, -0.293609619140625, -0.2686920166015625, -0.2437744140625, -0.2188568115234375, -0.193939208984375, -0.1690216064453125, -0.14410400390625, -0.1191864013671875, -0.094268798828125, -0.0693511962890625, -0.04443359375, -0.0195159912109375, 0.005401611328125, 0.0303192138671875, 0.05523681640625, 0.0801544189453125, 0.105072021484375, 0.1299896240234375, 0.1549072265625, 0.1798248291015625, 0.204742431640625, 0.2296600341796875, 0.25457763671875, 0.2794952392578125, 0.304412841796875, 0.3293304443359375, 0.354248046875, 0.3791656494140625, 0.404083251953125, 0.4290008544921875, 0.45391845703125, 0.4788360595703125, 0.503753662109375, 0.5286712646484375, 0.5535888671875, 0.5785064697265625, 0.603424072265625, 0.6283416748046875, 0.65325927734375, 0.6781768798828125, 0.703094482421875, 0.7280120849609375, 0.7529296875]}, "gradients/encoder.encoder.layers.22.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 3.0, 4.0, 7.0, 9.0, 12.0, 9.0, 14.0, 15.0, 29.0, 24.0, 32.0, 72.0, 82.0, 150.0, 734.0, 1956.0, 526.0, 130.0, 85.0, 39.0, 40.0, 28.0, 28.0, 10.0, 6.0, 14.0, 4.0, 7.0, 9.0, 0.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1558837890625, -0.15076637268066406, -0.14564895629882812, -0.1405315399169922, -0.13541412353515625, -0.1302967071533203, -0.12517929077148438, -0.12006187438964844, -0.1149444580078125, -0.10982704162597656, -0.10470962524414062, -0.09959220886230469, -0.09447479248046875, -0.08935737609863281, -0.08423995971679688, -0.07912254333496094, -0.074005126953125, -0.06888771057128906, -0.06377029418945312, -0.05865287780761719, -0.05353546142578125, -0.04841804504394531, -0.043300628662109375, -0.03818321228027344, -0.0330657958984375, -0.027948379516601562, -0.022830963134765625, -0.017713546752929688, -0.01259613037109375, -0.0074787139892578125, -0.002361297607421875, 0.0027561187744140625, 0.00787353515625, 0.012990951538085938, 0.018108367919921875, 0.023225784301757812, 0.02834320068359375, 0.03346061706542969, 0.038578033447265625, 0.04369544982910156, 0.0488128662109375, 0.05393028259277344, 0.059047698974609375, 0.06416511535644531, 0.06928253173828125, 0.07439994812011719, 0.07951736450195312, 0.08463478088378906, 0.089752197265625, 0.09486961364746094, 0.09998703002929688, 0.10510444641113281, 0.11022186279296875, 0.11533927917480469, 0.12045669555664062, 0.12557411193847656, 0.1306915283203125, 0.13580894470214844, 0.14092636108398438, 0.1460437774658203, 0.15116119384765625, 0.1562786102294922, 0.16139602661132812, 0.16651344299316406, 0.171630859375]}, "gradients/encoder.encoder.layers.22.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 4.0, 11.0, 19.0, 36.0, 63.0, 83.0, 
151.0, 177.0, 168.0, 112.0, 71.0, 33.0, 30.0, 16.0, 17.0, 3.0, 3.0, 0.0, 4.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2656150460243225, -0.25055819749832153, -0.23550133407115936, -0.2204444706439972, -0.20538762211799622, -0.19033077359199524, -0.17527391016483307, -0.1602170467376709, -0.14516019821166992, -0.13010334968566895, -0.11504648625850677, -0.0999896302819252, -0.08493277430534363, -0.06987591832876205, -0.05481906235218048, -0.03976220637559891, -0.024705350399017334, -0.00964849442243576, 0.005408361554145813, 0.020465217530727386, 0.03552207350730896, 0.05057892948389053, 0.0656357854604721, 0.08069264143705368, 0.09574949741363525, 0.11080635339021683, 0.1258632093667984, 0.14092007279396057, 0.15597692131996155, 0.17103376984596252, 0.1860906332731247, 0.20114749670028687, 0.21620434522628784, 0.23126119375228882, 0.246318057179451, 0.26137492060661316, 0.27643176913261414, 0.2914886176586151, 0.3065454959869385, 0.32160234451293945, 0.33665919303894043, 0.3517160415649414, 0.3667728900909424, 0.38182976841926575, 0.3968866169452667, 0.4119434654712677, 0.42700034379959106, 0.44205719232559204, 0.457114040851593, 0.472170889377594, 0.48722773790359497, 0.502284586429596, 0.5173414945602417, 0.5323983430862427, 0.5474551916122437, 0.5625120401382446, 0.5775688886642456, 0.5926257371902466, 0.6076825857162476, 0.6227394342422485, 0.6377962827682495, 0.6528531908988953, 0.6679100394248962, 0.6829668879508972, 0.6980237364768982]}, "gradients/encoder.encoder.layers.22.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 3.0, 4.0, 3.0, 7.0, 9.0, 5.0, 7.0, 8.0, 19.0, 19.0, 30.0, 37.0, 26.0, 47.0, 48.0, 52.0, 58.0, 67.0, 49.0, 60.0, 58.0, 51.0, 56.0, 43.0, 56.0, 31.0, 41.0, 19.0, 25.0, 20.0, 8.0, 19.0, 7.0, 6.0, 3.0, 4.0, 3.0, 1.0, 1.0, 2.0], "bins": [-0.5101990103721619, -0.49831050634384155, -0.48642200231552124, -0.4745334982872009, -0.4626449942588806, -0.4507564902305603, -0.43886798620224, -0.4269794821739197, -0.41509097814559937, -0.40320247411727905, -0.39131397008895874, -0.3794254660606384, -0.3675369620323181, -0.3556484580039978, -0.3437599539756775, -0.3318714499473572, -0.31998294591903687, -0.30809444189071655, -0.29620593786239624, -0.2843174338340759, -0.2724289298057556, -0.2605404257774353, -0.248651921749115, -0.23676341772079468, -0.22487488389015198, -0.21298637986183167, -0.20109787583351135, -0.18920937180519104, -0.17732086777687073, -0.16543236374855042, -0.1535438597202301, -0.1416553556919098, -0.12976685166358948, -0.11787834763526917, -0.10598984360694885, -0.09410133957862854, -0.08221283555030823, -0.07032433152198792, -0.058435820043087006, -0.04654731601476669, -0.03465881198644638, -0.022770307958126068, -0.010881802067160606, 0.0010067038238048553, 0.012895207852125168, 0.02478371188044548, 0.03667221963405609, 0.048560723662376404, 0.060449227690696716, 0.07233773171901703, 0.08422623574733734, 0.09611473977565765, 0.10800324380397797, 0.11989174783229828, 0.13178026676177979, 0.1436687707901001, 0.1555572748184204, 0.16744577884674072, 0.17933428287506104, 0.19122278690338135, 0.20311129093170166, 0.21499979496002197, 0.22688829898834229, 0.2387768030166626, 0.2506653070449829]}, "gradients/encoder.encoder.layers.22.attention.out_proj.weight": {"_type": "histogram", 
"values": [1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 3.0, 0.0, 4.0, 9.0, 13.0, 19.0, 24.0, 44.0, 73.0, 83.0, 124.0, 198.0, 310.0, 488.0, 733.0, 1113.0, 1714.0, 2804.0, 4645.0, 7836.0, 13278.0, 23440.0, 44007.0, 89685.0, 214688.0, 340536.0, 152903.0, 68181.0, 34784.0, 19228.0, 10849.0, 6404.0, 3859.0, 2214.0, 1453.0, 943.0, 618.0, 438.0, 276.0, 174.0, 135.0, 85.0, 54.0, 36.0, 22.0, 12.0, 5.0, 8.0, 4.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.257568359375, -0.2500629425048828, -0.24255752563476562, -0.23505210876464844, -0.22754669189453125, -0.22004127502441406, -0.21253585815429688, -0.2050304412841797, -0.1975250244140625, -0.1900196075439453, -0.18251419067382812, -0.17500877380371094, -0.16750335693359375, -0.15999794006347656, -0.15249252319335938, -0.1449871063232422, -0.137481689453125, -0.1299762725830078, -0.12247085571289062, -0.11496543884277344, -0.10746002197265625, -0.09995460510253906, -0.09244918823242188, -0.08494377136230469, -0.0774383544921875, -0.06993293762207031, -0.062427520751953125, -0.05492210388183594, -0.04741668701171875, -0.03991127014160156, -0.032405853271484375, -0.024900436401367188, -0.01739501953125, -0.009889602661132812, -0.002384185791015625, 0.0051212310791015625, 0.01262664794921875, 0.020132064819335938, 0.027637481689453125, 0.03514289855957031, 0.0426483154296875, 0.05015373229980469, 0.057659149169921875, 0.06516456604003906, 0.07266998291015625, 0.08017539978027344, 0.08768081665039062, 0.09518623352050781, 0.102691650390625, 0.11019706726074219, 0.11770248413085938, 0.12520790100097656, 0.13271331787109375, 0.14021873474121094, 0.14772415161132812, 0.1552295684814453, 0.1627349853515625, 0.1702404022216797, 0.17774581909179688, 0.18525123596191406, 0.19275665283203125, 0.20026206970214844, 0.20776748657226562, 0.2152729034423828, 0.2227783203125]}, "gradients/encoder.encoder.layers.22.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 5.0, 0.0, 3.0, 3.0, 2.0, 2.0, 6.0, 8.0, 6.0, 7.0, 10.0, 12.0, 24.0, 19.0, 18.0, 32.0, 27.0, 34.0, 45.0, 45.0, 52.0, 55.0, 47.0, 67.0, 53.0, 59.0, 45.0, 40.0, 51.0, 27.0, 30.0, 30.0, 19.0, 24.0, 22.0, 17.0, 14.0, 11.0, 13.0, 5.0, 4.0, 3.0, 4.0, 1.0, 3.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.06207275390625, -0.05997943878173828, -0.05788612365722656, -0.055792808532714844, -0.053699493408203125, -0.051606178283691406, -0.04951286315917969, -0.04741954803466797, -0.04532623291015625, -0.04323291778564453, -0.04113960266113281, -0.039046287536621094, -0.036952972412109375, -0.034859657287597656, -0.03276634216308594, -0.03067302703857422, -0.0285797119140625, -0.02648639678955078, -0.024393081665039062, -0.022299766540527344, -0.020206451416015625, -0.018113136291503906, -0.016019821166992188, -0.013926506042480469, -0.01183319091796875, -0.009739875793457031, -0.0076465606689453125, -0.005553245544433594, -0.003459930419921875, -0.0013666152954101562, 0.0007266998291015625, 0.0028200149536132812, 0.004913330078125, 0.007006645202636719, 0.009099960327148438, 0.011193275451660156, 0.013286590576171875, 0.015379905700683594, 0.017473220825195312, 0.01956653594970703, 0.02165985107421875, 0.02375316619873047, 0.025846481323242188, 0.027939796447753906, 0.030033111572265625, 0.032126426696777344, 0.03421974182128906, 0.03631305694580078, 0.0384063720703125, 0.04049968719482422, 0.04259300231933594, 0.044686317443847656, 0.046779632568359375, 0.048872947692871094, 0.05096626281738281, 0.05305957794189453, 0.05515289306640625, 
0.05724620819091797, 0.05933952331542969, 0.061432838439941406, 0.06352615356445312, 0.06561946868896484, 0.06771278381347656, 0.06980609893798828, 0.0718994140625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 4.0, 8.0, 9.0, 11.0, 10.0, 15.0, 15.0, 18.0, 29.0, 41.0, 64.0, 69.0, 116.0, 147.0, 239.0, 428.0, 877.0, 2224.0, 6607.0, 24475.0, 119413.0, 672321.0, 176058.0, 31846.0, 8476.0, 2646.0, 1048.0, 519.0, 261.0, 172.0, 90.0, 80.0, 41.0, 43.0, 36.0, 25.0, 17.0, 10.0, 10.0, 14.0, 2.0, 6.0, 7.0, 6.0, 3.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.491455078125, -0.4748497009277344, -0.45824432373046875, -0.4416389465332031, -0.4250335693359375, -0.4084281921386719, -0.39182281494140625, -0.3752174377441406, -0.358612060546875, -0.3420066833496094, -0.32540130615234375, -0.3087959289550781, -0.2921905517578125, -0.2755851745605469, -0.25897979736328125, -0.24237442016601562, -0.22576904296875, -0.20916366577148438, -0.19255828857421875, -0.17595291137695312, -0.1593475341796875, -0.14274215698242188, -0.12613677978515625, -0.10953140258789062, -0.092926025390625, -0.07632064819335938, -0.05971527099609375, -0.043109893798828125, -0.0265045166015625, -0.009899139404296875, 0.00670623779296875, 0.023311614990234375, 0.0399169921875, 0.056522369384765625, 0.07312774658203125, 0.08973312377929688, 0.1063385009765625, 0.12294387817382812, 0.13954925537109375, 0.15615463256835938, 0.172760009765625, 0.18936538696289062, 0.20597076416015625, 0.22257614135742188, 0.2391815185546875, 0.2557868957519531, 0.27239227294921875, 0.2889976501464844, 0.30560302734375, 0.3222084045410156, 0.33881378173828125, 0.3554191589355469, 0.3720245361328125, 0.3886299133300781, 0.40523529052734375, 0.4218406677246094, 0.438446044921875, 0.4550514221191406, 0.47165679931640625, 0.4882621765136719, 0.5048675537109375, 0.5214729309082031, 0.5380783081054688, 0.5546836853027344, 0.5712890625]}, "gradients/encoder.encoder.layers.22.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 3.0, 4.0, 2.0, 3.0, 4.0, 7.0, 11.0, 4.0, 15.0, 14.0, 15.0, 18.0, 20.0, 19.0, 18.0, 31.0, 23.0, 27.0, 28.0, 43.0, 43.0, 36.0, 55.0, 41.0, 37.0, 58.0, 45.0, 51.0, 38.0, 46.0, 38.0, 29.0, 24.0, 29.0, 17.0, 24.0, 13.0, 15.0, 12.0, 11.0, 8.0, 7.0, 8.0, 5.0, 3.0, 3.0, 2.0, 3.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.1558837890625, -0.15110206604003906, -0.14632034301757812, -0.1415386199951172, -0.13675689697265625, -0.1319751739501953, -0.12719345092773438, -0.12241172790527344, -0.1176300048828125, -0.11284828186035156, -0.10806655883789062, -0.10328483581542969, -0.09850311279296875, -0.09372138977050781, -0.08893966674804688, -0.08415794372558594, -0.079376220703125, -0.07459449768066406, -0.06981277465820312, -0.06503105163574219, -0.06024932861328125, -0.05546760559082031, -0.050685882568359375, -0.04590415954589844, -0.0411224365234375, -0.03634071350097656, -0.031558990478515625, -0.026777267456054688, -0.02199554443359375, -0.017213821411132812, -0.012432098388671875, -0.0076503753662109375, -0.00286865234375, 0.0019130706787109375, 0.006694793701171875, 0.011476516723632812, 0.01625823974609375, 0.021039962768554688, 0.025821685791015625, 0.030603408813476562, 0.0353851318359375, 0.04016685485839844, 0.044948577880859375, 0.04973030090332031, 0.05451202392578125, 0.05929374694824219, 0.06407546997070312, 0.06885719299316406, 0.073638916015625, 0.07842063903808594, 
0.08320236206054688, 0.08798408508300781, 0.09276580810546875, 0.09754753112792969, 0.10232925415039062, 0.10711097717285156, 0.1118927001953125, 0.11667442321777344, 0.12145614624023438, 0.1262378692626953, 0.13101959228515625, 0.1358013153076172, 0.14058303833007812, 0.14536476135253906, 0.150146484375]}, "gradients/encoder.encoder.layers.22.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 5.0, 8.0, 5.0, 12.0, 17.0, 20.0, 33.0, 43.0, 70.0, 142.0, 260.0, 606.0, 2062.0, 7773.0, 47986.0, 791225.0, 175400.0, 17276.0, 3692.0, 1091.0, 383.0, 182.0, 90.0, 63.0, 31.0, 28.0, 12.0, 12.0, 6.0, 7.0, 6.0, 6.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2486572265625, -0.23935890197753906, -0.23006057739257812, -0.2207622528076172, -0.21146392822265625, -0.2021656036376953, -0.19286727905273438, -0.18356895446777344, -0.1742706298828125, -0.16497230529785156, -0.15567398071289062, -0.1463756561279297, -0.13707733154296875, -0.1277790069580078, -0.11848068237304688, -0.10918235778808594, -0.099884033203125, -0.09058570861816406, -0.08128738403320312, -0.07198905944824219, -0.06269073486328125, -0.05339241027832031, -0.044094085693359375, -0.03479576110839844, -0.0254974365234375, -0.016199111938476562, -0.006900787353515625, 0.0023975372314453125, 0.01169586181640625, 0.020994186401367188, 0.030292510986328125, 0.03959083557128906, 0.04888916015625, 0.05818748474121094, 0.06748580932617188, 0.07678413391113281, 0.08608245849609375, 0.09538078308105469, 0.10467910766601562, 0.11397743225097656, 0.1232757568359375, 0.13257408142089844, 0.14187240600585938, 0.1511707305908203, 0.16046905517578125, 0.1697673797607422, 0.17906570434570312, 0.18836402893066406, 0.197662353515625, 0.20696067810058594, 0.21625900268554688, 0.2255573272705078, 0.23485565185546875, 0.2441539764404297, 0.2534523010253906, 0.26275062561035156, 0.2720489501953125, 0.28134727478027344, 0.2906455993652344, 0.2999439239501953, 0.30924224853515625, 0.3185405731201172, 0.3278388977050781, 0.33713722229003906, 0.346435546875]}, "gradients/encoder.encoder.layers.22.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 3.0, 1.0, 2.0, 6.0, 5.0, 7.0, 14.0, 12.0, 16.0, 13.0, 29.0, 30.0, 44.0, 61.0, 78.0, 137.0, 159.0, 123.0, 75.0, 48.0, 37.0, 26.0, 22.0, 10.0, 14.0, 11.0, 7.0, 5.0, 7.0, 3.0, 3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-3.74913215637207e-05, -3.62154096364975e-05, -3.493949770927429e-05, -3.3663585782051086e-05, -3.238767385482788e-05, -3.1111761927604675e-05, -2.983585000038147e-05, -2.8559938073158264e-05, -2.728402614593506e-05, -2.6008114218711853e-05, -2.4732202291488647e-05, -2.3456290364265442e-05, -2.2180378437042236e-05, -2.090446650981903e-05, -1.9628554582595825e-05, -1.835264265537262e-05, -1.7076730728149414e-05, -1.580081880092621e-05, -1.4524906873703003e-05, -1.3248994946479797e-05, -1.1973083019256592e-05, -1.0697171092033386e-05, -9.42125916481018e-06, -8.145347237586975e-06, -6.8694353103637695e-06, -5.593523383140564e-06, -4.317611455917358e-06, -3.041699528694153e-06, -1.7657876014709473e-06, -4.898756742477417e-07, 7.860362529754639e-07, 2.0619481801986694e-06, 3.337860107421875e-06, 4.6137720346450806e-06, 5.889683961868286e-06, 7.165595889091492e-06, 8.441507816314697e-06, 9.717419743537903e-06, 1.0993331670761108e-05, 
1.2269243597984314e-05, 1.354515552520752e-05, 1.4821067452430725e-05, 1.609697937965393e-05, 1.7372891306877136e-05, 1.8648803234100342e-05, 1.9924715161323547e-05, 2.1200627088546753e-05, 2.247653901576996e-05, 2.3752450942993164e-05, 2.502836287021637e-05, 2.6304274797439575e-05, 2.758018672466278e-05, 2.8856098651885986e-05, 3.0132010579109192e-05, 3.14079225063324e-05, 3.26838344335556e-05, 3.395974636077881e-05, 3.5235658288002014e-05, 3.651157021522522e-05, 3.7787482142448425e-05, 3.906339406967163e-05, 4.0339305996894836e-05, 4.161521792411804e-05, 4.289112985134125e-05, 4.416704177856445e-05]}, "gradients/encoder.encoder.layers.22.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 1.0, 2.0, 4.0, 5.0, 10.0, 11.0, 11.0, 23.0, 47.0, 67.0, 126.0, 230.0, 467.0, 1169.0, 3235.0, 13758.0, 121509.0, 846605.0, 49586.0, 7859.0, 2172.0, 848.0, 372.0, 179.0, 101.0, 64.0, 30.0, 26.0, 12.0, 9.0, 6.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1939697265625, -0.18712234497070312, -0.18027496337890625, -0.17342758178710938, -0.1665802001953125, -0.15973281860351562, -0.15288543701171875, -0.14603805541992188, -0.139190673828125, -0.13234329223632812, -0.12549591064453125, -0.11864852905273438, -0.1118011474609375, -0.10495376586914062, -0.09810638427734375, -0.09125900268554688, -0.08441162109375, -0.07756423950195312, -0.07071685791015625, -0.06386947631835938, -0.0570220947265625, -0.050174713134765625, -0.04332733154296875, -0.036479949951171875, -0.029632568359375, -0.022785186767578125, -0.01593780517578125, -0.009090423583984375, -0.0022430419921875, 0.004604339599609375, 0.01145172119140625, 0.018299102783203125, 0.025146484375, 0.031993865966796875, 0.03884124755859375, 0.045688629150390625, 0.0525360107421875, 0.059383392333984375, 0.06623077392578125, 0.07307815551757812, 0.079925537109375, 0.08677291870117188, 0.09362030029296875, 0.10046768188476562, 0.1073150634765625, 0.11416244506835938, 0.12100982666015625, 0.12785720825195312, 0.13470458984375, 0.14155197143554688, 0.14839935302734375, 0.15524673461914062, 0.1620941162109375, 0.16894149780273438, 0.17578887939453125, 0.18263626098632812, 0.189483642578125, 0.19633102416992188, 0.20317840576171875, 0.21002578735351562, 0.2168731689453125, 0.22372055053710938, 0.23056793212890625, 0.23741531372070312, 0.2442626953125]}, "gradients/encoder.encoder.layers.22.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 5.0, 7.0, 12.0, 14.0, 22.0, 33.0, 53.0, 57.0, 109.0, 170.0, 191.0, 106.0, 70.0, 44.0, 27.0, 19.0, 25.0, 9.0, 7.0, 5.0, 5.0, 5.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0843505859375, -0.0817117691040039, -0.07907295227050781, -0.07643413543701172, -0.07379531860351562, -0.07115650177001953, -0.06851768493652344, -0.06587886810302734, -0.06324005126953125, -0.060601234436035156, -0.05796241760253906, -0.05532360076904297, -0.052684783935546875, -0.05004596710205078, -0.04740715026855469, -0.044768333435058594, -0.0421295166015625, -0.039490699768066406, -0.03685188293457031, -0.03421306610107422, -0.031574249267578125, -0.02893543243408203, -0.026296615600585938, -0.023657798767089844, -0.02101898193359375, -0.018380165100097656, -0.015741348266601562, -0.013102531433105469, -0.010463714599609375, 
-0.007824897766113281, -0.0051860809326171875, -0.0025472640991210938, 9.1552734375e-05, 0.0027303695678710938, 0.0053691864013671875, 0.008008003234863281, 0.010646820068359375, 0.013285636901855469, 0.015924453735351562, 0.018563270568847656, 0.02120208740234375, 0.023840904235839844, 0.026479721069335938, 0.02911853790283203, 0.031757354736328125, 0.03439617156982422, 0.03703498840332031, 0.039673805236816406, 0.0423126220703125, 0.044951438903808594, 0.04759025573730469, 0.05022907257080078, 0.052867889404296875, 0.05550670623779297, 0.05814552307128906, 0.060784339904785156, 0.06342315673828125, 0.06606197357177734, 0.06870079040527344, 0.07133960723876953, 0.07397842407226562, 0.07661724090576172, 0.07925605773925781, 0.0818948745727539, 0.08453369140625]}, "gradients/encoder.encoder.layers.22.layer_norm.weight": {"_type": "histogram", "values": [3.0, 4.0, 2.0, 40.0, 311.0, 530.0, 105.0, 21.0, 6.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.6719133853912354, -0.5430411100387573, -0.4141687750816345, -0.2852964699268341, -0.1564241647720337, -0.027551889419555664, 0.10132044553756714, 0.23019278049468994, 0.35906505584716797, 0.4879373610019684, 0.6168096661567688, 0.7456820011138916, 0.8745542764663696, 1.0034265518188477, 1.1322989463806152, 1.2611712217330933, 1.3900434970855713, 1.5189157724380493, 1.6477880477905273, 1.776660442352295, 1.905532717704773, 2.034404993057251, 2.1632773876190186, 2.292149543762207, 2.4210219383239746, 2.549894332885742, 2.6787664890289307, 2.8076388835906982, 2.9365110397338867, 3.0653834342956543, 3.194255828857422, 3.3231282234191895, 3.452000617980957, 3.5808730125427246, 3.709745168685913, 3.8386175632476807, 3.967489719390869, 4.096362113952637, 4.225234508514404, 4.354106903076172, 4.482978820800781, 4.611851215362549, 4.740723609924316, 4.869595527648926, 4.998467922210693, 5.127340316772461, 5.2562127113342285, 5.385085105895996, 5.513957500457764, 5.642829895019531, 5.771702289581299, 5.900574207305908, 6.029446601867676, 6.158318996429443, 6.287191390991211, 6.4160637855529785, 6.544936180114746, 6.673808574676514, 6.802680969238281, 6.931552886962891, 7.060425281524658, 7.189297676086426, 7.318170070648193, 7.447042465209961, 7.57591438293457]}, "gradients/encoder.encoder.layers.22.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 2.0, 6.0, 1.0, 5.0, 8.0, 13.0, 13.0, 11.0, 22.0, 19.0, 21.0, 22.0, 31.0, 39.0, 30.0, 45.0, 32.0, 42.0, 34.0, 41.0, 46.0, 39.0, 52.0, 33.0, 49.0, 50.0, 39.0, 39.0, 32.0, 24.0, 28.0, 20.0, 21.0, 16.0, 16.0, 11.0, 13.0, 10.0, 8.0, 3.0, 2.0, 1.0, 5.0, 2.0, 2.0, 4.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.5708951950073242, -0.5520833730697632, -0.5332715511322021, -0.5144597291946411, -0.4956478774547577, -0.47683605551719666, -0.45802420377731323, -0.4392123818397522, -0.42040055990219116, -0.4015887379646301, -0.3827769160270691, -0.36396506428718567, -0.34515324234962463, -0.3263414204120636, -0.3075295686721802, -0.28871774673461914, -0.2699059247970581, -0.25109410285949707, -0.23228226602077484, -0.2134704291820526, -0.19465860724449158, -0.17584678530693054, -0.1570349484682083, -0.13822311162948608, -0.11941128969192505, -0.10059946030378342, -0.08178763091564178, -0.06297580152750015, 
-0.04416397213935852, -0.02535214275121689, -0.006540313363075256, 0.012271523475646973, 0.031083285808563232, 0.049895115196704865, 0.0687069445848465, 0.08751877397298813, 0.10633060336112976, 0.1251424252986908, 0.14395426213741302, 0.16276609897613525, 0.1815779209136963, 0.20038974285125732, 0.21920157968997955, 0.23801341652870178, 0.2568252384662628, 0.27563706040382385, 0.2944489121437073, 0.3132607340812683, 0.33207255601882935, 0.3508843779563904, 0.3696961998939514, 0.38850805163383484, 0.4073198735713959, 0.4261316955089569, 0.44494354724884033, 0.46375536918640137, 0.4825671911239624, 0.5013790130615234, 0.5201908349990845, 0.5390026569366455, 0.5578144788742065, 0.5766263604164124, 0.5954381823539734, 0.6142500042915344, 0.6330618262290955]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 0.0, 7.0, 3.0, 6.0, 13.0, 10.0, 7.0, 18.0, 22.0, 26.0, 37.0, 39.0, 52.0, 72.0, 101.0, 148.0, 184.0, 303.0, 474.0, 893.0, 2257.0, 8322.0, 64143.0, 4003748.0, 98310.0, 10126.0, 2509.0, 904.0, 481.0, 332.0, 222.0, 133.0, 92.0, 60.0, 53.0, 48.0, 25.0, 28.0, 20.0, 13.0, 10.0, 7.0, 9.0, 5.0, 5.0, 4.0, 1.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.720703125, -0.6978759765625, -0.675048828125, -0.6522216796875, -0.62939453125, -0.6065673828125, -0.583740234375, -0.5609130859375, -0.5380859375, -0.5152587890625, -0.492431640625, -0.4696044921875, -0.44677734375, -0.4239501953125, -0.401123046875, -0.3782958984375, -0.35546875, -0.3326416015625, -0.309814453125, -0.2869873046875, -0.26416015625, -0.2413330078125, -0.218505859375, -0.1956787109375, -0.1728515625, -0.1500244140625, -0.127197265625, -0.1043701171875, -0.08154296875, -0.0587158203125, -0.035888671875, -0.0130615234375, 0.009765625, 0.0325927734375, 0.055419921875, 0.0782470703125, 0.10107421875, 0.1239013671875, 0.146728515625, 0.1695556640625, 0.1923828125, 0.2152099609375, 0.238037109375, 0.2608642578125, 0.28369140625, 0.3065185546875, 0.329345703125, 0.3521728515625, 0.375, 0.3978271484375, 0.420654296875, 0.4434814453125, 0.46630859375, 0.4891357421875, 0.511962890625, 0.5347900390625, 0.5576171875, 0.5804443359375, 0.603271484375, 0.6260986328125, 0.64892578125, 0.6717529296875, 0.694580078125, 0.7174072265625, 0.740234375]}, "gradients/encoder.encoder.layers.21.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 3.0, 1.0, 1.0, 2.0, 0.0, 7.0, 4.0, 10.0, 3.0, 6.0, 8.0, 12.0, 9.0, 13.0, 19.0, 26.0, 17.0, 28.0, 34.0, 30.0, 46.0, 39.0, 54.0, 55.0, 55.0, 53.0, 46.0, 51.0, 45.0, 37.0, 45.0, 37.0, 36.0, 30.0, 28.0, 27.0, 14.0, 16.0, 13.0, 8.0, 7.0, 10.0, 5.0, 4.0, 1.0, 3.0, 4.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.072265625, -0.0701742172241211, -0.06808280944824219, -0.06599140167236328, -0.06389999389648438, -0.06180858612060547, -0.05971717834472656, -0.057625770568847656, -0.05553436279296875, -0.053442955017089844, -0.05135154724121094, -0.04926013946533203, -0.047168731689453125, -0.04507732391357422, -0.04298591613769531, -0.040894508361816406, -0.0388031005859375, -0.036711692810058594, -0.03462028503417969, -0.03252887725830078, -0.030437469482421875, -0.02834606170654297, -0.026254653930664062, -0.024163246154785156, -0.02207183837890625, -0.019980430603027344, -0.017889022827148438, -0.01579761505126953, -0.013706207275390625, -0.011614799499511719, -0.009523391723632812, -0.007431983947753906, -0.005340576171875, -0.0032491683959960938, 
-0.0011577606201171875, 0.0009336471557617188, 0.003025054931640625, 0.005116462707519531, 0.0072078704833984375, 0.009299278259277344, 0.01139068603515625, 0.013482093811035156, 0.015573501586914062, 0.01766490936279297, 0.019756317138671875, 0.02184772491455078, 0.023939132690429688, 0.026030540466308594, 0.0281219482421875, 0.030213356018066406, 0.03230476379394531, 0.03439617156982422, 0.036487579345703125, 0.03857898712158203, 0.04067039489746094, 0.042761802673339844, 0.04485321044921875, 0.046944618225097656, 0.04903602600097656, 0.05112743377685547, 0.053218841552734375, 0.05531024932861328, 0.05740165710449219, 0.059493064880371094, 0.06158447265625]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 3.0, 0.0, 2.0, 4.0, 5.0, 1.0, 2.0, 6.0, 5.0, 8.0, 11.0, 14.0, 27.0, 32.0, 38.0, 62.0, 122.0, 270.0, 693.0, 2043.0, 7831.0, 40599.0, 547311.0, 3490343.0, 86114.0, 13709.0, 3320.0, 955.0, 367.0, 160.0, 83.0, 46.0, 37.0, 20.0, 12.0, 9.0, 10.0, 7.0, 6.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.58349609375, -0.5673713684082031, -0.5512466430664062, -0.5351219177246094, -0.5189971923828125, -0.5028724670410156, -0.48674774169921875, -0.4706230163574219, -0.454498291015625, -0.4383735656738281, -0.42224884033203125, -0.4061241149902344, -0.3899993896484375, -0.3738746643066406, -0.35774993896484375, -0.3416252136230469, -0.32550048828125, -0.3093757629394531, -0.29325103759765625, -0.2771263122558594, -0.2610015869140625, -0.24487686157226562, -0.22875213623046875, -0.21262741088867188, -0.196502685546875, -0.18037796020507812, -0.16425323486328125, -0.14812850952148438, -0.1320037841796875, -0.11587905883789062, -0.09975433349609375, -0.08362960815429688, -0.0675048828125, -0.051380157470703125, -0.03525543212890625, -0.019130706787109375, -0.0030059814453125, 0.013118743896484375, 0.02924346923828125, 0.045368194580078125, 0.061492919921875, 0.07761764526367188, 0.09374237060546875, 0.10986709594726562, 0.1259918212890625, 0.14211654663085938, 0.15824127197265625, 0.17436599731445312, 0.19049072265625, 0.20661544799804688, 0.22274017333984375, 0.23886489868164062, 0.2549896240234375, 0.2711143493652344, 0.28723907470703125, 0.3033638000488281, 0.319488525390625, 0.3356132507324219, 0.35173797607421875, 0.3678627014160156, 0.3839874267578125, 0.4001121520996094, 0.41623687744140625, 0.4323616027832031, 0.448486328125]}, "gradients/encoder.encoder.layers.21.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 7.0, 4.0, 6.0, 6.0, 11.0, 10.0, 10.0, 24.0, 17.0, 27.0, 28.0, 41.0, 55.0, 82.0, 117.0, 173.0, 423.0, 1269.0, 873.0, 316.0, 142.0, 124.0, 68.0, 42.0, 37.0, 27.0, 33.0, 27.0, 16.0, 18.0, 10.0, 9.0, 6.0, 6.0, 5.0, 6.0, 1.0, 1.0, 0.0, 4.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1444091796875, -0.13956642150878906, -0.13472366333007812, -0.1298809051513672, -0.12503814697265625, -0.12019538879394531, -0.11535263061523438, -0.11050987243652344, -0.1056671142578125, -0.10082435607910156, -0.09598159790039062, -0.09113883972167969, -0.08629608154296875, -0.08145332336425781, -0.07661056518554688, -0.07176780700683594, -0.066925048828125, -0.06208229064941406, -0.057239532470703125, -0.05239677429199219, -0.04755401611328125, -0.04271125793457031, -0.037868499755859375, -0.03302574157714844, -0.0281829833984375, 
-0.023340225219726562, -0.018497467041015625, -0.013654708862304688, -0.00881195068359375, -0.0039691925048828125, 0.000873565673828125, 0.0057163238525390625, 0.01055908203125, 0.015401840209960938, 0.020244598388671875, 0.025087356567382812, 0.02993011474609375, 0.03477287292480469, 0.039615631103515625, 0.04445838928222656, 0.0493011474609375, 0.05414390563964844, 0.058986663818359375, 0.06382942199707031, 0.06867218017578125, 0.07351493835449219, 0.07835769653320312, 0.08320045471191406, 0.088043212890625, 0.09288597106933594, 0.09772872924804688, 0.10257148742675781, 0.10741424560546875, 0.11225700378417969, 0.11709976196289062, 0.12194252014160156, 0.1267852783203125, 0.13162803649902344, 0.13647079467773438, 0.1413135528564453, 0.14615631103515625, 0.1509990692138672, 0.15584182739257812, 0.16068458557128906, 0.16552734375]}, "gradients/encoder.encoder.layers.21.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 3.0, 3.0, 3.0, 8.0, 10.0, 14.0, 25.0, 48.0, 57.0, 123.0, 180.0, 192.0, 120.0, 86.0, 51.0, 34.0, 25.0, 9.0, 11.0, 5.0, 1.0, 0.0, 1.0, 4.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.34069955348968506, -0.31957659125328064, -0.2984536290168762, -0.2773306369781494, -0.256207674741745, -0.23508471250534058, -0.21396175026893616, -0.19283877313137054, -0.17171581089496613, -0.1505928486585617, -0.1294698715209961, -0.10834690928459167, -0.08722393959760666, -0.06610096991062164, -0.044978007674217224, -0.02385503053665161, -0.0027320683002471924, 0.018390899524092674, 0.03951386734843254, 0.06063683331012726, 0.08175980299711227, 0.10288277268409729, 0.12400573492050171, 0.14512871205806732, 0.16625167429447174, 0.18737463653087616, 0.20849761366844177, 0.2296205759048462, 0.2507435381412506, 0.27186650037765503, 0.29298949241638184, 0.31411245465278625, 0.3352354168891907, 0.3563583791255951, 0.3774813413619995, 0.3986043334007263, 0.41972729563713074, 0.44085025787353516, 0.4619732201099396, 0.483096182346344, 0.5042191743850708, 0.5253421664237976, 0.5464650988578796, 0.5675880908966064, 0.5887110233306885, 0.6098340153694153, 0.6309570074081421, 0.6520799398422241, 0.6732028722763062, 0.694325864315033, 0.715448796749115, 0.7365717887878418, 0.7576947212219238, 0.7788177132606506, 0.7999407052993774, 0.8210636377334595, 0.8421866297721863, 0.8633096218109131, 0.8844325542449951, 0.9055555462837219, 0.926678478717804, 0.9478014707565308, 0.9689244031906128, 0.9900473952293396, 1.0111703872680664]}, "gradients/encoder.encoder.layers.21.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 4.0, 2.0, 5.0, 4.0, 4.0, 7.0, 14.0, 10.0, 14.0, 20.0, 24.0, 19.0, 30.0, 36.0, 29.0, 36.0, 52.0, 43.0, 54.0, 49.0, 47.0, 48.0, 60.0, 46.0, 41.0, 44.0, 39.0, 31.0, 31.0, 28.0, 24.0, 17.0, 17.0, 21.0, 16.0, 9.0, 9.0, 7.0, 10.0, 4.0, 0.0, 6.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.37087929248809814, -0.3571820557117462, -0.3434848487377167, -0.32978761196136475, -0.3160904049873352, -0.3023931682109833, -0.28869593143463135, -0.2749987244606018, -0.2613014876842499, -0.24760426580905914, -0.2339070439338684, -0.22020980715751648, -0.20651258528232574, -0.192815363407135, -0.17911812663078308, -0.16542090475559235, -0.1517236828804016, -0.13802646100521088, -0.12432923167943954, -0.11063200235366821, -0.09693478047847748, 
-0.08323755860328674, -0.06954032927751541, -0.05584309995174408, -0.042145878076553345, -0.02844865247607231, -0.014751426875591278, -0.0010542012751102448, 0.012643024325370789, 0.026340246200561523, 0.040037475526332855, 0.05373470485210419, 0.06743192672729492, 0.08112914860248566, 0.09482637792825699, 0.10852360725402832, 0.12222082912921906, 0.1359180510044098, 0.14961528778076172, 0.16331250965595245, 0.1770097315311432, 0.19070695340633392, 0.20440417528152466, 0.2181014120578766, 0.23179863393306732, 0.24549585580825806, 0.25919309258461, 0.2728903293609619, 0.28658753633499146, 0.3002847731113434, 0.3139819800853729, 0.32767921686172485, 0.3413764238357544, 0.3550736606121063, 0.36877089738845825, 0.3824681043624878, 0.3961653411388397, 0.40986257791519165, 0.4235597848892212, 0.4372570216655731, 0.45095425844192505, 0.4646514654159546, 0.4783487021923065, 0.49204593896865845, 0.505743145942688]}, "gradients/encoder.encoder.layers.21.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 5.0, 3.0, 8.0, 9.0, 6.0, 13.0, 18.0, 28.0, 51.0, 52.0, 81.0, 107.0, 174.0, 233.0, 336.0, 511.0, 745.0, 1041.0, 1687.0, 2511.0, 3922.0, 6111.0, 9665.0, 16550.0, 29108.0, 53893.0, 109851.0, 223865.0, 274704.0, 149939.0, 71874.0, 37614.0, 20610.0, 12050.0, 7510.0, 4595.0, 3014.0, 1976.0, 1299.0, 907.0, 531.0, 405.0, 273.0, 225.0, 132.0, 87.0, 70.0, 54.0, 37.0, 25.0, 15.0, 10.0, 8.0, 7.0, 3.0, 4.0, 0.0, 3.0, 0.0, 4.0], "bins": [-0.1585693359375, -0.15364646911621094, -0.14872360229492188, -0.1438007354736328, -0.13887786865234375, -0.1339550018310547, -0.12903213500976562, -0.12410926818847656, -0.1191864013671875, -0.11426353454589844, -0.10934066772460938, -0.10441780090332031, -0.09949493408203125, -0.09457206726074219, -0.08964920043945312, -0.08472633361816406, -0.079803466796875, -0.07488059997558594, -0.06995773315429688, -0.06503486633300781, -0.06011199951171875, -0.05518913269042969, -0.050266265869140625, -0.04534339904785156, -0.0404205322265625, -0.03549766540527344, -0.030574798583984375, -0.025651931762695312, -0.02072906494140625, -0.015806198120117188, -0.010883331298828125, -0.0059604644775390625, -0.00103759765625, 0.0038852691650390625, 0.008808135986328125, 0.013731002807617188, 0.01865386962890625, 0.023576736450195312, 0.028499603271484375, 0.03342247009277344, 0.0383453369140625, 0.04326820373535156, 0.048191070556640625, 0.05311393737792969, 0.05803680419921875, 0.06295967102050781, 0.06788253784179688, 0.07280540466308594, 0.077728271484375, 0.08265113830566406, 0.08757400512695312, 0.09249687194824219, 0.09741973876953125, 0.10234260559082031, 0.10726547241210938, 0.11218833923339844, 0.1171112060546875, 0.12203407287597656, 0.12695693969726562, 0.1318798065185547, 0.13680267333984375, 0.1417255401611328, 0.14664840698242188, 0.15157127380371094, 0.156494140625]}, "gradients/encoder.encoder.layers.21.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 7.0, 7.0, 6.0, 5.0, 3.0, 9.0, 9.0, 9.0, 8.0, 10.0, 17.0, 22.0, 22.0, 25.0, 25.0, 28.0, 26.0, 38.0, 48.0, 43.0, 37.0, 48.0, 45.0, 43.0, 45.0, 52.0, 37.0, 46.0, 29.0, 29.0, 36.0, 26.0, 28.0, 22.0, 16.0, 23.0, 15.0, 13.0, 7.0, 10.0, 9.0, 7.0, 5.0, 3.0, 0.0, 2.0, 8.0, 1.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.057403564453125, -0.05576038360595703, -0.05411720275878906, -0.052474021911621094, -0.050830841064453125, -0.049187660217285156, -0.04754447937011719, -0.04590129852294922, -0.04425811767578125, -0.04261493682861328, 
-0.04097175598144531, -0.039328575134277344, -0.037685394287109375, -0.036042213439941406, -0.03439903259277344, -0.03275585174560547, -0.0311126708984375, -0.02946949005126953, -0.027826309204101562, -0.026183128356933594, -0.024539947509765625, -0.022896766662597656, -0.021253585815429688, -0.01961040496826172, -0.01796722412109375, -0.01632404327392578, -0.014680862426757812, -0.013037681579589844, -0.011394500732421875, -0.009751319885253906, -0.008108139038085938, -0.006464958190917969, -0.00482177734375, -0.0031785964965820312, -0.0015354156494140625, 0.00010776519775390625, 0.001750946044921875, 0.0033941268920898438, 0.0050373077392578125, 0.006680488586425781, 0.00832366943359375, 0.009966850280761719, 0.011610031127929688, 0.013253211975097656, 0.014896392822265625, 0.016539573669433594, 0.018182754516601562, 0.01982593536376953, 0.0214691162109375, 0.02311229705810547, 0.024755477905273438, 0.026398658752441406, 0.028041839599609375, 0.029685020446777344, 0.03132820129394531, 0.03297138214111328, 0.03461456298828125, 0.03625774383544922, 0.03790092468261719, 0.039544105529785156, 0.041187286376953125, 0.042830467224121094, 0.04447364807128906, 0.04611682891845703, 0.047760009765625]}, "gradients/encoder.encoder.layers.21.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 3.0, 2.0, 12.0, 12.0, 12.0, 9.0, 22.0, 29.0, 62.0, 73.0, 92.0, 138.0, 238.0, 386.0, 691.0, 1280.0, 2344.0, 4407.0, 8535.0, 17668.0, 38966.0, 95918.0, 256465.0, 354914.0, 156132.0, 59175.0, 25710.0, 12156.0, 6014.0, 3113.0, 1683.0, 925.0, 534.0, 269.0, 163.0, 103.0, 81.0, 55.0, 44.0, 30.0, 24.0, 14.0, 14.0, 8.0, 7.0, 4.0, 7.0, 3.0, 1.0, 1.0, 2.0, 1.0, 3.0, 2.0], "bins": [-0.19091796875, -0.18517494201660156, -0.17943191528320312, -0.1736888885498047, -0.16794586181640625, -0.1622028350830078, -0.15645980834960938, -0.15071678161621094, -0.1449737548828125, -0.13923072814941406, -0.13348770141601562, -0.1277446746826172, -0.12200164794921875, -0.11625862121582031, -0.11051559448242188, -0.10477256774902344, -0.099029541015625, -0.09328651428222656, -0.08754348754882812, -0.08180046081542969, -0.07605743408203125, -0.07031440734863281, -0.06457138061523438, -0.05882835388183594, -0.0530853271484375, -0.04734230041503906, -0.041599273681640625, -0.03585624694824219, -0.03011322021484375, -0.024370193481445312, -0.018627166748046875, -0.012884140014648438, -0.00714111328125, -0.0013980865478515625, 0.004344940185546875, 0.010087966918945312, 0.01583099365234375, 0.021574020385742188, 0.027317047119140625, 0.03306007385253906, 0.0388031005859375, 0.04454612731933594, 0.050289154052734375, 0.05603218078613281, 0.06177520751953125, 0.06751823425292969, 0.07326126098632812, 0.07900428771972656, 0.084747314453125, 0.09049034118652344, 0.09623336791992188, 0.10197639465332031, 0.10771942138671875, 0.11346244812011719, 0.11920547485351562, 0.12494850158691406, 0.1306915283203125, 0.13643455505371094, 0.14217758178710938, 0.1479206085205078, 0.15366363525390625, 0.1594066619873047, 0.16514968872070312, 0.17089271545410156, 0.1766357421875]}, "gradients/encoder.encoder.layers.21.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, 6.0, 6.0, 8.0, 9.0, 13.0, 9.0, 13.0, 14.0, 18.0, 23.0, 21.0, 24.0, 32.0, 32.0, 36.0, 40.0, 41.0, 42.0, 38.0, 53.0, 42.0, 43.0, 45.0, 42.0, 36.0, 37.0, 32.0, 31.0, 35.0, 37.0, 23.0, 17.0, 20.0, 14.0, 14.0, 11.0, 11.0, 10.0, 10.0, 7.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 
0.0, 0.0, 1.0], "bins": [-0.11566162109375, -0.11175251007080078, -0.10784339904785156, -0.10393428802490234, -0.10002517700195312, -0.0961160659790039, -0.09220695495605469, -0.08829784393310547, -0.08438873291015625, -0.08047962188720703, -0.07657051086425781, -0.0726613998413086, -0.06875228881835938, -0.06484317779541016, -0.06093406677246094, -0.05702495574951172, -0.0531158447265625, -0.04920673370361328, -0.04529762268066406, -0.041388511657714844, -0.037479400634765625, -0.033570289611816406, -0.029661178588867188, -0.02575206756591797, -0.02184295654296875, -0.01793384552001953, -0.014024734497070312, -0.010115623474121094, -0.006206512451171875, -0.0022974014282226562, 0.0016117095947265625, 0.005520820617675781, 0.009429931640625, 0.013339042663574219, 0.017248153686523438, 0.021157264709472656, 0.025066375732421875, 0.028975486755371094, 0.03288459777832031, 0.03679370880126953, 0.04070281982421875, 0.04461193084716797, 0.04852104187011719, 0.052430152893066406, 0.056339263916015625, 0.060248374938964844, 0.06415748596191406, 0.06806659698486328, 0.0719757080078125, 0.07588481903076172, 0.07979393005371094, 0.08370304107666016, 0.08761215209960938, 0.0915212631225586, 0.09543037414550781, 0.09933948516845703, 0.10324859619140625, 0.10715770721435547, 0.11106681823730469, 0.1149759292602539, 0.11888504028320312, 0.12279415130615234, 0.12670326232910156, 0.13061237335205078, 0.134521484375]}, "gradients/encoder.encoder.layers.21.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 4.0, 4.0, 5.0, 9.0, 10.0, 20.0, 23.0, 26.0, 36.0, 39.0, 63.0, 91.0, 120.0, 180.0, 265.0, 364.0, 647.0, 978.0, 1861.0, 4226.0, 11860.0, 52827.0, 640048.0, 289439.0, 30022.0, 8280.0, 3151.0, 1471.0, 821.0, 553.0, 336.0, 270.0, 148.0, 88.0, 87.0, 58.0, 34.0, 25.0, 27.0, 16.0, 7.0, 9.0, 5.0, 3.0, 4.0, 2.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.2261962890625, -0.21909141540527344, -0.21198654174804688, -0.2048816680908203, -0.19777679443359375, -0.1906719207763672, -0.18356704711914062, -0.17646217346191406, -0.1693572998046875, -0.16225242614746094, -0.15514755249023438, -0.1480426788330078, -0.14093780517578125, -0.1338329315185547, -0.12672805786132812, -0.11962318420410156, -0.112518310546875, -0.10541343688964844, -0.09830856323242188, -0.09120368957519531, -0.08409881591796875, -0.07699394226074219, -0.06988906860351562, -0.06278419494628906, -0.0556793212890625, -0.04857444763183594, -0.041469573974609375, -0.03436470031738281, -0.02725982666015625, -0.020154953002929688, -0.013050079345703125, -0.0059452056884765625, 0.00115966796875, 0.008264541625976562, 0.015369415283203125, 0.022474288940429688, 0.02957916259765625, 0.03668403625488281, 0.043788909912109375, 0.05089378356933594, 0.0579986572265625, 0.06510353088378906, 0.07220840454101562, 0.07931327819824219, 0.08641815185546875, 0.09352302551269531, 0.10062789916992188, 0.10773277282714844, 0.114837646484375, 0.12194252014160156, 0.12904739379882812, 0.1361522674560547, 0.14325714111328125, 0.1503620147705078, 0.15746688842773438, 0.16457176208496094, 0.1716766357421875, 0.17878150939941406, 0.18588638305664062, 0.1929912567138672, 0.20009613037109375, 0.2072010040283203, 0.21430587768554688, 0.22141075134277344, 0.228515625]}, "gradients/encoder.encoder.layers.21.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 3.0, 4.0, 1.0, 2.0, 2.0, 2.0, 3.0, 5.0, 5.0, 4.0, 7.0, 10.0, 12.0, 20.0, 30.0, 19.0, 57.0, 84.0, 148.0, 205.0, 141.0, 69.0, 46.0, 30.0, 
20.0, 18.0, 21.0, 10.0, 9.0, 3.0, 6.0, 0.0, 3.0, 2.0, 4.0, 0.0, 1.0, 3.0, 3.0, 5.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-2.950429916381836e-05, -2.8195790946483612e-05, -2.6887282729148865e-05, -2.5578774511814117e-05, -2.427026629447937e-05, -2.2961758077144623e-05, -2.1653249859809875e-05, -2.0344741642475128e-05, -1.903623342514038e-05, -1.7727725207805634e-05, -1.6419216990470886e-05, -1.5110708773136139e-05, -1.3802200555801392e-05, -1.2493692338466644e-05, -1.1185184121131897e-05, -9.87667590379715e-06, -8.568167686462402e-06, -7.259659469127655e-06, -5.951151251792908e-06, -4.64264303445816e-06, -3.334134817123413e-06, -2.0256265997886658e-06, -7.171183824539185e-07, 5.913898348808289e-07, 1.8998980522155762e-06, 3.2084062695503235e-06, 4.516914486885071e-06, 5.825422704219818e-06, 7.1339309215545654e-06, 8.442439138889313e-06, 9.75094735622406e-06, 1.1059455573558807e-05, 1.2367963790893555e-05, 1.3676472008228302e-05, 1.498498022556305e-05, 1.6293488442897797e-05, 1.7601996660232544e-05, 1.891050487756729e-05, 2.021901309490204e-05, 2.1527521312236786e-05, 2.2836029529571533e-05, 2.414453774690628e-05, 2.5453045964241028e-05, 2.6761554181575775e-05, 2.8070062398910522e-05, 2.937857061624527e-05, 3.068707883358002e-05, 3.1995587050914764e-05, 3.330409526824951e-05, 3.461260348558426e-05, 3.5921111702919006e-05, 3.7229619920253754e-05, 3.85381281375885e-05, 3.984663635492325e-05, 4.1155144572257996e-05, 4.246365278959274e-05, 4.377216100692749e-05, 4.508066922426224e-05, 4.6389177441596985e-05, 4.769768565893173e-05, 4.900619387626648e-05, 5.031470209360123e-05, 5.1623210310935974e-05, 5.293171852827072e-05, 5.424022674560547e-05]}, "gradients/encoder.encoder.layers.21.attention.q_proj.weight": {"_type": "histogram", "values": [3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 4.0, 2.0, 5.0, 0.0, 8.0, 10.0, 14.0, 25.0, 29.0, 57.0, 109.0, 157.0, 332.0, 645.0, 1291.0, 3121.0, 8555.0, 32525.0, 294712.0, 624217.0, 61870.0, 13089.0, 4299.0, 1757.0, 827.0, 369.0, 251.0, 111.0, 61.0, 45.0, 25.0, 14.0, 9.0, 8.0, 7.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.148681640625, -0.14321136474609375, -0.1377410888671875, -0.13227081298828125, -0.126800537109375, -0.12133026123046875, -0.1158599853515625, -0.11038970947265625, -0.10491943359375, -0.09944915771484375, -0.0939788818359375, -0.08850860595703125, -0.083038330078125, -0.07756805419921875, -0.0720977783203125, -0.06662750244140625, -0.0611572265625, -0.05568695068359375, -0.0502166748046875, -0.04474639892578125, -0.039276123046875, -0.03380584716796875, -0.0283355712890625, -0.02286529541015625, -0.01739501953125, -0.01192474365234375, -0.0064544677734375, -0.00098419189453125, 0.004486083984375, 0.00995635986328125, 0.0154266357421875, 0.02089691162109375, 0.0263671875, 0.03183746337890625, 0.0373077392578125, 0.04277801513671875, 0.048248291015625, 0.05371856689453125, 0.0591888427734375, 0.06465911865234375, 0.07012939453125, 0.07559967041015625, 0.0810699462890625, 0.08654022216796875, 0.092010498046875, 0.09748077392578125, 0.1029510498046875, 0.10842132568359375, 0.1138916015625, 0.11936187744140625, 0.1248321533203125, 0.13030242919921875, 0.135772705078125, 0.14124298095703125, 0.1467132568359375, 0.15218353271484375, 0.15765380859375, 0.16312408447265625, 0.1685943603515625, 0.17406463623046875, 0.179534912109375, 0.18500518798828125, 0.1904754638671875, 0.19594573974609375, 
0.201416015625]}, "gradients/encoder.encoder.layers.21.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 3.0, 2.0, 9.0, 5.0, 4.0, 13.0, 13.0, 15.0, 21.0, 29.0, 49.0, 66.0, 77.0, 116.0, 130.0, 107.0, 93.0, 63.0, 46.0, 28.0, 27.0, 16.0, 16.0, 11.0, 12.0, 7.0, 4.0, 6.0, 2.0, 2.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.08258056640625, -0.08025264739990234, -0.07792472839355469, -0.07559680938720703, -0.07326889038085938, -0.07094097137451172, -0.06861305236816406, -0.0662851333618164, -0.06395721435546875, -0.061629295349121094, -0.05930137634277344, -0.05697345733642578, -0.054645538330078125, -0.05231761932373047, -0.04998970031738281, -0.047661781311035156, -0.0453338623046875, -0.043005943298339844, -0.04067802429199219, -0.03835010528564453, -0.036022186279296875, -0.03369426727294922, -0.03136634826660156, -0.029038429260253906, -0.02671051025390625, -0.024382591247558594, -0.022054672241210938, -0.01972675323486328, -0.017398834228515625, -0.015070915222167969, -0.012742996215820312, -0.010415077209472656, -0.008087158203125, -0.005759239196777344, -0.0034313201904296875, -0.0011034011840820312, 0.001224517822265625, 0.0035524368286132812, 0.0058803558349609375, 0.008208274841308594, 0.01053619384765625, 0.012864112854003906, 0.015192031860351562, 0.01751995086669922, 0.019847869873046875, 0.02217578887939453, 0.024503707885742188, 0.026831626892089844, 0.0291595458984375, 0.031487464904785156, 0.03381538391113281, 0.03614330291748047, 0.038471221923828125, 0.04079914093017578, 0.04312705993652344, 0.045454978942871094, 0.04778289794921875, 0.050110816955566406, 0.05243873596191406, 0.05476665496826172, 0.057094573974609375, 0.05942249298095703, 0.06175041198730469, 0.06407833099365234, 0.06640625]}, "gradients/encoder.encoder.layers.21.layer_norm.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 4.0, 9.0, 11.0, 32.0, 68.0, 147.0, 228.0, 225.0, 140.0, 67.0, 30.0, 28.0, 11.0, 5.0, 6.0, 1.0, 3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29053565859794617, -0.26106521487236023, -0.23159478604793549, -0.20212435722351074, -0.1726539134979248, -0.14318346977233887, -0.11371304094791412, -0.08424261212348938, -0.05477216839790344, -0.025301732122898102, 0.004168704152107239, 0.03363914042711258, 0.06310957670211792, 0.09258002042770386, 0.1220504492521286, 0.15152087807655334, 0.18099132180213928, 0.21046176552772522, 0.23993219435214996, 0.2694026231765747, 0.29887306690216064, 0.3283435106277466, 0.3578139543533325, 0.38728436827659607, 0.416754812002182, 0.44622525572776794, 0.4756956696510315, 0.5051661133766174, 0.5346365571022034, 0.5641070008277893, 0.5935774445533752, 0.6230478286743164, 0.6525182723999023, 0.6819887161254883, 0.7114591598510742, 0.7409296035766602, 0.7704000473022461, 0.799870491027832, 0.8293408751487732, 0.8588113188743591, 0.8882817625999451, 0.917752206325531, 0.9472226500511169, 0.9766930937767029, 1.006163477897644, 1.03563392162323, 1.065104365348816, 1.0945748090744019, 1.1240452527999878, 1.1535156965255737, 1.1829861402511597, 1.2124565839767456, 1.2419270277023315, 1.2713974714279175, 1.3008679151535034, 1.3303382396697998, 1.3598086833953857, 1.3892791271209717, 1.4187495708465576, 1.4482200145721436, 
1.4776904582977295, 1.5071609020233154, 1.5366313457489014, 1.5661017894744873, 1.5955722332000732]}, "gradients/encoder.encoder.layers.21.layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 3.0, 2.0, 7.0, 2.0, 9.0, 16.0, 14.0, 6.0, 9.0, 13.0, 23.0, 26.0, 28.0, 31.0, 25.0, 37.0, 41.0, 38.0, 42.0, 50.0, 37.0, 43.0, 55.0, 45.0, 37.0, 36.0, 43.0, 30.0, 41.0, 26.0, 19.0, 36.0, 21.0, 24.0, 23.0, 14.0, 8.0, 11.0, 7.0, 9.0, 3.0, 4.0, 3.0, 3.0, 2.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.38704580068588257, -0.37429752945899963, -0.3615492284297943, -0.3488009572029114, -0.33605265617370605, -0.3233043849468231, -0.3105560839176178, -0.29780781269073486, -0.28505951166152954, -0.2723112404346466, -0.2595629394054413, -0.24681465327739716, -0.23406636714935303, -0.2213180959224701, -0.20856979489326477, -0.19582152366638184, -0.1830732375383377, -0.17032495141029358, -0.15757666528224945, -0.14482837915420532, -0.1320800930261612, -0.11933181434869766, -0.10658352822065353, -0.0938352420926094, -0.08108695596456528, -0.06833866983652115, -0.05559038370847702, -0.04284210130572319, -0.030093815177679062, -0.017345532774925232, -0.0045972466468811035, 0.008151039481163025, 0.020899325609207153, 0.03364761173725128, 0.04639589786529541, 0.05914418026804924, 0.07189247012138367, 0.0846407487988472, 0.09738903492689133, 0.11013732105493546, 0.12288560718297958, 0.13563388586044312, 0.14838217198848724, 0.16113045811653137, 0.1738787442445755, 0.18662703037261963, 0.19937531650066376, 0.21212360262870789, 0.22487188875675201, 0.23762017488479614, 0.2503684461116791, 0.2631167471408844, 0.27586501836776733, 0.28861331939697266, 0.3013615906238556, 0.3141098916530609, 0.32685816287994385, 0.3396064341068268, 0.3523547351360321, 0.36510300636291504, 0.37785130739212036, 0.3905995786190033, 0.4033478796482086, 0.41609615087509155, 0.4288444519042969]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 3.0, 3.0, 2.0, 3.0, 3.0, 5.0, 11.0, 12.0, 14.0, 20.0, 24.0, 36.0, 67.0, 74.0, 102.0, 176.0, 272.0, 362.0, 619.0, 956.0, 1514.0, 2621.0, 4550.0, 8867.0, 18436.0, 44389.0, 153345.0, 1519080.0, 2126564.0, 213956.0, 53545.0, 21606.0, 10188.0, 5301.0, 2889.0, 1754.0, 980.0, 666.0, 446.0, 268.0, 171.0, 138.0, 81.0, 52.0, 41.0, 21.0, 21.0, 10.0, 15.0, 4.0, 4.0, 5.0, 6.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.125244140625, -0.12108230590820312, -0.11692047119140625, -0.11275863647460938, -0.1085968017578125, -0.10443496704101562, -0.10027313232421875, -0.09611129760742188, -0.091949462890625, -0.08778762817382812, -0.08362579345703125, -0.07946395874023438, -0.0753021240234375, -0.07114028930664062, -0.06697845458984375, -0.06281661987304688, -0.05865478515625, -0.054492950439453125, -0.05033111572265625, -0.046169281005859375, -0.0420074462890625, -0.037845611572265625, -0.03368377685546875, -0.029521942138671875, -0.025360107421875, -0.021198272705078125, -0.01703643798828125, -0.012874603271484375, -0.0087127685546875, -0.004550933837890625, -0.00038909912109375, 0.003772735595703125, 0.0079345703125, 0.012096405029296875, 0.01625823974609375, 0.020420074462890625, 0.0245819091796875, 0.028743743896484375, 0.03290557861328125, 0.037067413330078125, 0.041229248046875, 0.045391082763671875, 0.04955291748046875, 0.053714752197265625, 0.0578765869140625, 0.062038421630859375, 0.06620025634765625, 0.07036209106445312, 0.07452392578125, 0.07868576049804688, 
0.08284759521484375, 0.08700942993164062, 0.0911712646484375, 0.09533309936523438, 0.09949493408203125, 0.10365676879882812, 0.107818603515625, 0.11198043823242188, 0.11614227294921875, 0.12030410766601562, 0.1244659423828125, 0.12862777709960938, 0.13278961181640625, 0.13695144653320312, 0.14111328125]}, "gradients/encoder.encoder.layers.20.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 8.0, 3.0, 8.0, 10.0, 8.0, 14.0, 18.0, 18.0, 17.0, 23.0, 34.0, 33.0, 21.0, 42.0, 37.0, 44.0, 42.0, 38.0, 59.0, 56.0, 59.0, 39.0, 39.0, 40.0, 39.0, 38.0, 33.0, 34.0, 26.0, 27.0, 24.0, 12.0, 7.0, 13.0, 9.0, 5.0, 8.0, 6.0, 4.0, 2.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.06622314453125, -0.06434822082519531, -0.062473297119140625, -0.06059837341308594, -0.05872344970703125, -0.05684852600097656, -0.054973602294921875, -0.05309867858886719, -0.0512237548828125, -0.04934883117675781, -0.047473907470703125, -0.04559898376464844, -0.04372406005859375, -0.04184913635253906, -0.039974212646484375, -0.03809928894042969, -0.036224365234375, -0.03434944152832031, -0.032474517822265625, -0.030599594116210938, -0.02872467041015625, -0.026849746704101562, -0.024974822998046875, -0.023099899291992188, -0.0212249755859375, -0.019350051879882812, -0.017475128173828125, -0.015600204467773438, -0.01372528076171875, -0.011850357055664062, -0.009975433349609375, -0.008100509643554688, -0.0062255859375, -0.0043506622314453125, -0.002475738525390625, -0.0006008148193359375, 0.00127410888671875, 0.0031490325927734375, 0.005023956298828125, 0.0068988800048828125, 0.0087738037109375, 0.010648727416992188, 0.012523651123046875, 0.014398574829101562, 0.01627349853515625, 0.018148422241210938, 0.020023345947265625, 0.021898269653320312, 0.023773193359375, 0.025648117065429688, 0.027523040771484375, 0.029397964477539062, 0.03127288818359375, 0.03314781188964844, 0.035022735595703125, 0.03689765930175781, 0.0387725830078125, 0.04064750671386719, 0.042522430419921875, 0.04439735412597656, 0.04627227783203125, 0.04814720153808594, 0.050022125244140625, 0.05189704895019531, 0.05377197265625]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 1.0, 4.0, 4.0, 1.0, 7.0, 8.0, 5.0, 13.0, 25.0, 27.0, 44.0, 88.0, 133.0, 208.0, 356.0, 739.0, 1889.0, 5549.0, 21413.0, 119006.0, 1840967.0, 2044586.0, 126419.0, 22699.0, 6061.0, 2126.0, 901.0, 441.0, 230.0, 154.0, 75.0, 45.0, 26.0, 17.0, 13.0, 6.0, 1.0, 3.0, 4.0, 1.0, 0.0, 1.0], "bins": [-0.3603515625, -0.3519744873046875, -0.343597412109375, -0.3352203369140625, -0.32684326171875, -0.3184661865234375, -0.310089111328125, -0.3017120361328125, -0.2933349609375, -0.2849578857421875, -0.276580810546875, -0.2682037353515625, -0.25982666015625, -0.2514495849609375, -0.243072509765625, -0.2346954345703125, -0.226318359375, -0.2179412841796875, -0.209564208984375, -0.2011871337890625, -0.19281005859375, -0.1844329833984375, -0.176055908203125, -0.1676788330078125, -0.1593017578125, -0.1509246826171875, -0.142547607421875, -0.1341705322265625, -0.12579345703125, -0.1174163818359375, -0.109039306640625, -0.1006622314453125, -0.09228515625, -0.0839080810546875, -0.075531005859375, -0.0671539306640625, -0.05877685546875, -0.0503997802734375, -0.042022705078125, -0.0336456298828125, -0.0252685546875, -0.0168914794921875, 
-0.008514404296875, -0.0001373291015625, 0.00823974609375, 0.0166168212890625, 0.024993896484375, 0.0333709716796875, 0.041748046875, 0.0501251220703125, 0.058502197265625, 0.0668792724609375, 0.07525634765625, 0.0836334228515625, 0.092010498046875, 0.1003875732421875, 0.1087646484375, 0.1171417236328125, 0.125518798828125, 0.1338958740234375, 0.14227294921875, 0.1506500244140625, 0.159027099609375, 0.1674041748046875, 0.17578125]}, "gradients/encoder.encoder.layers.20.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 6.0, 5.0, 8.0, 10.0, 15.0, 20.0, 46.0, 48.0, 83.0, 100.0, 134.0, 176.0, 358.0, 605.0, 807.0, 638.0, 386.0, 206.0, 131.0, 82.0, 64.0, 49.0, 21.0, 18.0, 23.0, 15.0, 3.0, 5.0, 5.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.11895751953125, -0.11370563507080078, -0.10845375061035156, -0.10320186614990234, -0.09794998168945312, -0.0926980972290039, -0.08744621276855469, -0.08219432830810547, -0.07694244384765625, -0.07169055938720703, -0.06643867492675781, -0.061186790466308594, -0.055934906005859375, -0.050683021545410156, -0.04543113708496094, -0.04017925262451172, -0.0349273681640625, -0.02967548370361328, -0.024423599243164062, -0.019171714782714844, -0.013919830322265625, -0.008667945861816406, -0.0034160614013671875, 0.0018358230590820312, 0.00708770751953125, 0.012339591979980469, 0.017591476440429688, 0.022843360900878906, 0.028095245361328125, 0.033347129821777344, 0.03859901428222656, 0.04385089874267578, 0.049102783203125, 0.05435466766357422, 0.05960655212402344, 0.06485843658447266, 0.07011032104492188, 0.0753622055053711, 0.08061408996582031, 0.08586597442626953, 0.09111785888671875, 0.09636974334716797, 0.10162162780761719, 0.1068735122680664, 0.11212539672851562, 0.11737728118896484, 0.12262916564941406, 0.12788105010986328, 0.1331329345703125, 0.13838481903076172, 0.14363670349121094, 0.14888858795166016, 0.15414047241210938, 0.1593923568725586, 0.1646442413330078, 0.16989612579345703, 0.17514801025390625, 0.18039989471435547, 0.1856517791748047, 0.1909036636352539, 0.19615554809570312, 0.20140743255615234, 0.20665931701660156, 0.21191120147705078, 0.2171630859375]}, "gradients/encoder.encoder.layers.20.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 2.0, 11.0, 11.0, 30.0, 76.0, 193.0, 350.0, 204.0, 83.0, 28.0, 19.0, 2.0, 4.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.9967993497848511, -0.9433290362358093, -0.8898587822914124, -0.8363884687423706, -0.7829182147979736, -0.7294479012489319, -0.6759775876998901, -0.6225073337554932, -0.5690370202064514, -0.5155667066574097, -0.4620964527130127, -0.40862613916397095, -0.3551558554172516, -0.3016855716705322, -0.24821525812149048, -0.19474497437477112, -0.14127469062805176, -0.0878043994307518, -0.03433410823345184, 0.01913619041442871, 0.07260647416114807, 0.12607675790786743, 0.17954707145690918, 0.23301735520362854, 0.2864876389503479, 0.33995792269706726, 0.3934282064437866, 0.44689851999282837, 0.5003688335418701, 0.5538390874862671, 0.6073094010353088, 0.6607797145843506, 0.714249849319458, 0.7677201628684998, 0.8211904168128967, 0.8746607303619385, 0.9281309843063354, 0.9816012978553772, 1.035071611404419, 
1.088541865348816, 1.142012119293213, 1.1954823732376099, 1.2489527463912964, 1.3024230003356934, 1.3558932542800903, 1.4093635082244873, 1.4628338813781738, 1.5163041353225708, 1.5697745084762573, 1.6232447624206543, 1.6767151355743408, 1.7301853895187378, 1.7836556434631348, 1.8371260166168213, 1.8905962705612183, 1.9440665245056152, 1.9975368976593018, 2.0510072708129883, 2.1044774055480957, 2.1579477787017822, 2.2114181518554688, 2.264888286590576, 2.3183586597442627, 2.371829032897949, 2.4252991676330566]}, "gradients/encoder.encoder.layers.20.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 2.0, 7.0, 7.0, 6.0, 8.0, 15.0, 18.0, 22.0, 17.0, 25.0, 33.0, 32.0, 43.0, 30.0, 45.0, 41.0, 50.0, 35.0, 47.0, 55.0, 64.0, 47.0, 48.0, 39.0, 46.0, 50.0, 35.0, 31.0, 20.0, 23.0, 14.0, 11.0, 12.0, 10.0, 8.0, 4.0, 3.0, 5.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4721795916557312, -0.4545271396636963, -0.4368746876716614, -0.41922223567962646, -0.40156978368759155, -0.38391733169555664, -0.36626487970352173, -0.3486124277114868, -0.3309599757194519, -0.313307523727417, -0.2956550717353821, -0.27800261974334717, -0.26035016775131226, -0.24269771575927734, -0.22504527866840363, -0.2073928266763687, -0.189740389585495, -0.17208793759346008, -0.15443548560142517, -0.13678303360939026, -0.11913058906793594, -0.10147813707590103, -0.08382569253444672, -0.0661732405424118, -0.04852078855037689, -0.03086833842098713, -0.013215888291597366, 0.004436559975147247, 0.02208901196718216, 0.03974146395921707, 0.05739390850067139, 0.0750463604927063, 0.09269881248474121, 0.11035126447677612, 0.12800371646881104, 0.14565616846084595, 0.16330862045288086, 0.18096107244491577, 0.1986135095357895, 0.2162659615278244, 0.23391841351985931, 0.25157085061073303, 0.26922330260276794, 0.28687575459480286, 0.30452820658683777, 0.3221806585788727, 0.3398331105709076, 0.3574855625629425, 0.3751380145549774, 0.39279046654701233, 0.41044291853904724, 0.42809537053108215, 0.44574782252311707, 0.463400274515152, 0.4810526967048645, 0.4987051486968994, 0.5163576006889343, 0.5340100526809692, 0.5516625046730042, 0.5693149566650391, 0.586967408657074, 0.6046198606491089, 0.6222723126411438, 0.6399247646331787, 0.6575772166252136]}, "gradients/encoder.encoder.layers.20.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 7.0, 4.0, 5.0, 12.0, 16.0, 32.0, 45.0, 68.0, 94.0, 169.0, 298.0, 484.0, 877.0, 1590.0, 2988.0, 5972.0, 11797.0, 25295.0, 56657.0, 135798.0, 285885.0, 282183.0, 133277.0, 56397.0, 24418.0, 11639.0, 5827.0, 3020.0, 1608.0, 892.0, 480.0, 282.0, 177.0, 104.0, 66.0, 25.0, 27.0, 21.0, 9.0, 7.0, 5.0, 6.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.1517333984375, -0.1468372344970703, -0.14194107055664062, -0.13704490661621094, -0.13214874267578125, -0.12725257873535156, -0.12235641479492188, -0.11746025085449219, -0.1125640869140625, -0.10766792297363281, -0.10277175903320312, -0.09787559509277344, -0.09297943115234375, -0.08808326721191406, -0.08318710327148438, -0.07829093933105469, -0.073394775390625, -0.06849861145019531, -0.06360244750976562, -0.05870628356933594, -0.05381011962890625, -0.04891395568847656, -0.044017791748046875, -0.03912162780761719, -0.0342254638671875, -0.029329299926757812, -0.024433135986328125, -0.019536972045898438, -0.01464080810546875, -0.009744644165039062, -0.004848480224609375, 4.76837158203125e-05, 
0.00494384765625, 0.009840011596679688, 0.014736175537109375, 0.019632339477539062, 0.02452850341796875, 0.029424667358398438, 0.034320831298828125, 0.03921699523925781, 0.0441131591796875, 0.04900932312011719, 0.053905487060546875, 0.05880165100097656, 0.06369781494140625, 0.06859397888183594, 0.07349014282226562, 0.07838630676269531, 0.083282470703125, 0.08817863464355469, 0.09307479858398438, 0.09797096252441406, 0.10286712646484375, 0.10776329040527344, 0.11265945434570312, 0.11755561828613281, 0.1224517822265625, 0.1273479461669922, 0.13224411010742188, 0.13714027404785156, 0.14203643798828125, 0.14693260192871094, 0.15182876586914062, 0.1567249298095703, 0.16162109375]}, "gradients/encoder.encoder.layers.20.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 1.0, 4.0, 3.0, 10.0, 6.0, 15.0, 7.0, 15.0, 15.0, 21.0, 16.0, 35.0, 29.0, 37.0, 31.0, 46.0, 44.0, 51.0, 45.0, 54.0, 49.0, 52.0, 52.0, 53.0, 36.0, 45.0, 36.0, 42.0, 10.0, 32.0, 24.0, 27.0, 11.0, 16.0, 12.0, 5.0, 7.0, 4.0, 5.0, 2.0, 4.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0], "bins": [-0.05535888671875, -0.05359649658203125, -0.0518341064453125, -0.05007171630859375, -0.048309326171875, -0.04654693603515625, -0.0447845458984375, -0.04302215576171875, -0.041259765625, -0.03949737548828125, -0.0377349853515625, -0.03597259521484375, -0.034210205078125, -0.03244781494140625, -0.0306854248046875, -0.02892303466796875, -0.02716064453125, -0.02539825439453125, -0.0236358642578125, -0.02187347412109375, -0.020111083984375, -0.01834869384765625, -0.0165863037109375, -0.01482391357421875, -0.0130615234375, -0.01129913330078125, -0.0095367431640625, -0.00777435302734375, -0.006011962890625, -0.00424957275390625, -0.0024871826171875, -0.00072479248046875, 0.00103759765625, 0.00279998779296875, 0.0045623779296875, 0.00632476806640625, 0.008087158203125, 0.00984954833984375, 0.0116119384765625, 0.01337432861328125, 0.01513671875, 0.01689910888671875, 0.0186614990234375, 0.02042388916015625, 0.022186279296875, 0.02394866943359375, 0.0257110595703125, 0.02747344970703125, 0.02923583984375, 0.03099822998046875, 0.0327606201171875, 0.03452301025390625, 0.036285400390625, 0.03804779052734375, 0.0398101806640625, 0.04157257080078125, 0.0433349609375, 0.04509735107421875, 0.0468597412109375, 0.04862213134765625, 0.050384521484375, 0.05214691162109375, 0.0539093017578125, 0.05567169189453125, 0.05743408203125]}, "gradients/encoder.encoder.layers.20.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 1.0, 10.0, 7.0, 15.0, 26.0, 29.0, 39.0, 56.0, 90.0, 109.0, 154.0, 258.0, 369.0, 555.0, 863.0, 1348.0, 2239.0, 3640.0, 6556.0, 11878.0, 23019.0, 46218.0, 98171.0, 207272.0, 294649.0, 181373.0, 84431.0, 40098.0, 19905.0, 10459.0, 5756.0, 3292.0, 2045.0, 1246.0, 759.0, 519.0, 360.0, 223.0, 155.0, 105.0, 77.0, 53.0, 43.0, 25.0, 16.0, 14.0, 13.0, 11.0, 5.0, 1.0, 5.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.1507568359375, -0.1461048126220703, -0.14145278930664062, -0.13680076599121094, -0.13214874267578125, -0.12749671936035156, -0.12284469604492188, -0.11819267272949219, -0.1135406494140625, -0.10888862609863281, -0.10423660278320312, -0.09958457946777344, -0.09493255615234375, -0.09028053283691406, -0.08562850952148438, -0.08097648620605469, -0.076324462890625, -0.07167243957519531, -0.06702041625976562, -0.06236839294433594, -0.05771636962890625, -0.05306434631347656, -0.048412322998046875, -0.04376029968261719, -0.0391082763671875, 
-0.03445625305175781, -0.029804229736328125, -0.025152206420898438, -0.02050018310546875, -0.015848159790039062, -0.011196136474609375, -0.0065441131591796875, -0.00189208984375, 0.0027599334716796875, 0.007411956787109375, 0.012063980102539062, 0.01671600341796875, 0.021368026733398438, 0.026020050048828125, 0.030672073364257812, 0.0353240966796875, 0.03997611999511719, 0.044628143310546875, 0.04928016662597656, 0.05393218994140625, 0.05858421325683594, 0.06323623657226562, 0.06788825988769531, 0.072540283203125, 0.07719230651855469, 0.08184432983398438, 0.08649635314941406, 0.09114837646484375, 0.09580039978027344, 0.10045242309570312, 0.10510444641113281, 0.1097564697265625, 0.11440849304199219, 0.11906051635742188, 0.12371253967285156, 0.12836456298828125, 0.13301658630371094, 0.13766860961914062, 0.1423206329345703, 0.14697265625]}, "gradients/encoder.encoder.layers.20.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 0.0, 6.0, 2.0, 8.0, 6.0, 11.0, 5.0, 13.0, 15.0, 14.0, 15.0, 31.0, 29.0, 28.0, 26.0, 32.0, 33.0, 39.0, 43.0, 46.0, 44.0, 48.0, 39.0, 49.0, 54.0, 33.0, 41.0, 32.0, 40.0, 36.0, 34.0, 20.0, 31.0, 17.0, 18.0, 20.0, 6.0, 12.0, 6.0, 10.0, 10.0, 3.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1302490234375, -0.12591171264648438, -0.12157440185546875, -0.11723709106445312, -0.1128997802734375, -0.10856246948242188, -0.10422515869140625, -0.09988784790039062, -0.095550537109375, -0.09121322631835938, -0.08687591552734375, -0.08253860473632812, -0.0782012939453125, -0.07386398315429688, -0.06952667236328125, -0.06518936157226562, -0.06085205078125, -0.056514739990234375, -0.05217742919921875, -0.047840118408203125, -0.0435028076171875, -0.039165496826171875, -0.03482818603515625, -0.030490875244140625, -0.026153564453125, -0.021816253662109375, -0.01747894287109375, -0.013141632080078125, -0.0088043212890625, -0.004467010498046875, -0.00012969970703125, 0.004207611083984375, 0.008544921875, 0.012882232666015625, 0.01721954345703125, 0.021556854248046875, 0.0258941650390625, 0.030231475830078125, 0.03456878662109375, 0.038906097412109375, 0.043243408203125, 0.047580718994140625, 0.05191802978515625, 0.056255340576171875, 0.0605926513671875, 0.06492996215820312, 0.06926727294921875, 0.07360458374023438, 0.07794189453125, 0.08227920532226562, 0.08661651611328125, 0.09095382690429688, 0.0952911376953125, 0.09962844848632812, 0.10396575927734375, 0.10830307006835938, 0.112640380859375, 0.11697769165039062, 0.12131500244140625, 0.12565231323242188, 0.1299896240234375, 0.13432693481445312, 0.13866424560546875, 0.14300155639648438, 0.1473388671875]}, "gradients/encoder.encoder.layers.20.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 7.0, 6.0, 12.0, 6.0, 19.0, 23.0, 52.0, 73.0, 138.0, 250.0, 535.0, 1179.0, 2921.0, 9335.0, 1013345.0, 14215.0, 3678.0, 1471.0, 612.0, 282.0, 156.0, 87.0, 48.0, 43.0, 17.0, 16.0, 7.0, 10.0, 1.0, 2.0, 5.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1123046875, -1.077117919921875, -1.04193115234375, -1.006744384765625, -0.9715576171875, -0.936370849609375, -0.90118408203125, -0.865997314453125, -0.830810546875, -0.795623779296875, -0.76043701171875, -0.725250244140625, -0.6900634765625, -0.654876708984375, -0.61968994140625, -0.584503173828125, -0.54931640625, -0.514129638671875, -0.47894287109375, -0.443756103515625, 
-0.4085693359375, -0.373382568359375, -0.33819580078125, -0.303009033203125, -0.267822265625, -0.232635498046875, -0.19744873046875, -0.162261962890625, -0.1270751953125, -0.091888427734375, -0.05670166015625, -0.021514892578125, 0.013671875, 0.048858642578125, 0.08404541015625, 0.119232177734375, 0.1544189453125, 0.189605712890625, 0.22479248046875, 0.259979248046875, 0.295166015625, 0.330352783203125, 0.36553955078125, 0.400726318359375, 0.4359130859375, 0.471099853515625, 0.50628662109375, 0.541473388671875, 0.57666015625, 0.611846923828125, 0.64703369140625, 0.682220458984375, 0.7174072265625, 0.752593994140625, 0.78778076171875, 0.822967529296875, 0.858154296875, 0.893341064453125, 0.92852783203125, 0.963714599609375, 0.9989013671875, 1.034088134765625, 1.06927490234375, 1.104461669921875, 1.1396484375]}, "gradients/encoder.encoder.layers.20.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 6.0, 5.0, 12.0, 157.0, 705.0, 108.0, 12.0, 2.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.00022220611572265625, -0.0002174200490117073, -0.00021263398230075836, -0.00020784791558980942, -0.00020306184887886047, -0.00019827578216791153, -0.00019348971545696259, -0.00018870364874601364, -0.0001839175820350647, -0.00017913151532411575, -0.0001743454486131668, -0.00016955938190221786, -0.00016477331519126892, -0.00015998724848031998, -0.00015520118176937103, -0.0001504151150584221, -0.00014562904834747314, -0.0001408429816365242, -0.00013605691492557526, -0.0001312708482146263, -0.00012648478150367737, -0.00012169871479272842, -0.00011691264808177948, -0.00011212658137083054, -0.00010734051465988159, -0.00010255444794893265, -9.77683812379837e-05, -9.298231452703476e-05, -8.819624781608582e-05, -8.341018110513687e-05, -7.862411439418793e-05, -7.383804768323898e-05, -6.905198097229004e-05, -6.42659142613411e-05, -5.947984755039215e-05, -5.469378083944321e-05, -4.990771412849426e-05, -4.512164741754532e-05, -4.0335580706596375e-05, -3.554951399564743e-05, -3.0763447284698486e-05, -2.5977380573749542e-05, -2.1191313862800598e-05, -1.6405247151851654e-05, -1.161918044090271e-05, -6.833113729953766e-06, -2.0470470190048218e-06, 2.7390196919441223e-06, 7.525086402893066e-06, 1.231115311384201e-05, 1.7097219824790955e-05, 2.18832865357399e-05, 2.6669353246688843e-05, 3.145541995763779e-05, 3.624148666858673e-05, 4.1027553379535675e-05, 4.581362009048462e-05, 5.059968680143356e-05, 5.538575351238251e-05, 6.017182022333145e-05, 6.49578869342804e-05, 6.974395364522934e-05, 7.453002035617828e-05, 7.931608706712723e-05, 8.410215377807617e-05]}, "gradients/encoder.encoder.layers.20.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 6.0, 9.0, 10.0, 21.0, 26.0, 26.0, 44.0, 81.0, 122.0, 154.0, 261.0, 474.0, 808.0, 1407.0, 2528.0, 5108.0, 11077.0, 27129.0, 79319.0, 238458.0, 383515.0, 193634.0, 62764.0, 22469.0, 9233.0, 4531.0, 2298.0, 1260.0, 668.0, 393.0, 255.0, 145.0, 112.0, 69.0, 47.0, 29.0, 20.0, 11.0, 13.0, 6.0, 7.0, 2.0, 4.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.055023193359375, -0.053151607513427734, -0.05128002166748047, -0.0494084358215332, -0.04753684997558594, -0.04566526412963867, -0.043793678283691406, -0.04192209243774414, -0.040050506591796875, 
-0.03817892074584961, -0.036307334899902344, -0.03443574905395508, -0.03256416320800781, -0.030692577362060547, -0.02882099151611328, -0.026949405670166016, -0.02507781982421875, -0.023206233978271484, -0.02133464813232422, -0.019463062286376953, -0.017591476440429688, -0.015719890594482422, -0.013848304748535156, -0.01197671890258789, -0.010105133056640625, -0.00823354721069336, -0.006361961364746094, -0.004490375518798828, -0.0026187896728515625, -0.0007472038269042969, 0.0011243820190429688, 0.0029959678649902344, 0.0048675537109375, 0.006739139556884766, 0.008610725402832031, 0.010482311248779297, 0.012353897094726562, 0.014225482940673828, 0.016097068786621094, 0.01796865463256836, 0.019840240478515625, 0.02171182632446289, 0.023583412170410156, 0.025454998016357422, 0.027326583862304688, 0.029198169708251953, 0.03106975555419922, 0.032941341400146484, 0.03481292724609375, 0.036684513092041016, 0.03855609893798828, 0.04042768478393555, 0.04229927062988281, 0.04417085647583008, 0.046042442321777344, 0.04791402816772461, 0.049785614013671875, 0.05165719985961914, 0.053528785705566406, 0.05540037155151367, 0.05727195739746094, 0.0591435432434082, 0.06101512908935547, 0.06288671493530273, 0.06475830078125]}, "gradients/encoder.encoder.layers.20.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 4.0, 3.0, 2.0, 3.0, 3.0, 7.0, 5.0, 5.0, 8.0, 9.0, 7.0, 11.0, 21.0, 14.0, 15.0, 22.0, 30.0, 34.0, 52.0, 65.0, 61.0, 59.0, 57.0, 69.0, 59.0, 64.0, 53.0, 46.0, 35.0, 43.0, 30.0, 26.0, 19.0, 14.0, 12.0, 6.0, 6.0, 2.0, 3.0, 7.0, 7.0, 3.0, 3.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.031219482421875, -0.030238628387451172, -0.029257774353027344, -0.028276920318603516, -0.027296066284179688, -0.02631521224975586, -0.02533435821533203, -0.024353504180908203, -0.023372650146484375, -0.022391796112060547, -0.02141094207763672, -0.02043008804321289, -0.019449234008789062, -0.018468379974365234, -0.017487525939941406, -0.016506671905517578, -0.01552581787109375, -0.014544963836669922, -0.013564109802246094, -0.012583255767822266, -0.011602401733398438, -0.01062154769897461, -0.009640693664550781, -0.008659839630126953, -0.007678985595703125, -0.006698131561279297, -0.005717277526855469, -0.004736423492431641, -0.0037555694580078125, -0.0027747154235839844, -0.0017938613891601562, -0.0008130073547363281, 0.0001678466796875, 0.0011487007141113281, 0.0021295547485351562, 0.0031104087829589844, 0.0040912628173828125, 0.005072116851806641, 0.006052970886230469, 0.007033824920654297, 0.008014678955078125, 0.008995532989501953, 0.009976387023925781, 0.01095724105834961, 0.011938095092773438, 0.012918949127197266, 0.013899803161621094, 0.014880657196044922, 0.01586151123046875, 0.016842365264892578, 0.017823219299316406, 0.018804073333740234, 0.019784927368164062, 0.02076578140258789, 0.02174663543701172, 0.022727489471435547, 0.023708343505859375, 0.024689197540283203, 0.02567005157470703, 0.02665090560913086, 0.027631759643554688, 0.028612613677978516, 0.029593467712402344, 0.030574321746826172, 0.03155517578125]}, "gradients/encoder.encoder.layers.20.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 7.0, 15.0, 22.0, 78.0, 208.0, 348.0, 222.0, 72.0, 24.0, 8.0, 6.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], 
"bins": [-0.8911445140838623, -0.837341845035553, -0.7835391759872437, -0.7297364473342896, -0.6759337782859802, -0.6221311092376709, -0.5683284401893616, -0.5145257711410522, -0.46072307229042053, -0.4069204032421112, -0.3531177043914795, -0.29931503534317017, -0.24551235139369965, -0.19170966744422913, -0.1379069983959198, -0.08410429954528809, -0.03030163049697876, 0.023501049727201462, 0.07730372995138168, 0.1311064064502716, 0.18490909039974213, 0.23871177434921265, 0.292514443397522, 0.3463171422481537, 0.400119811296463, 0.45392248034477234, 0.507725179195404, 0.5615278482437134, 0.6153305172920227, 0.669133186340332, 0.7229359149932861, 0.7767385840415955, 0.8305412530899048, 0.8843439221382141, 0.9381465911865234, 0.9919493198394775, 1.045751929283142, 1.0995546579360962, 1.1533572673797607, 1.2071599960327148, 1.260962724685669, 1.314765453338623, 1.3685680627822876, 1.4223707914352417, 1.4761734008789062, 1.5299761295318604, 1.5837788581848145, 1.637581467628479, 1.6913840770721436, 1.7451868057250977, 1.7989894151687622, 1.8527921438217163, 1.9065947532653809, 1.960397481918335, 2.014200210571289, 2.068002939224243, 2.1218056678771973, 2.1756083965301514, 2.2294111251831055, 2.2832136154174805, 2.3370163440704346, 2.3908190727233887, 2.4446218013763428, 2.498424530029297, 2.552227020263672]}, "gradients/encoder.encoder.layers.20.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 0.0, 1.0, 2.0, 4.0, 6.0, 6.0, 4.0, 9.0, 7.0, 4.0, 14.0, 18.0, 22.0, 22.0, 28.0, 23.0, 27.0, 38.0, 28.0, 46.0, 32.0, 32.0, 54.0, 60.0, 52.0, 44.0, 41.0, 43.0, 37.0, 35.0, 36.0, 25.0, 31.0, 30.0, 28.0, 22.0, 19.0, 16.0, 14.0, 12.0, 8.0, 9.0, 12.0, 4.0, 4.0, 0.0, 3.0, 1.0, 3.0], "bins": [-0.5137677192687988, -0.5008597373962402, -0.4879518151283264, -0.4750438630580902, -0.462135910987854, -0.4492279291152954, -0.4363199770450592, -0.423412024974823, -0.4105040729045868, -0.3975961208343506, -0.3846881687641144, -0.3717802166938782, -0.3588722348213196, -0.34596431255340576, -0.33305633068084717, -0.32014837861061096, -0.30724042654037476, -0.29433247447013855, -0.28142452239990234, -0.26851657032966614, -0.25560861825942993, -0.24270065128803253, -0.22979268431663513, -0.21688473224639893, -0.20397678017616272, -0.1910688281059265, -0.1781608760356903, -0.1652529090642929, -0.1523449569940567, -0.1394370049238205, -0.1265290379524231, -0.11362108588218689, -0.10071313381195068, -0.08780518174171448, -0.07489722222089767, -0.06198926642537117, -0.049081310629844666, -0.03617335855960846, -0.023265399038791656, -0.010357439517974854, 0.0025505125522613525, 0.015458468347787857, 0.02836642414331436, 0.041274379938840866, 0.05418233573436737, 0.06709028780460358, 0.07999824732542038, 0.09290620684623718, 0.10581415891647339, 0.1187221109867096, 0.1316300630569458, 0.1445380300283432, 0.1574459820985794, 0.1703539341688156, 0.183261901140213, 0.19616985321044922, 0.20907780528068542, 0.22198575735092163, 0.23489370942115784, 0.24780167639255524, 0.26070964336395264, 0.27361756563186646, 0.28652554750442505, 0.29943349957466125, 0.31234145164489746]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 9.0, 4.0, 5.0, 5.0, 2.0, 6.0, 10.0, 7.0, 9.0, 15.0, 17.0, 19.0, 27.0, 27.0, 44.0, 44.0, 66.0, 108.0, 221.0, 513.0, 1870.0, 16164.0, 880083.0, 3258773.0, 31971.0, 2939.0, 637.0, 261.0, 104.0, 63.0, 53.0, 39.0, 35.0, 26.0, 22.0, 14.0, 13.0, 
12.0, 12.0, 11.0, 4.0, 7.0, 5.0, 6.0, 6.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.394287109375, -0.3820152282714844, -0.36974334716796875, -0.3574714660644531, -0.3451995849609375, -0.3329277038574219, -0.32065582275390625, -0.3083839416503906, -0.296112060546875, -0.2838401794433594, -0.27156829833984375, -0.2592964172363281, -0.2470245361328125, -0.23475265502929688, -0.22248077392578125, -0.21020889282226562, -0.19793701171875, -0.18566513061523438, -0.17339324951171875, -0.16112136840820312, -0.1488494873046875, -0.13657760620117188, -0.12430572509765625, -0.11203384399414062, -0.099761962890625, -0.08749008178710938, -0.07521820068359375, -0.06294631958007812, -0.0506744384765625, -0.038402557373046875, -0.02613067626953125, -0.013858795166015625, -0.0015869140625, 0.010684967041015625, 0.02295684814453125, 0.035228729248046875, 0.0475006103515625, 0.059772491455078125, 0.07204437255859375, 0.08431625366210938, 0.096588134765625, 0.10886001586914062, 0.12113189697265625, 0.13340377807617188, 0.1456756591796875, 0.15794754028320312, 0.17021942138671875, 0.18249130249023438, 0.19476318359375, 0.20703506469726562, 0.21930694580078125, 0.23157882690429688, 0.2438507080078125, 0.2561225891113281, 0.26839447021484375, 0.2806663513183594, 0.292938232421875, 0.3052101135253906, 0.31748199462890625, 0.3297538757324219, 0.3420257568359375, 0.3542976379394531, 0.36656951904296875, 0.3788414001464844, 0.39111328125]}, "gradients/encoder.encoder.layers.19.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 0.0, 3.0, 2.0, 3.0, 5.0, 8.0, 3.0, 5.0, 8.0, 6.0, 15.0, 11.0, 13.0, 15.0, 14.0, 19.0, 23.0, 30.0, 32.0, 28.0, 33.0, 34.0, 33.0, 36.0, 35.0, 41.0, 37.0, 34.0, 51.0, 44.0, 36.0, 41.0, 38.0, 30.0, 27.0, 30.0, 35.0, 30.0, 21.0, 14.0, 21.0, 12.0, 7.0, 9.0, 11.0, 5.0, 6.0, 2.0, 5.0, 4.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.0479736328125, -0.04642438888549805, -0.044875144958496094, -0.04332590103149414, -0.04177665710449219, -0.040227413177490234, -0.03867816925048828, -0.03712892532348633, -0.035579681396484375, -0.03403043746948242, -0.03248119354248047, -0.030931949615478516, -0.029382705688476562, -0.02783346176147461, -0.026284217834472656, -0.024734973907470703, -0.02318572998046875, -0.021636486053466797, -0.020087242126464844, -0.01853799819946289, -0.016988754272460938, -0.015439510345458984, -0.013890266418457031, -0.012341022491455078, -0.010791778564453125, -0.009242534637451172, -0.007693290710449219, -0.006144046783447266, -0.0045948028564453125, -0.0030455589294433594, -0.0014963150024414062, 5.2928924560546875e-05, 0.0016021728515625, 0.003151416778564453, 0.004700660705566406, 0.006249904632568359, 0.0077991485595703125, 0.009348392486572266, 0.010897636413574219, 0.012446880340576172, 0.013996124267578125, 0.015545368194580078, 0.01709461212158203, 0.018643856048583984, 0.020193099975585938, 0.02174234390258789, 0.023291587829589844, 0.024840831756591797, 0.02639007568359375, 0.027939319610595703, 0.029488563537597656, 0.03103780746459961, 0.03258705139160156, 0.034136295318603516, 0.03568553924560547, 0.03723478317260742, 0.038784027099609375, 0.04033327102661133, 0.04188251495361328, 0.043431758880615234, 0.04498100280761719, 0.04653024673461914, 0.048079490661621094, 0.04962873458862305, 0.051177978515625]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 3.0, 3.0, 3.0, 2.0, 2.0, 4.0, 7.0, 6.0, 9.0, 
23.0, 26.0, 43.0, 65.0, 89.0, 144.0, 260.0, 394.0, 806.0, 1795.0, 4955.0, 17289.0, 89221.0, 891680.0, 2874629.0, 259569.0, 38533.0, 9089.0, 3034.0, 1187.0, 604.0, 323.0, 184.0, 116.0, 72.0, 50.0, 24.0, 16.0, 11.0, 7.0, 6.0, 5.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.1826171875, -0.17658233642578125, -0.1705474853515625, -0.16451263427734375, -0.158477783203125, -0.15244293212890625, -0.1464080810546875, -0.14037322998046875, -0.13433837890625, -0.12830352783203125, -0.1222686767578125, -0.11623382568359375, -0.110198974609375, -0.10416412353515625, -0.0981292724609375, -0.09209442138671875, -0.0860595703125, -0.08002471923828125, -0.0739898681640625, -0.06795501708984375, -0.061920166015625, -0.05588531494140625, -0.0498504638671875, -0.04381561279296875, -0.03778076171875, -0.03174591064453125, -0.0257110595703125, -0.01967620849609375, -0.013641357421875, -0.00760650634765625, -0.0015716552734375, 0.00446319580078125, 0.010498046875, 0.01653289794921875, 0.0225677490234375, 0.02860260009765625, 0.034637451171875, 0.04067230224609375, 0.0467071533203125, 0.05274200439453125, 0.05877685546875, 0.06481170654296875, 0.0708465576171875, 0.07688140869140625, 0.082916259765625, 0.08895111083984375, 0.0949859619140625, 0.10102081298828125, 0.1070556640625, 0.11309051513671875, 0.1191253662109375, 0.12516021728515625, 0.131195068359375, 0.13722991943359375, 0.1432647705078125, 0.14929962158203125, 0.15533447265625, 0.16136932373046875, 0.1674041748046875, 0.17343902587890625, 0.179473876953125, 0.18550872802734375, 0.1915435791015625, 0.19757843017578125, 0.20361328125]}, "gradients/encoder.encoder.layers.19.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 5.0, 3.0, 5.0, 6.0, 14.0, 16.0, 22.0, 31.0, 42.0, 53.0, 103.0, 139.0, 177.0, 261.0, 396.0, 666.0, 668.0, 477.0, 312.0, 208.0, 125.0, 96.0, 64.0, 52.0, 44.0, 24.0, 19.0, 18.0, 11.0, 10.0, 5.0, 3.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1103515625, -0.10628509521484375, -0.1022186279296875, -0.09815216064453125, -0.094085693359375, -0.09001922607421875, -0.0859527587890625, -0.08188629150390625, -0.07781982421875, -0.07375335693359375, -0.0696868896484375, -0.06562042236328125, -0.061553955078125, -0.05748748779296875, -0.0534210205078125, -0.04935455322265625, -0.0452880859375, -0.04122161865234375, -0.0371551513671875, -0.03308868408203125, -0.029022216796875, -0.02495574951171875, -0.0208892822265625, -0.01682281494140625, -0.01275634765625, -0.00868988037109375, -0.0046234130859375, -0.00055694580078125, 0.003509521484375, 0.00757598876953125, 0.0116424560546875, 0.01570892333984375, 0.019775390625, 0.02384185791015625, 0.0279083251953125, 0.03197479248046875, 0.036041259765625, 0.04010772705078125, 0.0441741943359375, 0.04824066162109375, 0.05230712890625, 0.05637359619140625, 0.0604400634765625, 0.06450653076171875, 0.068572998046875, 0.07263946533203125, 0.0767059326171875, 0.08077239990234375, 0.0848388671875, 0.08890533447265625, 0.0929718017578125, 0.09703826904296875, 0.101104736328125, 0.10517120361328125, 0.1092376708984375, 0.11330413818359375, 0.11737060546875, 0.12143707275390625, 0.1255035400390625, 0.12957000732421875, 0.133636474609375, 0.13770294189453125, 0.1417694091796875, 0.14583587646484375, 0.14990234375]}, "gradients/encoder.encoder.layers.19.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 3.0, 1.0, 2.0, 0.0, 0.0, 7.0, 6.0, 5.0, 8.0, 10.0, 14.0, 26.0, 36.0, 56.0, 64.0, 128.0, 127.0, 128.0, 113.0, 83.0, 57.0, 47.0, 34.0, 21.0, 13.0, 3.0, 9.0, 3.0, 5.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.51695317029953, -0.49932047724723816, -0.4816877841949463, -0.4640551209449768, -0.44642242789268494, -0.42878973484039307, -0.4111570715904236, -0.3935243785381317, -0.37589168548583984, -0.358258992433548, -0.3406262993812561, -0.3229936361312866, -0.30536094307899475, -0.2877282500267029, -0.2700955867767334, -0.25246289372444153, -0.23483020067214966, -0.2171975076198578, -0.1995648294687271, -0.18193215131759644, -0.16429945826530457, -0.1466667652130127, -0.12903408706188202, -0.11140140146017075, -0.09376871585845947, -0.0761360302567482, -0.058503344655036926, -0.04087065905332565, -0.02323797345161438, -0.005605287849903107, 0.012027397751808167, 0.02966008335351944, 0.04729276895523071, 0.06492545455694199, 0.08255814015865326, 0.10019082576036453, 0.1178235113620758, 0.13545620441436768, 0.15308888256549835, 0.17072156071662903, 0.1883542537689209, 0.20598694682121277, 0.22361962497234344, 0.24125230312347412, 0.258884996175766, 0.27651768922805786, 0.29415035247802734, 0.3117830455303192, 0.3294157385826111, 0.34704843163490295, 0.3646811246871948, 0.3823137879371643, 0.3999464809894562, 0.41757917404174805, 0.43521183729171753, 0.4528445303440094, 0.47047722339630127, 0.48810991644859314, 0.505742609500885, 0.5233752727508545, 0.5410079956054688, 0.5586406588554382, 0.5762733221054077, 0.593906044960022, 0.6115387082099915]}, "gradients/encoder.encoder.layers.19.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 3.0, 1.0, 1.0, 8.0, 0.0, 9.0, 6.0, 8.0, 4.0, 6.0, 7.0, 14.0, 18.0, 19.0, 18.0, 29.0, 24.0, 29.0, 39.0, 43.0, 36.0, 38.0, 33.0, 26.0, 38.0, 45.0, 39.0, 42.0, 32.0, 37.0, 33.0, 41.0, 30.0, 34.0, 25.0, 29.0, 27.0, 17.0, 27.0, 20.0, 19.0, 7.0, 7.0, 14.0, 7.0, 6.0, 3.0, 3.0, 4.0, 5.0, 5.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.28948819637298584, -0.27887579798698425, -0.26826339960098267, -0.25765103101730347, -0.24703861773014069, -0.2364262342453003, -0.2258138358592987, -0.21520143747329712, -0.20458903908729553, -0.19397664070129395, -0.18336425721645355, -0.17275185883045197, -0.16213946044445038, -0.15152707695960999, -0.1409146785736084, -0.1303022801876068, -0.11968989670276642, -0.10907750576734543, -0.09846510738134384, -0.08785271644592285, -0.07724031805992126, -0.06662792712450027, -0.056015536189079285, -0.0454031378030777, -0.03479074686765671, -0.02417835220694542, -0.01356595940887928, -0.002953566610813141, 0.007658828049898148, 0.018271222710609436, 0.028883613646030426, 0.03949601203203201, 0.050108402967453, 0.06072079762816429, 0.07133319228887558, 0.08194558322429657, 0.09255798161029816, 0.10317037254571915, 0.11378276348114014, 0.12439516186714172, 0.1350075602531433, 0.1456199586391449, 0.1562323421239853, 0.16684474050998688, 0.17745713889598846, 0.18806952238082886, 0.19868192076683044, 0.20929431915283203, 0.21990670263767242, 0.230519101023674, 0.2411314845085144, 0.251743882894516, 0.2623562812805176, 0.27296867966651917, 0.28358107805252075, 0.29419344663619995, 0.30480584502220154, 0.3154182434082031, 0.3260306417942047, 0.3366430401802063, 0.3472554087638855, 0.3578678071498871, 0.36848020553588867, 0.37909260392189026, 0.38970500230789185]}, 
"gradients/encoder.encoder.layers.19.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 7.0, 10.0, 12.0, 26.0, 24.0, 27.0, 51.0, 64.0, 88.0, 119.0, 181.0, 269.0, 446.0, 642.0, 1027.0, 1541.0, 2794.0, 4795.0, 8464.0, 16088.0, 30437.0, 61360.0, 123496.0, 228257.0, 256059.0, 153744.0, 76568.0, 37791.0, 19433.0, 10464.0, 5806.0, 3219.0, 1977.0, 1169.0, 693.0, 427.0, 304.0, 215.0, 144.0, 107.0, 71.0, 43.0, 29.0, 21.0, 15.0, 21.0, 4.0, 7.0, 2.0, 4.0, 4.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.12139892578125, -0.11737346649169922, -0.11334800720214844, -0.10932254791259766, -0.10529708862304688, -0.1012716293334961, -0.09724617004394531, -0.09322071075439453, -0.08919525146484375, -0.08516979217529297, -0.08114433288574219, -0.0771188735961914, -0.07309341430664062, -0.06906795501708984, -0.06504249572753906, -0.06101703643798828, -0.0569915771484375, -0.05296611785888672, -0.04894065856933594, -0.044915199279785156, -0.040889739990234375, -0.036864280700683594, -0.03283882141113281, -0.02881336212158203, -0.02478790283203125, -0.02076244354248047, -0.016736984252929688, -0.012711524963378906, -0.008686065673828125, -0.004660606384277344, -0.0006351470947265625, 0.0033903121948242188, 0.007415771484375, 0.011441230773925781, 0.015466690063476562, 0.019492149353027344, 0.023517608642578125, 0.027543067932128906, 0.03156852722167969, 0.03559398651123047, 0.03961944580078125, 0.04364490509033203, 0.04767036437988281, 0.051695823669433594, 0.055721282958984375, 0.059746742248535156, 0.06377220153808594, 0.06779766082763672, 0.0718231201171875, 0.07584857940673828, 0.07987403869628906, 0.08389949798583984, 0.08792495727539062, 0.0919504165649414, 0.09597587585449219, 0.10000133514404297, 0.10402679443359375, 0.10805225372314453, 0.11207771301269531, 0.1161031723022461, 0.12012863159179688, 0.12415409088134766, 0.12817955017089844, 0.13220500946044922, 0.13623046875]}, "gradients/encoder.encoder.layers.19.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 6.0, 6.0, 8.0, 10.0, 23.0, 33.0, 24.0, 33.0, 31.0, 57.0, 42.0, 47.0, 61.0, 41.0, 67.0, 69.0, 63.0, 49.0, 60.0, 34.0, 53.0, 36.0, 34.0, 27.0, 25.0, 22.0, 11.0, 9.0, 5.0, 4.0, 5.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.06402587890625, -0.0620269775390625, -0.060028076171875, -0.0580291748046875, -0.0560302734375, -0.0540313720703125, -0.052032470703125, -0.0500335693359375, -0.04803466796875, -0.0460357666015625, -0.044036865234375, -0.0420379638671875, -0.0400390625, -0.0380401611328125, -0.036041259765625, -0.0340423583984375, -0.03204345703125, -0.0300445556640625, -0.028045654296875, -0.0260467529296875, -0.0240478515625, -0.0220489501953125, -0.020050048828125, -0.0180511474609375, -0.01605224609375, -0.0140533447265625, -0.012054443359375, -0.0100555419921875, -0.008056640625, -0.0060577392578125, -0.004058837890625, -0.0020599365234375, -6.103515625e-05, 0.0019378662109375, 0.003936767578125, 0.0059356689453125, 0.0079345703125, 0.0099334716796875, 0.011932373046875, 0.0139312744140625, 0.01593017578125, 0.0179290771484375, 0.019927978515625, 0.0219268798828125, 0.02392578125, 0.0259246826171875, 0.027923583984375, 0.0299224853515625, 0.03192138671875, 0.0339202880859375, 0.035919189453125, 0.0379180908203125, 0.0399169921875, 0.0419158935546875, 0.043914794921875, 0.0459136962890625, 0.04791259765625, 0.0499114990234375, 0.051910400390625, 
0.0539093017578125, 0.055908203125, 0.0579071044921875, 0.059906005859375, 0.0619049072265625, 0.06390380859375]}, "gradients/encoder.encoder.layers.19.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 4.0, 7.0, 11.0, 11.0, 21.0, 18.0, 22.0, 30.0, 45.0, 70.0, 80.0, 150.0, 207.0, 350.0, 570.0, 1025.0, 2143.0, 4915.0, 13780.0, 43052.0, 155523.0, 448333.0, 270070.0, 72556.0, 21898.0, 7414.0, 2960.0, 1370.0, 707.0, 407.0, 259.0, 159.0, 122.0, 71.0, 56.0, 43.0, 23.0, 18.0, 14.0, 10.0, 7.0, 11.0, 9.0, 3.0, 6.0, 1.0, 3.0, 0.0, 1.0, 2.0], "bins": [-0.226806640625, -0.22045516967773438, -0.21410369873046875, -0.20775222778320312, -0.2014007568359375, -0.19504928588867188, -0.18869781494140625, -0.18234634399414062, -0.175994873046875, -0.16964340209960938, -0.16329193115234375, -0.15694046020507812, -0.1505889892578125, -0.14423751831054688, -0.13788604736328125, -0.13153457641601562, -0.12518310546875, -0.11883163452148438, -0.11248016357421875, -0.10612869262695312, -0.0997772216796875, -0.09342575073242188, -0.08707427978515625, -0.08072280883789062, -0.074371337890625, -0.06801986694335938, -0.06166839599609375, -0.055316925048828125, -0.0489654541015625, -0.042613983154296875, -0.03626251220703125, -0.029911041259765625, -0.0235595703125, -0.017208099365234375, -0.01085662841796875, -0.004505157470703125, 0.0018463134765625, 0.008197784423828125, 0.01454925537109375, 0.020900726318359375, 0.027252197265625, 0.033603668212890625, 0.03995513916015625, 0.046306610107421875, 0.0526580810546875, 0.059009552001953125, 0.06536102294921875, 0.07171249389648438, 0.07806396484375, 0.08441543579101562, 0.09076690673828125, 0.09711837768554688, 0.1034698486328125, 0.10982131958007812, 0.11617279052734375, 0.12252426147460938, 0.128875732421875, 0.13522720336914062, 0.14157867431640625, 0.14793014526367188, 0.1542816162109375, 0.16063308715820312, 0.16698455810546875, 0.17333602905273438, 0.1796875]}, "gradients/encoder.encoder.layers.19.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 6.0, 4.0, 5.0, 8.0, 7.0, 12.0, 14.0, 11.0, 14.0, 27.0, 19.0, 28.0, 22.0, 26.0, 33.0, 45.0, 53.0, 38.0, 37.0, 37.0, 49.0, 45.0, 48.0, 37.0, 39.0, 42.0, 36.0, 30.0, 26.0, 27.0, 19.0, 28.0, 27.0, 23.0, 13.0, 16.0, 14.0, 10.0, 7.0, 8.0, 9.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.12249755859375, -0.11833477020263672, -0.11417198181152344, -0.11000919342041016, -0.10584640502929688, -0.1016836166381836, -0.09752082824707031, -0.09335803985595703, -0.08919525146484375, -0.08503246307373047, -0.08086967468261719, -0.0767068862915039, -0.07254409790039062, -0.06838130950927734, -0.06421852111816406, -0.06005573272705078, -0.0558929443359375, -0.05173015594482422, -0.04756736755371094, -0.043404579162597656, -0.039241790771484375, -0.035079002380371094, -0.030916213989257812, -0.02675342559814453, -0.02259063720703125, -0.01842784881591797, -0.014265060424804688, -0.010102272033691406, -0.005939483642578125, -0.0017766952514648438, 0.0023860931396484375, 0.006548881530761719, 0.010711669921875, 0.014874458312988281, 0.019037246704101562, 0.023200035095214844, 0.027362823486328125, 0.031525611877441406, 0.03568840026855469, 0.03985118865966797, 0.04401397705078125, 0.04817676544189453, 0.05233955383300781, 0.056502342224121094, 0.060665130615234375, 0.06482791900634766, 0.06899070739746094, 0.07315349578857422, 0.0773162841796875, 0.08147907257080078, 
0.08564186096191406, 0.08980464935302734, 0.09396743774414062, 0.0981302261352539, 0.10229301452636719, 0.10645580291748047, 0.11061859130859375, 0.11478137969970703, 0.11894416809082031, 0.1231069564819336, 0.12726974487304688, 0.13143253326416016, 0.13559532165527344, 0.13975811004638672, 0.1439208984375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 4.0, 2.0, 2.0, 2.0, 3.0, 5.0, 11.0, 6.0, 5.0, 11.0, 23.0, 37.0, 41.0, 54.0, 89.0, 134.0, 264.0, 455.0, 864.0, 1988.0, 6331.0, 29666.0, 209702.0, 605552.0, 160906.0, 23458.0, 5282.0, 1822.0, 795.0, 404.0, 254.0, 153.0, 75.0, 46.0, 35.0, 28.0, 23.0, 8.0, 9.0, 5.0, 3.0, 1.0, 5.0, 4.0, 0.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.08477783203125, -0.08257198333740234, -0.08036613464355469, -0.07816028594970703, -0.07595443725585938, -0.07374858856201172, -0.07154273986816406, -0.0693368911743164, -0.06713104248046875, -0.0649251937866211, -0.06271934509277344, -0.06051349639892578, -0.058307647705078125, -0.05610179901123047, -0.05389595031738281, -0.051690101623535156, -0.0494842529296875, -0.047278404235839844, -0.04507255554199219, -0.04286670684814453, -0.040660858154296875, -0.03845500946044922, -0.03624916076660156, -0.034043312072753906, -0.03183746337890625, -0.029631614685058594, -0.027425765991210938, -0.02521991729736328, -0.023014068603515625, -0.02080821990966797, -0.018602371215820312, -0.016396522521972656, -0.014190673828125, -0.011984825134277344, -0.009778976440429688, -0.007573127746582031, -0.005367279052734375, -0.0031614303588867188, -0.0009555816650390625, 0.0012502670288085938, 0.00345611572265625, 0.005661964416503906, 0.007867813110351562, 0.010073661804199219, 0.012279510498046875, 0.014485359191894531, 0.016691207885742188, 0.018897056579589844, 0.0211029052734375, 0.023308753967285156, 0.025514602661132812, 0.02772045135498047, 0.029926300048828125, 0.03213214874267578, 0.03433799743652344, 0.036543846130371094, 0.03874969482421875, 0.040955543518066406, 0.04316139221191406, 0.04536724090576172, 0.047573089599609375, 0.04977893829345703, 0.05198478698730469, 0.054190635681152344, 0.056396484375]}, "gradients/encoder.encoder.layers.19.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 8.0, 4.0, 5.0, 2.0, 4.0, 7.0, 7.0, 6.0, 6.0, 12.0, 12.0, 14.0, 11.0, 23.0, 19.0, 39.0, 40.0, 37.0, 50.0, 41.0, 75.0, 60.0, 68.0, 59.0, 70.0, 53.0, 53.0, 39.0, 28.0, 27.0, 25.0, 22.0, 18.0, 10.0, 13.0, 7.0, 7.0, 4.0, 6.0, 6.0, 1.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 3.0, 2.0, 0.0, 1.0, 1.0], "bins": [-1.0132789611816406e-05, -9.814277291297913e-06, -9.495764970779419e-06, -9.177252650260925e-06, -8.858740329742432e-06, -8.540228009223938e-06, -8.221715688705444e-06, -7.90320336818695e-06, -7.584691047668457e-06, -7.266178727149963e-06, -6.94766640663147e-06, -6.629154086112976e-06, -6.310641765594482e-06, -5.992129445075989e-06, -5.673617124557495e-06, -5.3551048040390015e-06, -5.036592483520508e-06, -4.718080163002014e-06, -4.3995678424835205e-06, -4.081055521965027e-06, -3.762543201446533e-06, -3.4440308809280396e-06, -3.125518560409546e-06, -2.8070062398910522e-06, -2.4884939193725586e-06, -2.169981598854065e-06, -1.8514692783355713e-06, -1.5329569578170776e-06, -1.214444637298584e-06, -8.959323167800903e-07, -5.774199962615967e-07, -2.5890767574310303e-07, 5.960464477539063e-08, 3.781169652938843e-07, 6.966292858123779e-07, 1.0151416063308716e-06, 
1.3336539268493652e-06, 1.6521662473678589e-06, 1.9706785678863525e-06, 2.289190888404846e-06, 2.60770320892334e-06, 2.9262155294418335e-06, 3.244727849960327e-06, 3.563240170478821e-06, 3.8817524909973145e-06, 4.200264811515808e-06, 4.518777132034302e-06, 4.837289452552795e-06, 5.155801773071289e-06, 5.474314093589783e-06, 5.792826414108276e-06, 6.11133873462677e-06, 6.429851055145264e-06, 6.748363375663757e-06, 7.066875696182251e-06, 7.385388016700745e-06, 7.703900337219238e-06, 8.022412657737732e-06, 8.340924978256226e-06, 8.65943729877472e-06, 8.977949619293213e-06, 9.296461939811707e-06, 9.6149742603302e-06, 9.933486580848694e-06, 1.0251998901367188e-05]}, "gradients/encoder.encoder.layers.19.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 3.0, 1.0, 5.0, 5.0, 6.0, 12.0, 22.0, 21.0, 40.0, 58.0, 93.0, 107.0, 167.0, 264.0, 480.0, 852.0, 1620.0, 3207.0, 7780.0, 23864.0, 93731.0, 392849.0, 391558.0, 93437.0, 23682.0, 7835.0, 3271.0, 1522.0, 784.0, 506.0, 276.0, 165.0, 109.0, 67.0, 51.0, 28.0, 13.0, 23.0, 9.0, 10.0, 7.0, 2.0, 4.0, 4.0, 3.0, 3.0, 3.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.06396484375, -0.06213712692260742, -0.060309410095214844, -0.058481693267822266, -0.05665397644042969, -0.05482625961303711, -0.05299854278564453, -0.05117082595825195, -0.049343109130859375, -0.0475153923034668, -0.04568767547607422, -0.04385995864868164, -0.04203224182128906, -0.040204524993896484, -0.038376808166503906, -0.03654909133911133, -0.03472137451171875, -0.03289365768432617, -0.031065940856933594, -0.029238224029541016, -0.027410507202148438, -0.02558279037475586, -0.02375507354736328, -0.021927356719970703, -0.020099639892578125, -0.018271923065185547, -0.01644420623779297, -0.01461648941040039, -0.012788772583007812, -0.010961055755615234, -0.009133338928222656, -0.007305622100830078, -0.0054779052734375, -0.003650188446044922, -0.0018224716186523438, 5.245208740234375e-06, 0.0018329620361328125, 0.0036606788635253906, 0.005488395690917969, 0.007316112518310547, 0.009143829345703125, 0.010971546173095703, 0.012799263000488281, 0.01462697982788086, 0.016454696655273438, 0.018282413482666016, 0.020110130310058594, 0.021937847137451172, 0.02376556396484375, 0.025593280792236328, 0.027420997619628906, 0.029248714447021484, 0.031076431274414062, 0.03290414810180664, 0.03473186492919922, 0.0365595817565918, 0.038387298583984375, 0.04021501541137695, 0.04204273223876953, 0.04387044906616211, 0.04569816589355469, 0.047525882720947266, 0.049353599548339844, 0.05118131637573242, 0.053009033203125]}, "gradients/encoder.encoder.layers.19.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 6.0, 0.0, 5.0, 4.0, 8.0, 5.0, 9.0, 6.0, 12.0, 11.0, 22.0, 11.0, 29.0, 35.0, 36.0, 58.0, 49.0, 51.0, 56.0, 82.0, 68.0, 66.0, 75.0, 41.0, 40.0, 41.0, 30.0, 28.0, 31.0, 11.0, 16.0, 12.0, 11.0, 8.0, 13.0, 9.0, 3.0, 5.0, 4.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0306243896484375, -0.02964329719543457, -0.02866220474243164, -0.02768111228942871, -0.02670001983642578, -0.02571892738342285, -0.024737834930419922, -0.023756742477416992, -0.022775650024414062, -0.021794557571411133, -0.020813465118408203, -0.019832372665405273, -0.018851280212402344, -0.017870187759399414, -0.016889095306396484, -0.015908002853393555, -0.014926910400390625, -0.013945817947387695, -0.012964725494384766, -0.011983633041381836, -0.011002540588378906, -0.010021448135375977, 
-0.009040355682373047, -0.008059263229370117, -0.0070781707763671875, -0.006097078323364258, -0.005115985870361328, -0.0041348934173583984, -0.0031538009643554688, -0.002172708511352539, -0.0011916160583496094, -0.0002105236053466797, 0.00077056884765625, 0.0017516613006591797, 0.0027327537536621094, 0.003713846206665039, 0.004694938659667969, 0.0056760311126708984, 0.006657123565673828, 0.007638216018676758, 0.008619308471679688, 0.009600400924682617, 0.010581493377685547, 0.011562585830688477, 0.012543678283691406, 0.013524770736694336, 0.014505863189697266, 0.015486955642700195, 0.016468048095703125, 0.017449140548706055, 0.018430233001708984, 0.019411325454711914, 0.020392417907714844, 0.021373510360717773, 0.022354602813720703, 0.023335695266723633, 0.024316787719726562, 0.025297880172729492, 0.026278972625732422, 0.02726006507873535, 0.02824115753173828, 0.02922224998474121, 0.03020334243774414, 0.03118443489074707, 0.03216552734375]}, "gradients/encoder.encoder.layers.19.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 5.0, 2.0, 6.0, 6.0, 20.0, 64.0, 145.0, 325.0, 246.0, 119.0, 43.0, 18.0, 8.0, 7.0, 5.0, 0.0, 0.0, 1.0], "bins": [-2.054638385772705, -2.0165109634399414, -1.9783833026885986, -1.9402557611465454, -1.9021282196044922, -1.864000678062439, -1.8258731365203857, -1.7877455949783325, -1.7496180534362793, -1.711490511894226, -1.6733629703521729, -1.6352354288101196, -1.5971078872680664, -1.5589803457260132, -1.52085280418396, -1.4827252626419067, -1.4445977210998535, -1.4064701795578003, -1.368342638015747, -1.3302150964736938, -1.2920875549316406, -1.2539600133895874, -1.2158324718475342, -1.177704930305481, -1.1395775079727173, -1.101449966430664, -1.0633224248886108, -1.0251948833465576, -0.9870673418045044, -0.9489398002624512, -0.910812258720398, -0.8726847171783447, -0.8345571756362915, -0.7964296340942383, -0.7583020925521851, -0.7201745510101318, -0.6820470094680786, -0.6439194679260254, -0.6057919263839722, -0.567664384841919, -0.5295368432998657, -0.4914093017578125, -0.4532817602157593, -0.41515421867370605, -0.37702667713165283, -0.3388991355895996, -0.3007716238498688, -0.26264408230781555, -0.22451657056808472, -0.1863890290260315, -0.14826148748397827, -0.11013396084308624, -0.07200641930103302, -0.0338788777589798, 0.0042486488819122314, 0.042376190423965454, 0.08050373196601868, 0.1186312735080719, 0.15675881505012512, 0.19488634169101715, 0.23301388323307037, 0.2711414098739624, 0.3092689514160156, 0.34739649295806885, 0.38552403450012207]}, "gradients/encoder.encoder.layers.19.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 6.0, 2.0, 5.0, 6.0, 8.0, 17.0, 18.0, 10.0, 19.0, 29.0, 18.0, 36.0, 25.0, 37.0, 40.0, 42.0, 50.0, 49.0, 37.0, 60.0, 52.0, 47.0, 45.0, 39.0, 48.0, 44.0, 35.0, 31.0, 25.0, 27.0, 16.0, 20.0, 15.0, 16.0, 16.0, 3.0, 5.0, 6.0, 3.0, 0.0, 5.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.410067081451416, -0.3960208296775818, -0.38197457790374756, -0.36792832612991333, -0.3538820743560791, -0.3398358225822449, -0.32578957080841064, -0.31174328923225403, -0.2976970374584198, -0.28365078568458557, -0.26960453391075134, -0.2555582821369171, -0.2415120154619217, -0.22746576368808746, -0.21341951191425323, -0.1993732452392578, 
-0.18532700836658478, -0.17128075659275055, -0.15723450481891632, -0.1431882381439209, -0.12914198637008667, -0.11509573459625244, -0.10104948282241821, -0.08700322359800339, -0.07295697182416916, -0.05891071632504463, -0.044864460825920105, -0.030818209052085876, -0.01677195355296135, -0.0027256980538368225, 0.011320553719997406, 0.02536681294441223, 0.03941306471824646, 0.05345932021737099, 0.06750557571649551, 0.08155182749032974, 0.09559808671474457, 0.1096443384885788, 0.12369059026241302, 0.13773685693740845, 0.15178310871124268, 0.1658293604850769, 0.17987561225891113, 0.19392186403274536, 0.20796813070774078, 0.222014382481575, 0.23606063425540924, 0.25010690093040466, 0.2641531229019165, 0.27819937467575073, 0.29224562644958496, 0.3062918782234192, 0.3203381299972534, 0.33438438177108765, 0.3484306335449219, 0.3624769151210785, 0.3765231668949127, 0.39056941866874695, 0.4046156704425812, 0.4186619222164154, 0.43270817399024963, 0.44675445556640625, 0.4608007073402405, 0.4748469591140747, 0.48889321088790894]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 1.0, 7.0, 10.0, 15.0, 28.0, 23.0, 33.0, 75.0, 116.0, 195.0, 436.0, 958.0, 2431.0, 7754.0, 31736.0, 228124.0, 3349358.0, 507903.0, 48015.0, 11022.0, 3525.0, 1281.0, 597.0, 260.0, 163.0, 88.0, 44.0, 26.0, 24.0, 12.0, 11.0, 7.0, 4.0, 4.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.1658935546875, -0.1604766845703125, -0.155059814453125, -0.1496429443359375, -0.14422607421875, -0.1388092041015625, -0.133392333984375, -0.1279754638671875, -0.12255859375, -0.1171417236328125, -0.111724853515625, -0.1063079833984375, -0.10089111328125, -0.0954742431640625, -0.090057373046875, -0.0846405029296875, -0.0792236328125, -0.0738067626953125, -0.068389892578125, -0.0629730224609375, -0.05755615234375, -0.0521392822265625, -0.046722412109375, -0.0413055419921875, -0.035888671875, -0.0304718017578125, -0.025054931640625, -0.0196380615234375, -0.01422119140625, -0.0088043212890625, -0.003387451171875, 0.0020294189453125, 0.0074462890625, 0.0128631591796875, 0.018280029296875, 0.0236968994140625, 0.02911376953125, 0.0345306396484375, 0.039947509765625, 0.0453643798828125, 0.05078125, 0.0561981201171875, 0.061614990234375, 0.0670318603515625, 0.07244873046875, 0.0778656005859375, 0.083282470703125, 0.0886993408203125, 0.0941162109375, 0.0995330810546875, 0.104949951171875, 0.1103668212890625, 0.11578369140625, 0.1212005615234375, 0.126617431640625, 0.1320343017578125, 0.137451171875, 0.1428680419921875, 0.148284912109375, 0.1537017822265625, 0.15911865234375, 0.1645355224609375, 0.169952392578125, 0.1753692626953125, 0.1807861328125]}, "gradients/encoder.encoder.layers.18.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 3.0, 5.0, 6.0, 9.0, 9.0, 13.0, 16.0, 27.0, 34.0, 44.0, 33.0, 47.0, 66.0, 68.0, 81.0, 67.0, 70.0, 67.0, 70.0, 42.0, 62.0, 36.0, 25.0, 23.0, 23.0, 16.0, 23.0, 6.0, 6.0, 5.0, 2.0, 5.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.07293701171875, -0.07077693939208984, -0.06861686706542969, -0.06645679473876953, -0.06429672241210938, -0.06213665008544922, -0.05997657775878906, -0.057816505432128906, -0.05565643310546875, -0.053496360778808594, -0.05133628845214844, -0.04917621612548828, -0.047016143798828125, 
-0.04485607147216797, -0.04269599914550781, -0.040535926818847656, -0.0383758544921875, -0.036215782165527344, -0.03405570983886719, -0.03189563751220703, -0.029735565185546875, -0.02757549285888672, -0.025415420532226562, -0.023255348205566406, -0.02109527587890625, -0.018935203552246094, -0.016775131225585938, -0.014615058898925781, -0.012454986572265625, -0.010294914245605469, -0.008134841918945312, -0.005974769592285156, -0.003814697265625, -0.0016546249389648438, 0.0005054473876953125, 0.0026655197143554688, 0.004825592041015625, 0.006985664367675781, 0.009145736694335938, 0.011305809020996094, 0.01346588134765625, 0.015625953674316406, 0.017786026000976562, 0.01994609832763672, 0.022106170654296875, 0.02426624298095703, 0.026426315307617188, 0.028586387634277344, 0.0307464599609375, 0.032906532287597656, 0.03506660461425781, 0.03722667694091797, 0.039386749267578125, 0.04154682159423828, 0.04370689392089844, 0.045866966247558594, 0.04802703857421875, 0.050187110900878906, 0.05234718322753906, 0.05450725555419922, 0.056667327880859375, 0.05882740020751953, 0.06098747253417969, 0.06314754486083984, 0.0653076171875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 3.0, 2.0, 0.0, 10.0, 3.0, 8.0, 11.0, 13.0, 35.0, 43.0, 72.0, 105.0, 232.0, 566.0, 2276.0, 22367.0, 1865100.0, 2275063.0, 24857.0, 2311.0, 598.0, 268.0, 148.0, 71.0, 44.0, 23.0, 24.0, 6.0, 9.0, 6.0, 10.0, 5.0, 4.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.47509765625, -0.46430397033691406, -0.4535102844238281, -0.4427165985107422, -0.43192291259765625, -0.4211292266845703, -0.4103355407714844, -0.39954185485839844, -0.3887481689453125, -0.37795448303222656, -0.3671607971191406, -0.3563671112060547, -0.34557342529296875, -0.3347797393798828, -0.3239860534667969, -0.31319236755371094, -0.302398681640625, -0.29160499572753906, -0.2808113098144531, -0.2700176239013672, -0.25922393798828125, -0.2484302520751953, -0.23763656616210938, -0.22684288024902344, -0.2160491943359375, -0.20525550842285156, -0.19446182250976562, -0.1836681365966797, -0.17287445068359375, -0.1620807647705078, -0.15128707885742188, -0.14049339294433594, -0.12969970703125, -0.11890602111816406, -0.10811233520507812, -0.09731864929199219, -0.08652496337890625, -0.07573127746582031, -0.06493759155273438, -0.05414390563964844, -0.0433502197265625, -0.03255653381347656, -0.021762847900390625, -0.010969161987304688, -0.00017547607421875, 0.010618209838867188, 0.021411895751953125, 0.03220558166503906, 0.042999267578125, 0.05379295349121094, 0.06458663940429688, 0.07538032531738281, 0.08617401123046875, 0.09696769714355469, 0.10776138305664062, 0.11855506896972656, 0.1293487548828125, 0.14014244079589844, 0.15093612670898438, 0.1617298126220703, 0.17252349853515625, 0.1833171844482422, 0.19411087036132812, 0.20490455627441406, 0.2156982421875]}, "gradients/encoder.encoder.layers.18.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 3.0, 3.0, 10.0, 6.0, 2.0, 4.0, 12.0, 12.0, 16.0, 24.0, 42.0, 51.0, 76.0, 111.0, 155.0, 224.0, 428.0, 759.0, 821.0, 512.0, 296.0, 162.0, 116.0, 83.0, 47.0, 36.0, 22.0, 16.0, 11.0, 7.0, 7.0, 8.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0804443359375, -0.07662773132324219, -0.07281112670898438, 
-0.06899452209472656, -0.06517791748046875, -0.06136131286621094, -0.057544708251953125, -0.05372810363769531, -0.0499114990234375, -0.04609489440917969, -0.042278289794921875, -0.03846168518066406, -0.03464508056640625, -0.030828475952148438, -0.027011871337890625, -0.023195266723632812, -0.019378662109375, -0.015562057495117188, -0.011745452880859375, -0.007928848266601562, -0.00411224365234375, -0.0002956390380859375, 0.003520965576171875, 0.0073375701904296875, 0.0111541748046875, 0.014970779418945312, 0.018787384033203125, 0.022603988647460938, 0.02642059326171875, 0.030237197875976562, 0.034053802490234375, 0.03787040710449219, 0.04168701171875, 0.04550361633300781, 0.049320220947265625, 0.05313682556152344, 0.05695343017578125, 0.06077003479003906, 0.06458663940429688, 0.06840324401855469, 0.0722198486328125, 0.07603645324707031, 0.07985305786132812, 0.08366966247558594, 0.08748626708984375, 0.09130287170410156, 0.09511947631835938, 0.09893608093261719, 0.102752685546875, 0.10656929016113281, 0.11038589477539062, 0.11420249938964844, 0.11801910400390625, 0.12183570861816406, 0.12565231323242188, 0.1294689178466797, 0.1332855224609375, 0.1371021270751953, 0.14091873168945312, 0.14473533630371094, 0.14855194091796875, 0.15236854553222656, 0.15618515014648438, 0.1600017547607422, 0.163818359375]}, "gradients/encoder.encoder.layers.18.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 9.0, 18.0, 90.0, 259.0, 396.0, 179.0, 31.0, 19.0, 6.0, 4.0, 3.0, 0.0, 1.0], "bins": [-2.58262300491333, -2.5358972549438477, -2.4891717433929443, -2.442445993423462, -2.3957202434539795, -2.348994731903076, -2.3022689819335938, -2.2555432319641113, -2.208817481994629, -2.1620917320251465, -2.115366220474243, -2.0686404705047607, -2.0219147205352783, -1.9751890897750854, -1.9284634590148926, -1.8817377090454102, -1.8350121974945068, -1.788286566734314, -1.7415608167648315, -1.6948351860046387, -1.6481094360351562, -1.6013838052749634, -1.5546581745147705, -1.507932424545288, -1.4612066745758057, -1.4144810438156128, -1.3677552938461304, -1.3210296630859375, -1.274303913116455, -1.2275782823562622, -1.1808526515960693, -1.134126901626587, -1.0874011516571045, -1.0406755208969116, -0.9939497709274292, -0.9472241401672363, -0.9004984498023987, -0.853772759437561, -0.8070470690727234, -0.7603213787078857, -0.7135957479476929, -0.6668700575828552, -0.6201443672180176, -0.5734187364578247, -0.5266930460929871, -0.4799673557281494, -0.43324166536331177, -0.3865160048007965, -0.33979034423828125, -0.2930646538734436, -0.24633899331092834, -0.1996133029460907, -0.15288762748241425, -0.10616195201873779, -0.059436261653900146, -0.012710601091384888, 0.03401508927345276, 0.08074076473712921, 0.12746644020080566, 0.1741921305656433, 0.22091780602931976, 0.2676434814929962, 0.31436917185783386, 0.3610948324203491, 0.40782052278518677]}, "gradients/encoder.encoder.layers.18.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 3.0, 3.0, 3.0, 12.0, 5.0, 11.0, 13.0, 20.0, 15.0, 19.0, 15.0, 34.0, 26.0, 38.0, 36.0, 33.0, 33.0, 39.0, 53.0, 58.0, 50.0, 48.0, 51.0, 34.0, 41.0, 39.0, 36.0, 38.0, 38.0, 39.0, 25.0, 24.0, 15.0, 17.0, 9.0, 11.0, 11.0, 2.0, 7.0, 2.0, 1.0, 3.0, 2.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0], "bins": [-0.29742372035980225, -0.28716957569122314, -0.27691543102264404, -0.26666128635406494, -0.25640714168548584, -0.24615299701690674, -0.23589886724948883, -0.22564472258090973, -0.21539057791233063, -0.20513643324375153, -0.19488228857517242, -0.18462814390659332, -0.17437401413917542, -0.1641198694705963, -0.1538657248020172, -0.1436115801334381, -0.133357435464859, -0.12310329079627991, -0.1128491461277008, -0.1025950089097023, -0.0923408642411232, -0.0820867195725441, -0.0718325823545456, -0.06157843768596649, -0.05132429301738739, -0.04107014834880829, -0.030816007405519485, -0.020561864599585533, -0.01030772179365158, -5.357712507247925e-05, 0.010200563818216324, 0.020454704761505127, 0.03070884943008423, 0.04096299409866333, 0.05121713504195213, 0.061471275985240936, 0.07172542065382004, 0.08197956532239914, 0.09223370254039764, 0.10248784720897675, 0.11274199187755585, 0.12299613654613495, 0.13325028121471405, 0.14350442588329315, 0.15375855565071106, 0.16401270031929016, 0.17426684498786926, 0.18452098965644836, 0.19477513432502747, 0.20502927899360657, 0.21528342366218567, 0.22553756833076477, 0.23579171299934387, 0.24604585766792297, 0.2563000023365021, 0.2665541172027588, 0.2768082618713379, 0.287062406539917, 0.2973165512084961, 0.3075706958770752, 0.3178248405456543, 0.3280789852142334, 0.3383331298828125, 0.3485872745513916, 0.3588414192199707]}, "gradients/encoder.encoder.layers.18.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 4.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 7.0, 19.0, 13.0, 25.0, 26.0, 27.0, 50.0, 68.0, 100.0, 140.0, 204.0, 353.0, 704.0, 1224.0, 2710.0, 6005.0, 14893.0, 42372.0, 137954.0, 376074.0, 311612.0, 100594.0, 32068.0, 11747.0, 4794.0, 2238.0, 1026.0, 558.0, 310.0, 205.0, 124.0, 83.0, 82.0, 49.0, 19.0, 20.0, 13.0, 11.0, 4.0, 8.0, 5.0, 4.0, 1.0, 3.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.1326904296875, -0.1282482147216797, -0.12380599975585938, -0.11936378479003906, -0.11492156982421875, -0.11047935485839844, -0.10603713989257812, -0.10159492492675781, -0.0971527099609375, -0.09271049499511719, -0.08826828002929688, -0.08382606506347656, -0.07938385009765625, -0.07494163513183594, -0.07049942016601562, -0.06605720520019531, -0.061614990234375, -0.05717277526855469, -0.052730560302734375, -0.04828834533691406, -0.04384613037109375, -0.03940391540527344, -0.034961700439453125, -0.030519485473632812, -0.0260772705078125, -0.021635055541992188, -0.017192840576171875, -0.012750625610351562, -0.00830841064453125, -0.0038661956787109375, 0.000576019287109375, 0.0050182342529296875, 0.00946044921875, 0.013902664184570312, 0.018344879150390625, 0.022787094116210938, 0.02722930908203125, 0.03167152404785156, 0.036113739013671875, 0.04055595397949219, 0.0449981689453125, 0.04944038391113281, 0.053882598876953125, 0.05832481384277344, 0.06276702880859375, 0.06720924377441406, 0.07165145874023438, 0.07609367370605469, 0.080535888671875, 0.08497810363769531, 0.08942031860351562, 0.09386253356933594, 0.09830474853515625, 0.10274696350097656, 0.10718917846679688, 0.11163139343261719, 0.1160736083984375, 0.12051582336425781, 0.12495803833007812, 0.12940025329589844, 0.13384246826171875, 0.13828468322753906, 0.14272689819335938, 0.1471691131591797, 0.151611328125]}, "gradients/encoder.encoder.layers.18.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 13.0, 18.0, 15.0, 24.0, 36.0, 53.0, 53.0, 95.0, 79.0, 
92.0, 87.0, 91.0, 74.0, 62.0, 55.0, 51.0, 32.0, 18.0, 18.0, 11.0, 10.0, 9.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06890869140625, -0.06647777557373047, -0.06404685974121094, -0.061615943908691406, -0.059185028076171875, -0.056754112243652344, -0.05432319641113281, -0.05189228057861328, -0.04946136474609375, -0.04703044891357422, -0.04459953308105469, -0.042168617248535156, -0.039737701416015625, -0.037306785583496094, -0.03487586975097656, -0.03244495391845703, -0.0300140380859375, -0.02758312225341797, -0.025152206420898438, -0.022721290588378906, -0.020290374755859375, -0.017859458923339844, -0.015428543090820312, -0.012997627258300781, -0.01056671142578125, -0.008135795593261719, -0.0057048797607421875, -0.0032739639282226562, -0.000843048095703125, 0.0015878677368164062, 0.0040187835693359375, 0.006449699401855469, 0.008880615234375, 0.011311531066894531, 0.013742446899414062, 0.016173362731933594, 0.018604278564453125, 0.021035194396972656, 0.023466110229492188, 0.02589702606201172, 0.02832794189453125, 0.03075885772705078, 0.03318977355957031, 0.035620689392089844, 0.038051605224609375, 0.040482521057128906, 0.04291343688964844, 0.04534435272216797, 0.0477752685546875, 0.05020618438720703, 0.05263710021972656, 0.055068016052246094, 0.057498931884765625, 0.059929847717285156, 0.06236076354980469, 0.06479167938232422, 0.06722259521484375, 0.06965351104736328, 0.07208442687988281, 0.07451534271240234, 0.07694625854492188, 0.0793771743774414, 0.08180809020996094, 0.08423900604248047, 0.086669921875]}, "gradients/encoder.encoder.layers.18.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 5.0, 9.0, 8.0, 6.0, 10.0, 9.0, 11.0, 22.0, 27.0, 23.0, 34.0, 39.0, 65.0, 92.0, 179.0, 347.0, 860.0, 2263.0, 6716.0, 20770.0, 71646.0, 268631.0, 453275.0, 159592.0, 43537.0, 13206.0, 4327.0, 1528.0, 581.0, 275.0, 113.0, 82.0, 63.0, 41.0, 32.0, 28.0, 24.0, 18.0, 18.0, 11.0, 7.0, 9.0, 6.0, 5.0, 1.0, 7.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.14990234375, -0.1454010009765625, -0.140899658203125, -0.1363983154296875, -0.13189697265625, -0.1273956298828125, -0.122894287109375, -0.1183929443359375, -0.1138916015625, -0.1093902587890625, -0.104888916015625, -0.1003875732421875, -0.09588623046875, -0.0913848876953125, -0.086883544921875, -0.0823822021484375, -0.077880859375, -0.0733795166015625, -0.068878173828125, -0.0643768310546875, -0.05987548828125, -0.0553741455078125, -0.050872802734375, -0.0463714599609375, -0.0418701171875, -0.0373687744140625, -0.032867431640625, -0.0283660888671875, -0.02386474609375, -0.0193634033203125, -0.014862060546875, -0.0103607177734375, -0.005859375, -0.0013580322265625, 0.003143310546875, 0.0076446533203125, 0.01214599609375, 0.0166473388671875, 0.021148681640625, 0.0256500244140625, 0.0301513671875, 0.0346527099609375, 0.039154052734375, 0.0436553955078125, 0.04815673828125, 0.0526580810546875, 0.057159423828125, 0.0616607666015625, 0.066162109375, 0.0706634521484375, 0.075164794921875, 0.0796661376953125, 0.08416748046875, 0.0886688232421875, 0.093170166015625, 0.0976715087890625, 0.1021728515625, 0.1066741943359375, 0.111175537109375, 0.1156768798828125, 0.12017822265625, 0.1246795654296875, 0.129180908203125, 0.1336822509765625, 0.13818359375]}, "gradients/encoder.encoder.layers.18.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0, 7.0, 4.0, 3.0, 11.0, 10.0, 
13.0, 14.0, 18.0, 24.0, 23.0, 21.0, 17.0, 27.0, 22.0, 27.0, 26.0, 26.0, 26.0, 36.0, 37.0, 28.0, 35.0, 44.0, 44.0, 35.0, 36.0, 35.0, 34.0, 30.0, 31.0, 24.0, 17.0, 23.0, 34.0, 19.0, 17.0, 23.0, 18.0, 16.0, 9.0, 10.0, 11.0, 6.0, 9.0, 8.0, 4.0, 5.0, 3.0, 0.0, 4.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0], "bins": [-0.08612060546875, -0.08334541320800781, -0.08057022094726562, -0.07779502868652344, -0.07501983642578125, -0.07224464416503906, -0.06946945190429688, -0.06669425964355469, -0.0639190673828125, -0.06114387512207031, -0.058368682861328125, -0.05559349060058594, -0.05281829833984375, -0.05004310607910156, -0.047267913818359375, -0.04449272155761719, -0.041717529296875, -0.03894233703613281, -0.036167144775390625, -0.03339195251464844, -0.03061676025390625, -0.027841567993164062, -0.025066375732421875, -0.022291183471679688, -0.0195159912109375, -0.016740798950195312, -0.013965606689453125, -0.011190414428710938, -0.00841522216796875, -0.0056400299072265625, -0.002864837646484375, -8.96453857421875e-05, 0.002685546875, 0.0054607391357421875, 0.008235931396484375, 0.011011123657226562, 0.01378631591796875, 0.016561508178710938, 0.019336700439453125, 0.022111892700195312, 0.0248870849609375, 0.027662277221679688, 0.030437469482421875, 0.03321266174316406, 0.03598785400390625, 0.03876304626464844, 0.041538238525390625, 0.04431343078613281, 0.047088623046875, 0.04986381530761719, 0.052639007568359375, 0.05541419982910156, 0.05818939208984375, 0.06096458435058594, 0.06373977661132812, 0.06651496887207031, 0.0692901611328125, 0.07206535339355469, 0.07484054565429688, 0.07761573791503906, 0.08039093017578125, 0.08316612243652344, 0.08594131469726562, 0.08871650695800781, 0.09149169921875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 5.0, 5.0, 6.0, 10.0, 17.0, 18.0, 36.0, 62.0, 104.0, 192.0, 355.0, 729.0, 1872.0, 5416.0, 20984.0, 105569.0, 445471.0, 367821.0, 76660.0, 15931.0, 4288.0, 1611.0, 662.0, 334.0, 172.0, 85.0, 62.0, 30.0, 18.0, 12.0, 7.0, 6.0, 6.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.043487548828125, -0.04208040237426758, -0.040673255920410156, -0.039266109466552734, -0.03785896301269531, -0.03645181655883789, -0.03504467010498047, -0.03363752365112305, -0.032230377197265625, -0.030823230743408203, -0.02941608428955078, -0.02800893783569336, -0.026601791381835938, -0.025194644927978516, -0.023787498474121094, -0.022380352020263672, -0.02097320556640625, -0.019566059112548828, -0.018158912658691406, -0.016751766204833984, -0.015344619750976562, -0.01393747329711914, -0.012530326843261719, -0.011123180389404297, -0.009716033935546875, -0.008308887481689453, -0.006901741027832031, -0.005494594573974609, -0.0040874481201171875, -0.0026803016662597656, -0.0012731552124023438, 0.00013399124145507812, 0.0015411376953125, 0.002948284149169922, 0.004355430603027344, 0.005762577056884766, 0.0071697235107421875, 0.00857686996459961, 0.009984016418457031, 0.011391162872314453, 0.012798309326171875, 0.014205455780029297, 0.015612602233886719, 0.01701974868774414, 0.018426895141601562, 0.019834041595458984, 0.021241188049316406, 0.022648334503173828, 0.02405548095703125, 0.025462627410888672, 0.026869773864746094, 0.028276920318603516, 0.029684066772460938, 0.03109121322631836, 0.03249835968017578, 0.0339055061340332, 0.035312652587890625, 0.03671979904174805, 0.03812694549560547, 0.03953409194946289, 0.04094123840332031, 
0.042348384857177734, 0.043755531311035156, 0.04516267776489258, 0.04656982421875]}, "gradients/encoder.encoder.layers.18.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 4.0, 1.0, 0.0, 1.0, 3.0, 3.0, 4.0, 13.0, 9.0, 18.0, 18.0, 22.0, 15.0, 35.0, 33.0, 46.0, 30.0, 50.0, 59.0, 89.0, 59.0, 75.0, 56.0, 54.0, 60.0, 52.0, 40.0, 28.0, 25.0, 18.0, 21.0, 12.0, 12.0, 8.0, 12.0, 4.0, 6.0, 2.0, 3.0, 0.0, 5.0, 8.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.198883056640625e-06, -5.950219929218292e-06, -5.7015568017959595e-06, -5.452893674373627e-06, -5.204230546951294e-06, -4.955567419528961e-06, -4.706904292106628e-06, -4.458241164684296e-06, -4.209578037261963e-06, -3.96091490983963e-06, -3.7122517824172974e-06, -3.4635886549949646e-06, -3.214925527572632e-06, -2.966262400150299e-06, -2.7175992727279663e-06, -2.4689361453056335e-06, -2.2202730178833008e-06, -1.971609890460968e-06, -1.7229467630386353e-06, -1.4742836356163025e-06, -1.2256205081939697e-06, -9.76957380771637e-07, -7.282942533493042e-07, -4.796311259269714e-07, -2.3096799850463867e-07, 1.7695128917694092e-08, 2.6635825634002686e-07, 5.150213837623596e-07, 7.636845111846924e-07, 1.0123476386070251e-06, 1.261010766029358e-06, 1.5096738934516907e-06, 1.7583370208740234e-06, 2.007000148296356e-06, 2.255663275718689e-06, 2.5043264031410217e-06, 2.7529895305633545e-06, 3.0016526579856873e-06, 3.25031578540802e-06, 3.4989789128303528e-06, 3.7476420402526855e-06, 3.996305167675018e-06, 4.244968295097351e-06, 4.493631422519684e-06, 4.742294549942017e-06, 4.990957677364349e-06, 5.239620804786682e-06, 5.488283932209015e-06, 5.736947059631348e-06, 5.98561018705368e-06, 6.234273314476013e-06, 6.482936441898346e-06, 6.731599569320679e-06, 6.9802626967430115e-06, 7.228925824165344e-06, 7.477588951587677e-06, 7.72625207901001e-06, 7.974915206432343e-06, 8.223578333854675e-06, 8.472241461277008e-06, 8.72090458869934e-06, 8.969567716121674e-06, 9.218230843544006e-06, 9.466893970966339e-06, 9.715557098388672e-06]}, "gradients/encoder.encoder.layers.18.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 5.0, 7.0, 4.0, 13.0, 33.0, 43.0, 106.0, 255.0, 664.0, 2370.0, 13554.0, 141916.0, 719647.0, 152160.0, 14155.0, 2511.0, 648.0, 237.0, 120.0, 56.0, 32.0, 8.0, 11.0, 7.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06988525390625, -0.06751728057861328, -0.06514930725097656, -0.06278133392333984, -0.060413360595703125, -0.058045387268066406, -0.05567741394042969, -0.05330944061279297, -0.05094146728515625, -0.04857349395751953, -0.04620552062988281, -0.043837547302246094, -0.041469573974609375, -0.039101600646972656, -0.03673362731933594, -0.03436565399169922, -0.0319976806640625, -0.02962970733642578, -0.027261734008789062, -0.024893760681152344, -0.022525787353515625, -0.020157814025878906, -0.017789840698242188, -0.015421867370605469, -0.01305389404296875, -0.010685920715332031, -0.008317947387695312, -0.005949974060058594, -0.003582000732421875, -0.0012140274047851562, 0.0011539459228515625, 0.0035219192504882812, 0.005889892578125, 0.008257865905761719, 0.010625839233398438, 0.012993812561035156, 0.015361785888671875, 0.017729759216308594, 0.020097732543945312, 0.02246570587158203, 0.02483367919921875, 0.02720165252685547, 0.029569625854492188, 0.031937599182128906, 0.034305572509765625, 
0.036673545837402344, 0.03904151916503906, 0.04140949249267578, 0.0437774658203125, 0.04614543914794922, 0.04851341247558594, 0.050881385803222656, 0.053249359130859375, 0.055617332458496094, 0.05798530578613281, 0.06035327911376953, 0.06272125244140625, 0.06508922576904297, 0.06745719909667969, 0.0698251724243164, 0.07219314575195312, 0.07456111907958984, 0.07692909240722656, 0.07929706573486328, 0.0816650390625]}, "gradients/encoder.encoder.layers.18.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 4.0, 0.0, 3.0, 2.0, 2.0, 9.0, 8.0, 9.0, 17.0, 19.0, 24.0, 33.0, 41.0, 44.0, 55.0, 74.0, 81.0, 92.0, 63.0, 67.0, 65.0, 58.0, 54.0, 47.0, 35.0, 21.0, 26.0, 17.0, 12.0, 5.0, 11.0, 4.0, 2.0, 5.0, 2.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.03173828125, -0.03081345558166504, -0.029888629913330078, -0.028963804244995117, -0.028038978576660156, -0.027114152908325195, -0.026189327239990234, -0.025264501571655273, -0.024339675903320312, -0.02341485023498535, -0.02249002456665039, -0.02156519889831543, -0.02064037322998047, -0.019715547561645508, -0.018790721893310547, -0.017865896224975586, -0.016941070556640625, -0.016016244888305664, -0.015091419219970703, -0.014166593551635742, -0.013241767883300781, -0.01231694221496582, -0.01139211654663086, -0.010467290878295898, -0.009542465209960938, -0.008617639541625977, -0.007692813873291016, -0.006767988204956055, -0.005843162536621094, -0.004918336868286133, -0.003993511199951172, -0.003068685531616211, -0.00214385986328125, -0.001219034194946289, -0.0002942085266113281, 0.0006306171417236328, 0.0015554428100585938, 0.0024802684783935547, 0.0034050941467285156, 0.0043299198150634766, 0.0052547454833984375, 0.0061795711517333984, 0.007104396820068359, 0.00802922248840332, 0.008954048156738281, 0.009878873825073242, 0.010803699493408203, 0.011728525161743164, 0.012653350830078125, 0.013578176498413086, 0.014503002166748047, 0.015427827835083008, 0.01635265350341797, 0.01727747917175293, 0.01820230484008789, 0.01912713050842285, 0.020051956176757812, 0.020976781845092773, 0.021901607513427734, 0.022826433181762695, 0.023751258850097656, 0.024676084518432617, 0.025600910186767578, 0.02652573585510254, 0.0274505615234375]}, "gradients/encoder.encoder.layers.18.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 6.0, 6.0, 12.0, 23.0, 43.0, 99.0, 156.0, 200.0, 205.0, 110.0, 63.0, 29.0, 22.0, 15.0, 3.0, 3.0, 7.0, 4.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.6291400194168091, -0.6105483174324036, -0.591956615447998, -0.5733648538589478, -0.5547731518745422, -0.5361814498901367, -0.5175897479057312, -0.4989980161190033, -0.4804062843322754, -0.4618145823478699, -0.44322285056114197, -0.42463114857673645, -0.40603941679000854, -0.387447714805603, -0.3688560128211975, -0.3502642810344696, -0.3316725790500641, -0.31308087706565857, -0.29448914527893066, -0.27589744329452515, -0.25730571150779724, -0.23871400952339172, -0.220122292637825, -0.2015305757522583, -0.1829388588666916, -0.16434714198112488, -0.14575542509555817, -0.12716370820999146, -0.10857199877500534, -0.08998028188943863, -0.07138857245445251, -0.0527968555688858, -0.03420513868331909, -0.01561342366039753, 0.0029782913625240326, 0.021570004522800446, 0.04016172140836716, 
0.05875343829393387, 0.07734514772891998, 0.0959368646144867, 0.1145285815000534, 0.13312029838562012, 0.15171201527118683, 0.17030373215675354, 0.18889543414115906, 0.20748716592788696, 0.22607886791229248, 0.2446705847978592, 0.2632623016834259, 0.2818540036678314, 0.3004457354545593, 0.31903743743896484, 0.33762916922569275, 0.35622087121009827, 0.37481260299682617, 0.3934043049812317, 0.4119960069656372, 0.4305877089500427, 0.44917944073677063, 0.46777114272117615, 0.48636287450790405, 0.5049545764923096, 0.5235462784767151, 0.5421379804611206, 0.5607297420501709]}, "gradients/encoder.encoder.layers.18.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 4.0, 3.0, 3.0, 1.0, 4.0, 6.0, 6.0, 5.0, 7.0, 10.0, 19.0, 14.0, 14.0, 22.0, 19.0, 27.0, 29.0, 24.0, 35.0, 41.0, 46.0, 39.0, 40.0, 39.0, 49.0, 37.0, 29.0, 45.0, 40.0, 32.0, 29.0, 27.0, 44.0, 41.0, 27.0, 19.0, 17.0, 26.0, 16.0, 16.0, 15.0, 16.0, 5.0, 8.0, 10.0, 5.0, 1.0, 1.0, 3.0, 2.0, 0.0, 3.0], "bins": [-0.3699676990509033, -0.3599499464035034, -0.3499321937561035, -0.3399144411087036, -0.3298966884613037, -0.3198789358139038, -0.3098611831665039, -0.2998434007167816, -0.2898256480693817, -0.2798078954219818, -0.2697901427745819, -0.259772390127182, -0.2497546225786209, -0.239736869931221, -0.2297191172838211, -0.21970134973526, -0.2096836119890213, -0.1996658593416214, -0.1896481066942215, -0.1796303391456604, -0.1696125864982605, -0.1595948338508606, -0.1495770812034607, -0.1395593285560608, -0.1295415759086609, -0.11952382326126099, -0.10950606316328049, -0.09948831051588058, -0.08947055041790009, -0.07945279777050018, -0.06943504512310028, -0.05941728502511978, -0.04939952492713928, -0.03938176855444908, -0.02936401404440403, -0.01934625953435898, -0.009328503161668777, 0.0006892532110214233, 0.010707005858421326, 0.020724765956401825, 0.030742518603801727, 0.04076027497649193, 0.05077803134918213, 0.06079578399658203, 0.07081353664398193, 0.08083129674196243, 0.09084904938936234, 0.10086680948734283, 0.11088456213474274, 0.12090231478214264, 0.13092006742954254, 0.14093783497810364, 0.15095558762550354, 0.16097334027290344, 0.17099109292030334, 0.18100884556770325, 0.19102659821510315, 0.20104435086250305, 0.21106210350990295, 0.22107985615730286, 0.23109762370586395, 0.24111537635326385, 0.25113314390182495, 0.26115089654922485, 0.27116864919662476]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 3.0, 0.0, 6.0, 7.0, 11.0, 12.0, 26.0, 29.0, 47.0, 64.0, 99.0, 194.0, 332.0, 644.0, 1274.0, 2867.0, 7058.0, 20253.0, 72226.0, 537119.0, 2808339.0, 632454.0, 77919.0, 20838.0, 6876.0, 2796.0, 1281.0, 661.0, 333.0, 176.0, 122.0, 70.0, 42.0, 46.0, 22.0, 13.0, 6.0, 12.0, 6.0, 3.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.112548828125, -0.10928916931152344, -0.10602951049804688, -0.10276985168457031, -0.09951019287109375, -0.09625053405761719, -0.09299087524414062, -0.08973121643066406, -0.0864715576171875, -0.08321189880371094, -0.07995223999023438, -0.07669258117675781, -0.07343292236328125, -0.07017326354980469, -0.06691360473632812, -0.06365394592285156, -0.060394287109375, -0.05713462829589844, -0.053874969482421875, -0.05061531066894531, -0.04735565185546875, -0.04409599304199219, -0.040836334228515625, -0.03757667541503906, -0.0343170166015625, -0.031057357788085938, -0.027797698974609375, -0.024538040161132812, 
-0.02127838134765625, -0.018018722534179688, -0.014759063720703125, -0.011499404907226562, -0.00823974609375, -0.0049800872802734375, -0.001720428466796875, 0.0015392303466796875, 0.00479888916015625, 0.008058547973632812, 0.011318206787109375, 0.014577865600585938, 0.0178375244140625, 0.021097183227539062, 0.024356842041015625, 0.027616500854492188, 0.03087615966796875, 0.03413581848144531, 0.037395477294921875, 0.04065513610839844, 0.043914794921875, 0.04717445373535156, 0.050434112548828125, 0.05369377136230469, 0.05695343017578125, 0.06021308898925781, 0.06347274780273438, 0.06673240661621094, 0.0699920654296875, 0.07325172424316406, 0.07651138305664062, 0.07977104187011719, 0.08303070068359375, 0.08629035949707031, 0.08955001831054688, 0.09280967712402344, 0.0960693359375]}, "gradients/encoder.encoder.layers.17.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 3.0, 7.0, 5.0, 17.0, 19.0, 27.0, 24.0, 33.0, 50.0, 54.0, 60.0, 75.0, 74.0, 98.0, 65.0, 71.0, 64.0, 53.0, 65.0, 42.0, 34.0, 20.0, 20.0, 12.0, 10.0, 6.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0814208984375, -0.07899761199951172, -0.07657432556152344, -0.07415103912353516, -0.07172775268554688, -0.0693044662475586, -0.06688117980957031, -0.06445789337158203, -0.06203460693359375, -0.05961132049560547, -0.05718803405761719, -0.054764747619628906, -0.052341461181640625, -0.049918174743652344, -0.04749488830566406, -0.04507160186767578, -0.0426483154296875, -0.04022502899169922, -0.03780174255371094, -0.035378456115722656, -0.032955169677734375, -0.030531883239746094, -0.028108596801757812, -0.02568531036376953, -0.02326202392578125, -0.02083873748779297, -0.018415451049804688, -0.015992164611816406, -0.013568878173828125, -0.011145591735839844, -0.008722305297851562, -0.006299018859863281, -0.003875732421875, -0.0014524459838867188, 0.0009708404541015625, 0.0033941268920898438, 0.005817413330078125, 0.008240699768066406, 0.010663986206054688, 0.013087272644042969, 0.01551055908203125, 0.01793384552001953, 0.020357131958007812, 0.022780418395996094, 0.025203704833984375, 0.027626991271972656, 0.030050277709960938, 0.03247356414794922, 0.0348968505859375, 0.03732013702392578, 0.03974342346191406, 0.042166709899902344, 0.044589996337890625, 0.047013282775878906, 0.04943656921386719, 0.05185985565185547, 0.05428314208984375, 0.05670642852783203, 0.05912971496582031, 0.061553001403808594, 0.06397628784179688, 0.06639957427978516, 0.06882286071777344, 0.07124614715576172, 0.07366943359375]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 6.0, 3.0, 5.0, 6.0, 13.0, 24.0, 26.0, 48.0, 80.0, 176.0, 400.0, 1283.0, 12747.0, 2297764.0, 1868085.0, 11638.0, 1238.0, 369.0, 154.0, 83.0, 45.0, 28.0, 22.0, 14.0, 10.0, 9.0, 8.0, 3.0, 3.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.447998046875, -0.4367942810058594, -0.42559051513671875, -0.4143867492675781, -0.4031829833984375, -0.3919792175292969, -0.38077545166015625, -0.3695716857910156, -0.358367919921875, -0.3471641540527344, -0.33596038818359375, -0.3247566223144531, -0.3135528564453125, -0.3023490905761719, -0.29114532470703125, -0.2799415588378906, -0.26873779296875, -0.2575340270996094, -0.24633026123046875, 
-0.23512649536132812, -0.2239227294921875, -0.21271896362304688, -0.20151519775390625, -0.19031143188476562, -0.179107666015625, -0.16790390014648438, -0.15670013427734375, -0.14549636840820312, -0.1342926025390625, -0.12308883666992188, -0.11188507080078125, -0.10068130493164062, -0.0894775390625, -0.07827377319335938, -0.06707000732421875, -0.055866241455078125, -0.0446624755859375, -0.033458709716796875, -0.02225494384765625, -0.011051177978515625, 0.000152587890625, 0.011356353759765625, 0.02256011962890625, 0.033763885498046875, 0.0449676513671875, 0.056171417236328125, 0.06737518310546875, 0.07857894897460938, 0.08978271484375, 0.10098648071289062, 0.11219024658203125, 0.12339401245117188, 0.1345977783203125, 0.14580154418945312, 0.15700531005859375, 0.16820907592773438, 0.179412841796875, 0.19061660766601562, 0.20182037353515625, 0.21302413940429688, 0.2242279052734375, 0.23543167114257812, 0.24663543701171875, 0.2578392028808594, 0.26904296875]}, "gradients/encoder.encoder.layers.17.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 2.0, 7.0, 4.0, 4.0, 6.0, 19.0, 20.0, 22.0, 33.0, 54.0, 85.0, 153.0, 233.0, 462.0, 946.0, 929.0, 482.0, 254.0, 126.0, 71.0, 49.0, 42.0, 36.0, 13.0, 13.0, 6.0, 5.0, 6.0, 1.0, 1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09722900390625, -0.09317493438720703, -0.08912086486816406, -0.0850667953491211, -0.08101272583007812, -0.07695865631103516, -0.07290458679199219, -0.06885051727294922, -0.06479644775390625, -0.06074237823486328, -0.05668830871582031, -0.052634239196777344, -0.048580169677734375, -0.044526100158691406, -0.04047203063964844, -0.03641796112060547, -0.0323638916015625, -0.02830982208251953, -0.024255752563476562, -0.020201683044433594, -0.016147613525390625, -0.012093544006347656, -0.008039474487304688, -0.003985404968261719, 6.866455078125e-05, 0.004122734069824219, 0.008176803588867188, 0.012230873107910156, 0.016284942626953125, 0.020339012145996094, 0.024393081665039062, 0.02844715118408203, 0.032501220703125, 0.03655529022216797, 0.04060935974121094, 0.044663429260253906, 0.048717498779296875, 0.052771568298339844, 0.05682563781738281, 0.06087970733642578, 0.06493377685546875, 0.06898784637451172, 0.07304191589355469, 0.07709598541259766, 0.08115005493164062, 0.0852041244506836, 0.08925819396972656, 0.09331226348876953, 0.0973663330078125, 0.10142040252685547, 0.10547447204589844, 0.1095285415649414, 0.11358261108398438, 0.11763668060302734, 0.12169075012207031, 0.12574481964111328, 0.12979888916015625, 0.13385295867919922, 0.1379070281982422, 0.14196109771728516, 0.14601516723632812, 0.1500692367553711, 0.15412330627441406, 0.15817737579345703, 0.1622314453125]}, "gradients/encoder.encoder.layers.17.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 3.0, 2.0, 13.0, 30.0, 66.0, 192.0, 252.0, 245.0, 111.0, 57.0, 16.0, 11.0, 5.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.8142618536949158, -0.7869841456413269, -0.759706437587738, -0.732428789138794, -0.7051510810852051, -0.6778733730316162, -0.6505956649780273, -0.6233179569244385, -0.5960402488708496, -0.5687625408172607, -0.5414848327636719, -0.514207124710083, -0.4869294762611389, 
-0.45965176820755005, -0.4323740601539612, -0.4050963521003723, -0.3778187036514282, -0.35054099559783936, -0.3232633173465729, -0.295985609292984, -0.26870793104171753, -0.24143022298812866, -0.2141525149345398, -0.18687482178211212, -0.15959712862968445, -0.13231943547725677, -0.1050417348742485, -0.07776403427124023, -0.05048634111881256, -0.023208647966384888, 0.0040690600872039795, 0.03134675323963165, 0.05862438678741455, 0.08590207993984222, 0.1131797805428505, 0.14045748114585876, 0.16773517429828644, 0.1950128674507141, 0.22229057550430298, 0.24956826865673065, 0.2768459618091583, 0.3041236698627472, 0.33140134811401367, 0.35867905616760254, 0.3859567642211914, 0.4132344424724579, 0.44051215052604675, 0.46778982877731323, 0.4950675368309021, 0.522345244884491, 0.5496229529380798, 0.5769006013870239, 0.6041783094406128, 0.6314560174942017, 0.6587337255477905, 0.6860114336013794, 0.7132891416549683, 0.7405668497085571, 0.767844557762146, 0.7951222658157349, 0.822399914264679, 0.8496776223182678, 0.8769553303718567, 0.9042330384254456, 0.9315106868743896]}, "gradients/encoder.encoder.layers.17.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 3.0, 3.0, 7.0, 6.0, 4.0, 8.0, 15.0, 9.0, 10.0, 18.0, 18.0, 27.0, 26.0, 30.0, 36.0, 38.0, 40.0, 39.0, 43.0, 48.0, 36.0, 37.0, 54.0, 49.0, 39.0, 48.0, 32.0, 36.0, 35.0, 35.0, 28.0, 18.0, 21.0, 10.0, 20.0, 9.0, 17.0, 11.0, 12.0, 2.0, 13.0, 4.0, 3.0, 3.0, 5.0, 0.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.2595297694206238, -0.2514130175113678, -0.24329626560211182, -0.23517951369285583, -0.22706276178359985, -0.21894600987434387, -0.2108292579650879, -0.2027125060558319, -0.19459575414657593, -0.18647900223731995, -0.17836225032806396, -0.17024549841880798, -0.162128746509552, -0.15401199460029602, -0.14589524269104004, -0.13777849078178406, -0.12966173887252808, -0.1215449869632721, -0.11342823505401611, -0.10531148314476013, -0.09719473123550415, -0.08907797932624817, -0.08096122741699219, -0.0728444755077362, -0.06472772359848022, -0.05661097168922424, -0.04849421977996826, -0.04037746787071228, -0.0322607159614563, -0.024143964052200317, -0.016027212142944336, -0.007910460233688354, 0.00020629167556762695, 0.008323043584823608, 0.01643979549407959, 0.02455654740333557, 0.03267329931259155, 0.040790051221847534, 0.048906803131103516, 0.0570235550403595, 0.06514030694961548, 0.07325705885887146, 0.08137381076812744, 0.08949056267738342, 0.0976073145866394, 0.10572406649589539, 0.11384081840515137, 0.12195757031440735, 0.13007432222366333, 0.1381910741329193, 0.1463078260421753, 0.15442457795143127, 0.16254132986068726, 0.17065808176994324, 0.17877483367919922, 0.1868915855884552, 0.19500833749771118, 0.20312508940696716, 0.21124184131622314, 0.21935859322547913, 0.2274753451347351, 0.2355920970439911, 0.24370884895324707, 0.25182560086250305, 0.25994235277175903]}, "gradients/encoder.encoder.layers.17.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 3.0, 3.0, 7.0, 5.0, 8.0, 8.0, 13.0, 33.0, 31.0, 40.0, 61.0, 61.0, 122.0, 167.0, 275.0, 423.0, 757.0, 1474.0, 3048.0, 7491.0, 19823.0, 59489.0, 186408.0, 382731.0, 254793.0, 85424.0, 27707.0, 9860.0, 3979.0, 1903.0, 948.0, 530.0, 317.0, 182.0, 138.0, 85.0, 63.0, 41.0, 27.0, 28.0, 18.0, 10.0, 13.0, 6.0, 6.0, 3.0, 3.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.1519775390625, -0.1473236083984375, -0.142669677734375, -0.1380157470703125, -0.13336181640625, 
[wandb run summary excerpt: per-parameter gradient histograms ("_type": "histogram", 64-bin "values"/"bins" arrays) recorded for encoder.encoder.layers.17, .16, and .15 — attention q_proj/k_proj/v_proj/out_proj weights and biases, layer_norm and final_layer_norm weights and biases, and feed_forward intermediate_dense/output_dense weights and biases. The raw histogram arrays are machine-generated plot data and are not reproduced here.]
0.0148773193359375, 0.015338897705078125, 0.01580047607421875, 0.016262054443359375, 0.0167236328125]}, "gradients/encoder.encoder.layers.15.layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 4.0, 6.0, 11.0, 15.0, 44.0, 118.0, 229.0, 253.0, 157.0, 81.0, 45.0, 19.0, 13.0, 5.0, 5.0, 5.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.15779608488082886, -0.14364859461784363, -0.1295011043548584, -0.11535362154245377, -0.10120613127946854, -0.0870586410164833, -0.07291115820407867, -0.058763667941093445, -0.044616177678108215, -0.030468689277768135, -0.016321200877428055, -0.0021737143397331238, 0.011973775923252106, 0.026121266186237335, 0.04026874899864197, 0.0544162392616272, 0.06856372952461243, 0.08271121978759766, 0.09685871005058289, 0.11100619286298752, 0.12515369057655334, 0.13930118083953857, 0.1534486562013626, 0.16759614646434784, 0.18174363672733307, 0.1958911269903183, 0.21003861725330353, 0.22418609261512756, 0.2383335828781128, 0.252481073141098, 0.26662856340408325, 0.2807760536670685, 0.2949235439300537, 0.30907103419303894, 0.32321852445602417, 0.3373660147190094, 0.35151350498199463, 0.36566099524497986, 0.3798084855079651, 0.39395594596862793, 0.40810346603393555, 0.4222509562969208, 0.436398446559906, 0.45054593682289124, 0.46469342708587646, 0.4788409173488617, 0.4929884076118469, 0.5071358680725098, 0.5212833881378174, 0.5354308485984802, 0.5495783686637878, 0.5637258291244507, 0.5778733491897583, 0.5920208096504211, 0.6061683297157288, 0.6203157901763916, 0.6344632506370544, 0.6486107110977173, 0.6627582311630249, 0.6769056916236877, 0.6910532116889954, 0.7052006721496582, 0.7193481922149658, 0.7334956526756287, 0.7476431727409363]}, "gradients/encoder.encoder.layers.15.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 4.0, 4.0, 3.0, 8.0, 9.0, 7.0, 10.0, 5.0, 13.0, 14.0, 14.0, 25.0, 23.0, 24.0, 29.0, 37.0, 35.0, 31.0, 35.0, 37.0, 33.0, 47.0, 62.0, 42.0, 39.0, 40.0, 46.0, 42.0, 44.0, 22.0, 25.0, 28.0, 28.0, 20.0, 28.0, 12.0, 22.0, 12.0, 16.0, 11.0, 4.0, 3.0, 3.0, 6.0, 4.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.22517025470733643, -0.21760836243629456, -0.2100464552640915, -0.20248456299304962, -0.19492267072200775, -0.1873607635498047, -0.17979887127876282, -0.17223697900772095, -0.16467508673667908, -0.1571131944656372, -0.14955128729343414, -0.14198939502239227, -0.1344275027513504, -0.12686559557914734, -0.11930370330810547, -0.1117418110370636, -0.10417990386486053, -0.09661800414323807, -0.0890561118721962, -0.08149421215057373, -0.07393231987953186, -0.0663704201579094, -0.058808520436286926, -0.05124662443995476, -0.04368472844362259, -0.03612283244729042, -0.028560934588313103, -0.020999036729335785, -0.013437140733003616, -0.005875244736671448, 0.0016866549849510193, 0.009248550981283188, 0.016810446977615356, 0.024372342973947525, 0.031934238970279694, 0.03949613869190216, 0.04705803468823433, 0.0546199306845665, 0.062181830406188965, 0.06974372267723083, 0.0773056223988533, 0.08486752212047577, 0.09242941439151764, 0.0999913141131401, 0.10755321383476257, 0.11511510610580444, 0.12267700582742691, 0.13023890554904938, 0.13780079782009125, 0.14536269009113312, 0.15292459726333618, 0.16048648953437805, 0.16804838180541992, 0.1756102740764618, 0.18317218124866486, 0.19073407351970673, 
0.1982959806919098, 0.20585787296295166, 0.21341978013515472, 0.2209816724061966, 0.22854356467723846, 0.23610547184944153, 0.2436673641204834, 0.25122925639152527, 0.25879114866256714]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 11.0, 2.0, 9.0, 6.0, 13.0, 20.0, 26.0, 37.0, 63.0, 95.0, 143.0, 191.0, 346.0, 578.0, 1140.0, 2413.0, 4858.0, 12469.0, 46326.0, 322594.0, 2221195.0, 1367788.0, 168173.0, 29215.0, 8892.0, 3742.0, 1878.0, 919.0, 457.0, 255.0, 145.0, 107.0, 51.0, 53.0, 30.0, 17.0, 18.0, 7.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 2.0], "bins": [-0.070556640625, -0.0687403678894043, -0.0669240951538086, -0.06510782241821289, -0.06329154968261719, -0.061475276947021484, -0.05965900421142578, -0.05784273147583008, -0.056026458740234375, -0.05421018600463867, -0.05239391326904297, -0.050577640533447266, -0.04876136779785156, -0.04694509506225586, -0.045128822326660156, -0.04331254959106445, -0.04149627685546875, -0.03968000411987305, -0.037863731384277344, -0.03604745864868164, -0.03423118591308594, -0.032414913177490234, -0.03059864044189453, -0.028782367706298828, -0.026966094970703125, -0.025149822235107422, -0.02333354949951172, -0.021517276763916016, -0.019701004028320312, -0.01788473129272461, -0.016068458557128906, -0.014252185821533203, -0.0124359130859375, -0.010619640350341797, -0.008803367614746094, -0.006987094879150391, -0.0051708221435546875, -0.0033545494079589844, -0.0015382766723632812, 0.0002779960632324219, 0.002094268798828125, 0.003910541534423828, 0.005726814270019531, 0.007543087005615234, 0.009359359741210938, 0.01117563247680664, 0.012991905212402344, 0.014808177947998047, 0.01662445068359375, 0.018440723419189453, 0.020256996154785156, 0.02207326889038086, 0.023889541625976562, 0.025705814361572266, 0.02752208709716797, 0.029338359832763672, 0.031154632568359375, 0.03297090530395508, 0.03478717803955078, 0.036603450775146484, 0.03841972351074219, 0.04023599624633789, 0.042052268981933594, 0.0438685417175293, 0.045684814453125]}, "gradients/encoder.encoder.layers.14.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 19.0, 19.0, 44.0, 60.0, 84.0, 87.0, 112.0, 105.0, 119.0, 92.0, 79.0, 69.0, 56.0, 23.0, 15.0, 7.0, 5.0, 6.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.099853515625, -0.09697246551513672, -0.09409141540527344, -0.09121036529541016, -0.08832931518554688, -0.0854482650756836, -0.08256721496582031, -0.07968616485595703, -0.07680511474609375, -0.07392406463623047, -0.07104301452636719, -0.0681619644165039, -0.06528091430664062, -0.062399864196777344, -0.05951881408691406, -0.05663776397705078, -0.0537567138671875, -0.05087566375732422, -0.04799461364746094, -0.045113563537597656, -0.042232513427734375, -0.039351463317871094, -0.03647041320800781, -0.03358936309814453, -0.03070831298828125, -0.02782726287841797, -0.024946212768554688, -0.022065162658691406, -0.019184112548828125, -0.016303062438964844, -0.013422012329101562, -0.010540962219238281, -0.007659912109375, -0.004778861999511719, -0.0018978118896484375, 0.0009832382202148438, 0.003864288330078125, 0.006745338439941406, 0.009626388549804688, 0.012507438659667969, 0.01538848876953125, 0.01826953887939453, 0.021150588989257812, 
0.024031639099121094, 0.026912689208984375, 0.029793739318847656, 0.03267478942871094, 0.03555583953857422, 0.0384368896484375, 0.04131793975830078, 0.04419898986816406, 0.047080039978027344, 0.049961090087890625, 0.052842140197753906, 0.05572319030761719, 0.05860424041748047, 0.06148529052734375, 0.06436634063720703, 0.06724739074707031, 0.0701284408569336, 0.07300949096679688, 0.07589054107666016, 0.07877159118652344, 0.08165264129638672, 0.08453369140625]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 4.0, 3.0, 7.0, 4.0, 6.0, 13.0, 19.0, 24.0, 39.0, 70.0, 105.0, 175.0, 261.0, 461.0, 834.0, 1794.0, 4048.0, 10854.0, 32493.0, 114242.0, 561208.0, 2498861.0, 767183.0, 141434.0, 38654.0, 12633.0, 4748.0, 1978.0, 900.0, 493.0, 269.0, 158.0, 94.0, 66.0, 41.0, 27.0, 17.0, 21.0, 16.0, 12.0, 5.0, 3.0, 3.0, 4.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0], "bins": [-0.042327880859375, -0.04094362258911133, -0.039559364318847656, -0.038175106048583984, -0.03679084777832031, -0.03540658950805664, -0.03402233123779297, -0.0326380729675293, -0.031253814697265625, -0.029869556427001953, -0.02848529815673828, -0.02710103988647461, -0.025716781616210938, -0.024332523345947266, -0.022948265075683594, -0.021564006805419922, -0.02017974853515625, -0.018795490264892578, -0.017411231994628906, -0.016026973724365234, -0.014642715454101562, -0.01325845718383789, -0.011874198913574219, -0.010489940643310547, -0.009105682373046875, -0.007721424102783203, -0.006337165832519531, -0.004952907562255859, -0.0035686492919921875, -0.0021843910217285156, -0.0008001327514648438, 0.0005841255187988281, 0.0019683837890625, 0.003352642059326172, 0.004736900329589844, 0.006121158599853516, 0.0075054168701171875, 0.00888967514038086, 0.010273933410644531, 0.011658191680908203, 0.013042449951171875, 0.014426708221435547, 0.01581096649169922, 0.01719522476196289, 0.018579483032226562, 0.019963741302490234, 0.021347999572753906, 0.022732257843017578, 0.02411651611328125, 0.025500774383544922, 0.026885032653808594, 0.028269290924072266, 0.029653549194335938, 0.03103780746459961, 0.03242206573486328, 0.03380632400512695, 0.035190582275390625, 0.0365748405456543, 0.03795909881591797, 0.03934335708618164, 0.04072761535644531, 0.042111873626708984, 0.043496131896972656, 0.04488039016723633, 0.0462646484375]}, "gradients/encoder.encoder.layers.14.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 5.0, 3.0, 7.0, 2.0, 7.0, 6.0, 6.0, 10.0, 23.0, 28.0, 24.0, 42.0, 58.0, 63.0, 89.0, 174.0, 255.0, 412.0, 609.0, 654.0, 585.0, 370.0, 236.0, 144.0, 73.0, 55.0, 39.0, 30.0, 17.0, 19.0, 7.0, 11.0, 7.0, 4.0, 1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.04931640625, -0.04795217514038086, -0.04658794403076172, -0.04522371292114258, -0.04385948181152344, -0.0424952507019043, -0.041131019592285156, -0.039766788482666016, -0.038402557373046875, -0.037038326263427734, -0.035674095153808594, -0.03430986404418945, -0.03294563293457031, -0.03158140182495117, -0.03021717071533203, -0.02885293960571289, -0.02748870849609375, -0.02612447738647461, -0.02476024627685547, -0.023396015167236328, -0.022031784057617188, -0.020667552947998047, -0.019303321838378906, -0.017939090728759766, -0.016574859619140625, -0.015210628509521484, -0.013846397399902344, -0.012482166290283203, -0.011117935180664062, 
-0.009753704071044922, -0.008389472961425781, -0.007025241851806641, -0.0056610107421875, -0.004296779632568359, -0.0029325485229492188, -0.0015683174133300781, -0.0002040863037109375, 0.0011601448059082031, 0.0025243759155273438, 0.0038886070251464844, 0.005252838134765625, 0.006617069244384766, 0.007981300354003906, 0.009345531463623047, 0.010709762573242188, 0.012073993682861328, 0.013438224792480469, 0.01480245590209961, 0.01616668701171875, 0.01753091812133789, 0.01889514923095703, 0.020259380340576172, 0.021623611450195312, 0.022987842559814453, 0.024352073669433594, 0.025716304779052734, 0.027080535888671875, 0.028444766998291016, 0.029808998107910156, 0.031173229217529297, 0.03253746032714844, 0.03390169143676758, 0.03526592254638672, 0.03663015365600586, 0.037994384765625]}, "gradients/encoder.encoder.layers.14.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 2.0, 2.0, 5.0, 17.0, 62.0, 204.0, 384.0, 269.0, 58.0, 4.0, 2.0, 2.0, 0.0, 1.0, 0.0, 2.0], "bins": [-1.075838565826416, -1.0561712980270386, -1.0365040302276611, -1.0168367624282837, -0.9971694946289062, -0.9775022864341736, -0.9578350186347961, -0.9381677508354187, -0.9185004830360413, -0.8988332152366638, -0.8791659474372864, -0.8594986796379089, -0.8398314714431763, -0.8201642036437988, -0.8004969358444214, -0.780829668045044, -0.7611624002456665, -0.7414951324462891, -0.7218278646469116, -0.7021605968475342, -0.6824933290481567, -0.6628261208534241, -0.6431588530540466, -0.6234915852546692, -0.6038243174552917, -0.5841570496559143, -0.5644897818565369, -0.5448225140571594, -0.5251553058624268, -0.5054880380630493, -0.4858207702636719, -0.46615350246429443, -0.446486234664917, -0.42681896686553955, -0.4071516990661621, -0.38748446106910706, -0.3678171932697296, -0.3481499254703522, -0.3284826874732971, -0.3088154196739197, -0.28914815187454224, -0.2694808840751648, -0.24981363117694855, -0.2301463782787323, -0.21047911047935486, -0.19081184267997742, -0.17114458978176117, -0.15147733688354492, -0.13181006908416748, -0.11214280873537064, -0.09247554838657379, -0.07280828803777695, -0.0531410276889801, -0.03347376734018326, -0.013806506991386414, 0.005860745906829834, 0.025528013706207275, 0.04519527405500412, 0.06486253440380096, 0.08452979475259781, 0.10419705510139465, 0.1238643154501915, 0.14353157579898834, 0.1631988286972046, 0.18286609649658203]}, "gradients/encoder.encoder.layers.14.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 6.0, 5.0, 1.0, 10.0, 9.0, 11.0, 14.0, 13.0, 15.0, 29.0, 34.0, 31.0, 36.0, 37.0, 34.0, 55.0, 55.0, 46.0, 60.0, 47.0, 47.0, 39.0, 48.0, 35.0, 46.0, 39.0, 35.0, 28.0, 27.0, 26.0, 26.0, 18.0, 7.0, 7.0, 15.0, 3.0, 6.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.16697907447814941, -0.16230547428131104, -0.15763185918331146, -0.15295825898647308, -0.1482846438884735, -0.14361104369163513, -0.13893742859363556, -0.13426382839679718, -0.1295902132987976, -0.12491660565137863, -0.12024299800395966, -0.11556939035654068, -0.1108957827091217, -0.10622218251228333, -0.10154857486486435, -0.09687496721744537, -0.092201367020607, -0.08752775937318802, -0.08285415172576904, -0.07818054407835007, -0.07350693643093109, -0.06883333623409271, 
-0.06415972858667374, -0.05948612093925476, -0.054812513291835785, -0.05013890564441681, -0.04546529799699783, -0.040791694074869156, -0.03611808642745018, -0.031444478780031204, -0.026770872995257378, -0.02209726721048355, -0.017423659563064575, -0.012750052846968174, -0.008076446130871773, -0.0034028394147753716, 0.0012707673013210297, 0.0059443749487400055, 0.010617980733513832, 0.015291586518287659, 0.019965194165706635, 0.02463880181312561, 0.029312407597899437, 0.033986013382673264, 0.03865962103009224, 0.043333228677511215, 0.04800683259963989, 0.05268044024705887, 0.057354047894477844, 0.06202765554189682, 0.0667012631893158, 0.07137487083673477, 0.07604847848415375, 0.08072207868099213, 0.0853956863284111, 0.09006929397583008, 0.09474290162324905, 0.09941650927066803, 0.104090116918087, 0.10876372456550598, 0.11343732476234436, 0.11811093986034393, 0.12278454005718231, 0.12745815515518188, 0.13213175535202026]}, "gradients/encoder.encoder.layers.14.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 2.0, 0.0, 4.0, 4.0, 2.0, 7.0, 8.0, 9.0, 10.0, 20.0, 18.0, 35.0, 34.0, 65.0, 93.0, 127.0, 151.0, 255.0, 350.0, 511.0, 1052.0, 2194.0, 5180.0, 14623.0, 48250.0, 157620.0, 369098.0, 295520.0, 104729.0, 31188.0, 9759.0, 3648.0, 1608.0, 884.0, 471.0, 308.0, 213.0, 136.0, 95.0, 87.0, 47.0, 43.0, 25.0, 25.0, 20.0, 14.0, 8.0, 7.0, 3.0, 2.0, 4.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.06512451171875, -0.06294059753417969, -0.060756683349609375, -0.05857276916503906, -0.05638885498046875, -0.05420494079589844, -0.052021026611328125, -0.04983711242675781, -0.0476531982421875, -0.04546928405761719, -0.043285369873046875, -0.04110145568847656, -0.03891754150390625, -0.03673362731933594, -0.034549713134765625, -0.03236579895019531, -0.030181884765625, -0.027997970581054688, -0.025814056396484375, -0.023630142211914062, -0.02144622802734375, -0.019262313842773438, -0.017078399658203125, -0.014894485473632812, -0.0127105712890625, -0.010526657104492188, -0.008342742919921875, -0.0061588287353515625, -0.00397491455078125, -0.0017910003662109375, 0.000392913818359375, 0.0025768280029296875, 0.0047607421875, 0.0069446563720703125, 0.009128570556640625, 0.011312484741210938, 0.01349639892578125, 0.015680313110351562, 0.017864227294921875, 0.020048141479492188, 0.0222320556640625, 0.024415969848632812, 0.026599884033203125, 0.028783798217773438, 0.03096771240234375, 0.03315162658691406, 0.035335540771484375, 0.03751945495605469, 0.039703369140625, 0.04188728332519531, 0.044071197509765625, 0.04625511169433594, 0.04843902587890625, 0.05062294006347656, 0.052806854248046875, 0.05499076843261719, 0.0571746826171875, 0.05935859680175781, 0.061542510986328125, 0.06372642517089844, 0.06591033935546875, 0.06809425354003906, 0.07027816772460938, 0.07246208190917969, 0.07464599609375]}, "gradients/encoder.encoder.layers.14.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 1.0, 1.0, 2.0, 7.0, 18.0, 28.0, 42.0, 57.0, 58.0, 91.0, 97.0, 122.0, 118.0, 95.0, 73.0, 76.0, 56.0, 32.0, 18.0, 8.0, 7.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0950927734375, -0.0923614501953125, -0.089630126953125, -0.0868988037109375, -0.08416748046875, -0.0814361572265625, -0.078704833984375, -0.0759735107421875, -0.0732421875, -0.0705108642578125, -0.067779541015625, -0.0650482177734375, 
-0.06231689453125, -0.0595855712890625, -0.056854248046875, -0.0541229248046875, -0.0513916015625, -0.0486602783203125, -0.045928955078125, -0.0431976318359375, -0.04046630859375, -0.0377349853515625, -0.035003662109375, -0.0322723388671875, -0.029541015625, -0.0268096923828125, -0.024078369140625, -0.0213470458984375, -0.01861572265625, -0.0158843994140625, -0.013153076171875, -0.0104217529296875, -0.0076904296875, -0.0049591064453125, -0.002227783203125, 0.0005035400390625, 0.00323486328125, 0.0059661865234375, 0.008697509765625, 0.0114288330078125, 0.01416015625, 0.0168914794921875, 0.019622802734375, 0.0223541259765625, 0.02508544921875, 0.0278167724609375, 0.030548095703125, 0.0332794189453125, 0.0360107421875, 0.0387420654296875, 0.041473388671875, 0.0442047119140625, 0.04693603515625, 0.0496673583984375, 0.052398681640625, 0.0551300048828125, 0.057861328125, 0.0605926513671875, 0.063323974609375, 0.0660552978515625, 0.06878662109375, 0.0715179443359375, 0.074249267578125, 0.0769805908203125, 0.0797119140625]}, "gradients/encoder.encoder.layers.14.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 0.0, 2.0, 3.0, 5.0, 3.0, 5.0, 12.0, 14.0, 20.0, 25.0, 21.0, 51.0, 64.0, 127.0, 245.0, 966.0, 5617.0, 60736.0, 588109.0, 359892.0, 28303.0, 3147.0, 637.0, 219.0, 91.0, 66.0, 39.0, 26.0, 23.0, 18.0, 18.0, 17.0, 19.0, 8.0, 6.0, 4.0, 1.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1197509765625, -0.11586952209472656, -0.11198806762695312, -0.10810661315917969, -0.10422515869140625, -0.10034370422363281, -0.09646224975585938, -0.09258079528808594, -0.0886993408203125, -0.08481788635253906, -0.08093643188476562, -0.07705497741699219, -0.07317352294921875, -0.06929206848144531, -0.06541061401367188, -0.06152915954589844, -0.057647705078125, -0.05376625061035156, -0.049884796142578125, -0.04600334167480469, -0.04212188720703125, -0.03824043273925781, -0.034358978271484375, -0.030477523803710938, -0.0265960693359375, -0.022714614868164062, -0.018833160400390625, -0.014951705932617188, -0.01107025146484375, -0.0071887969970703125, -0.003307342529296875, 0.0005741119384765625, 0.00445556640625, 0.008337020874023438, 0.012218475341796875, 0.016099929809570312, 0.01998138427734375, 0.023862838745117188, 0.027744293212890625, 0.03162574768066406, 0.0355072021484375, 0.03938865661621094, 0.043270111083984375, 0.04715156555175781, 0.05103302001953125, 0.05491447448730469, 0.058795928955078125, 0.06267738342285156, 0.066558837890625, 0.07044029235839844, 0.07432174682617188, 0.07820320129394531, 0.08208465576171875, 0.08596611022949219, 0.08984756469726562, 0.09372901916503906, 0.0976104736328125, 0.10149192810058594, 0.10537338256835938, 0.10925483703613281, 0.11313629150390625, 0.11701774597167969, 0.12089920043945312, 0.12478065490722656, 0.128662109375]}, "gradients/encoder.encoder.layers.14.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 0.0, 3.0, 1.0, 3.0, 12.0, 12.0, 16.0, 10.0, 27.0, 21.0, 21.0, 31.0, 37.0, 46.0, 40.0, 44.0, 50.0, 42.0, 51.0, 68.0, 55.0, 48.0, 44.0, 44.0, 47.0, 43.0, 42.0, 31.0, 28.0, 15.0, 21.0, 16.0, 15.0, 8.0, 5.0, 5.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09698486328125, -0.09404468536376953, -0.09110450744628906, -0.0881643295288086, -0.08522415161132812, -0.08228397369384766, -0.07934379577636719, -0.07640361785888672, -0.07346343994140625, 
-0.07052326202392578, -0.06758308410644531, -0.06464290618896484, -0.061702728271484375, -0.058762550354003906, -0.05582237243652344, -0.05288219451904297, -0.0499420166015625, -0.04700183868408203, -0.04406166076660156, -0.041121482849121094, -0.038181304931640625, -0.035241127014160156, -0.03230094909667969, -0.02936077117919922, -0.02642059326171875, -0.02348041534423828, -0.020540237426757812, -0.017600059509277344, -0.014659881591796875, -0.011719703674316406, -0.008779525756835938, -0.005839347839355469, -0.002899169921875, 4.100799560546875e-05, 0.0029811859130859375, 0.005921363830566406, 0.008861541748046875, 0.011801719665527344, 0.014741897583007812, 0.01768207550048828, 0.02062225341796875, 0.02356243133544922, 0.026502609252929688, 0.029442787170410156, 0.032382965087890625, 0.035323143005371094, 0.03826332092285156, 0.04120349884033203, 0.0441436767578125, 0.04708385467529297, 0.05002403259277344, 0.052964210510253906, 0.055904388427734375, 0.058844566345214844, 0.06178474426269531, 0.06472492218017578, 0.06766510009765625, 0.07060527801513672, 0.07354545593261719, 0.07648563385009766, 0.07942581176757812, 0.0823659896850586, 0.08530616760253906, 0.08824634552001953, 0.0911865234375]}, "gradients/encoder.encoder.layers.14.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 7.0, 10.0, 18.0, 20.0, 41.0, 67.0, 158.0, 314.0, 779.0, 1855.0, 5097.0, 15717.0, 52043.0, 170278.0, 367263.0, 287733.0, 102444.0, 30063.0, 9281.0, 3132.0, 1221.0, 519.0, 227.0, 126.0, 58.0, 31.0, 15.0, 12.0, 12.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0197906494140625, -0.01922154426574707, -0.01865243911743164, -0.01808333396911621, -0.01751422882080078, -0.01694512367248535, -0.016376018524169922, -0.015806913375854492, -0.015237808227539062, -0.014668703079223633, -0.014099597930908203, -0.013530492782592773, -0.012961387634277344, -0.012392282485961914, -0.011823177337646484, -0.011254072189331055, -0.010684967041015625, -0.010115861892700195, -0.009546756744384766, -0.008977651596069336, -0.008408546447753906, -0.007839441299438477, -0.007270336151123047, -0.006701231002807617, -0.0061321258544921875, -0.005563020706176758, -0.004993915557861328, -0.0044248104095458984, -0.0038557052612304688, -0.003286600112915039, -0.0027174949645996094, -0.0021483898162841797, -0.00157928466796875, -0.0010101795196533203, -0.0004410743713378906, 0.00012803077697753906, 0.0006971359252929688, 0.0012662410736083984, 0.0018353462219238281, 0.002404451370239258, 0.0029735565185546875, 0.003542661666870117, 0.004111766815185547, 0.0046808719635009766, 0.005249977111816406, 0.005819082260131836, 0.006388187408447266, 0.006957292556762695, 0.007526397705078125, 0.008095502853393555, 0.008664608001708984, 0.009233713150024414, 0.009802818298339844, 0.010371923446655273, 0.010941028594970703, 0.011510133743286133, 0.012079238891601562, 0.012648344039916992, 0.013217449188232422, 0.013786554336547852, 0.014355659484863281, 0.014924764633178711, 0.01549386978149414, 0.01606297492980957, 0.016632080078125]}, "gradients/encoder.encoder.layers.14.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 5.0, 3.0, 2.0, 5.0, 3.0, 2.0, 14.0, 12.0, 16.0, 9.0, 30.0, 23.0, 31.0, 30.0, 41.0, 40.0, 58.0, 42.0, 74.0, 39.0, 57.0, 36.0, 64.0, 58.0, 45.0, 46.0, 35.0, 30.0, 19.0, 33.0, 17.0, 28.0, 10.0, 12.0, 4.0, 8.0, 6.0, 9.0, 5.0, 0.0, 
5.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-6.4373016357421875e-06, -6.226822733879089e-06, -6.016343832015991e-06, -5.805864930152893e-06, -5.595386028289795e-06, -5.384907126426697e-06, -5.174428224563599e-06, -4.9639493227005005e-06, -4.753470420837402e-06, -4.542991518974304e-06, -4.332512617111206e-06, -4.122033715248108e-06, -3.91155481338501e-06, -3.7010759115219116e-06, -3.4905970096588135e-06, -3.2801181077957153e-06, -3.069639205932617e-06, -2.859160304069519e-06, -2.648681402206421e-06, -2.4382025003433228e-06, -2.2277235984802246e-06, -2.0172446966171265e-06, -1.8067657947540283e-06, -1.5962868928909302e-06, -1.385807991027832e-06, -1.1753290891647339e-06, -9.648501873016357e-07, -7.543712854385376e-07, -5.438923835754395e-07, -3.334134817123413e-07, -1.2293457984924316e-07, 8.754432201385498e-08, 2.980232238769531e-07, 5.085021257400513e-07, 7.189810276031494e-07, 9.294599294662476e-07, 1.1399388313293457e-06, 1.3504177331924438e-06, 1.560896635055542e-06, 1.7713755369186401e-06, 1.9818544387817383e-06, 2.1923333406448364e-06, 2.4028122425079346e-06, 2.6132911443710327e-06, 2.823770046234131e-06, 3.034248948097229e-06, 3.244727849960327e-06, 3.4552067518234253e-06, 3.6656856536865234e-06, 3.876164555549622e-06, 4.08664345741272e-06, 4.297122359275818e-06, 4.507601261138916e-06, 4.718080163002014e-06, 4.928559064865112e-06, 5.1390379667282104e-06, 5.349516868591309e-06, 5.559995770454407e-06, 5.770474672317505e-06, 5.980953574180603e-06, 6.191432476043701e-06, 6.401911377906799e-06, 6.6123902797698975e-06, 6.822869181632996e-06, 7.033348083496094e-06]}, "gradients/encoder.encoder.layers.14.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 3.0, 2.0, 0.0, 4.0, 5.0, 5.0, 7.0, 15.0, 18.0, 30.0, 44.0, 59.0, 123.0, 195.0, 306.0, 593.0, 1122.0, 2278.0, 5329.0, 14112.0, 43278.0, 136356.0, 325222.0, 321068.0, 132848.0, 41916.0, 13833.0, 5189.0, 2154.0, 1098.0, 573.0, 310.0, 166.0, 107.0, 62.0, 44.0, 28.0, 21.0, 13.0, 6.0, 9.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.02093505859375, -0.020353317260742188, -0.019771575927734375, -0.019189834594726562, -0.01860809326171875, -0.018026351928710938, -0.017444610595703125, -0.016862869262695312, -0.0162811279296875, -0.015699386596679688, -0.015117645263671875, -0.014535903930664062, -0.01395416259765625, -0.013372421264648438, -0.012790679931640625, -0.012208938598632812, -0.011627197265625, -0.011045455932617188, -0.010463714599609375, -0.009881973266601562, -0.00930023193359375, -0.008718490600585938, -0.008136749267578125, -0.0075550079345703125, -0.0069732666015625, -0.0063915252685546875, -0.005809783935546875, -0.0052280426025390625, -0.00464630126953125, -0.0040645599365234375, -0.003482818603515625, -0.0029010772705078125, -0.0023193359375, -0.0017375946044921875, -0.001155853271484375, -0.0005741119384765625, 7.62939453125e-06, 0.0005893707275390625, 0.001171112060546875, 0.0017528533935546875, 0.0023345947265625, 0.0029163360595703125, 0.003498077392578125, 0.0040798187255859375, 0.00466156005859375, 0.0052433013916015625, 0.005825042724609375, 0.0064067840576171875, 0.006988525390625, 0.0075702667236328125, 0.008152008056640625, 0.008733749389648438, 0.00931549072265625, 0.009897232055664062, 0.010478973388671875, 0.011060714721679688, 0.0116424560546875, 0.012224197387695312, 0.012805938720703125, 0.013387680053710938, 0.01396942138671875, 0.014551162719726562, 0.015132904052734375, 
0.015714645385742188, 0.01629638671875]}, "gradients/encoder.encoder.layers.14.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 0.0, 1.0, 2.0, 3.0, 2.0, 0.0, 4.0, 7.0, 5.0, 9.0, 16.0, 17.0, 21.0, 26.0, 36.0, 30.0, 36.0, 59.0, 59.0, 77.0, 57.0, 76.0, 65.0, 56.0, 52.0, 43.0, 46.0, 40.0, 32.0, 30.0, 22.0, 18.0, 10.0, 18.0, 7.0, 8.0, 7.0, 1.0, 4.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0], "bins": [-0.01611328125, -0.015616416931152344, -0.015119552612304688, -0.014622688293457031, -0.014125823974609375, -0.013628959655761719, -0.013132095336914062, -0.012635231018066406, -0.01213836669921875, -0.011641502380371094, -0.011144638061523438, -0.010647773742675781, -0.010150909423828125, -0.009654045104980469, -0.009157180786132812, -0.008660316467285156, -0.0081634521484375, -0.007666587829589844, -0.0071697235107421875, -0.006672859191894531, -0.006175994873046875, -0.005679130554199219, -0.0051822662353515625, -0.004685401916503906, -0.00418853759765625, -0.0036916732788085938, -0.0031948089599609375, -0.0026979446411132812, -0.002201080322265625, -0.0017042160034179688, -0.0012073516845703125, -0.0007104873657226562, -0.000213623046875, 0.00028324127197265625, 0.0007801055908203125, 0.0012769699096679688, 0.001773834228515625, 0.0022706985473632812, 0.0027675628662109375, 0.0032644271850585938, 0.00376129150390625, 0.004258155822753906, 0.0047550201416015625, 0.005251884460449219, 0.005748748779296875, 0.006245613098144531, 0.0067424774169921875, 0.007239341735839844, 0.0077362060546875, 0.008233070373535156, 0.008729934692382812, 0.009226799011230469, 0.009723663330078125, 0.010220527648925781, 0.010717391967773438, 0.011214256286621094, 0.01171112060546875, 0.012207984924316406, 0.012704849243164062, 0.013201713562011719, 0.013698577880859375, 0.014195442199707031, 0.014692306518554688, 0.015189170837402344, 0.01568603515625]}, "gradients/encoder.encoder.layers.14.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 5.0, 18.0, 176.0, 599.0, 193.0, 20.0, 5.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.36426013708114624, -0.3296321630477905, -0.2950042188167572, -0.2603762745857239, -0.22574830055236816, -0.19112034142017365, -0.15649238228797913, -0.1218644231557846, -0.08723646402359009, -0.05260850489139557, -0.01798054575920105, 0.01664741337299347, 0.05127537250518799, 0.08590333163738251, 0.12053129076957703, 0.15515924990177155, 0.18978720903396606, 0.22441516816616058, 0.2590431272983551, 0.2936710715293884, 0.32829904556274414, 0.36292701959609985, 0.3975549638271332, 0.4321829080581665, 0.4668108820915222, 0.5014388561248779, 0.5360667705535889, 0.5706947445869446, 0.6053227186203003, 0.639950692653656, 0.6745786666870117, 0.7092065811157227, 0.7438344955444336, 0.7784624695777893, 0.813090443611145, 0.847718358039856, 0.8823463320732117, 0.9169743061065674, 0.9516022205352783, 0.986230194568634, 1.0208581686019897, 1.0554860830307007, 1.0901141166687012, 1.124742031097412, 1.159369945526123, 1.1939979791641235, 1.2286258935928345, 1.263253927230835, 1.297881841659546, 1.3325097560882568, 1.3671377897262573, 1.4017657041549683, 1.4363937377929688, 1.4710216522216797, 1.5056495666503906, 1.5402776002883911, 1.574905514717102, 
1.609533429145813, 1.6441614627838135, 1.6787893772125244, 1.7134172916412354, 1.7480453252792358, 1.7826732397079468, 1.8173012733459473, 1.8519291877746582]}, "gradients/encoder.encoder.layers.14.layer_norm.bias": {"_type": "histogram", "values": [3.0, 0.0, 2.0, 0.0, 0.0, 4.0, 2.0, 3.0, 8.0, 16.0, 13.0, 6.0, 11.0, 13.0, 13.0, 18.0, 24.0, 23.0, 27.0, 40.0, 35.0, 40.0, 32.0, 46.0, 41.0, 43.0, 41.0, 48.0, 38.0, 41.0, 45.0, 36.0, 29.0, 35.0, 30.0, 21.0, 26.0, 17.0, 23.0, 23.0, 19.0, 15.0, 12.0, 10.0, 10.0, 13.0, 10.0, 3.0, 2.0, 5.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.17232096195220947, -0.16597825288772583, -0.159635528922081, -0.15329281985759735, -0.14695009589195251, -0.14060738682746887, -0.13426467776298523, -0.1279219537973404, -0.12157923728227615, -0.11523652076721191, -0.10889380425214767, -0.10255108773708344, -0.09620837867259979, -0.08986565470695496, -0.08352294564247131, -0.07718022912740707, -0.07083751261234283, -0.0644947960972786, -0.058152079582214355, -0.051809366792440414, -0.045466650277376175, -0.039123933762311935, -0.032781220972537994, -0.026438504457473755, -0.020095787942409515, -0.01375307235866785, -0.007410356774926186, -0.0010676421225070953, 0.005275074392557144, 0.011617790907621384, 0.017960503697395325, 0.024303220212459564, 0.030645936727523804, 0.03698865324258804, 0.04333136975765228, 0.049674082547426224, 0.05601679906249046, 0.0623595155775547, 0.06870222836732864, 0.07504494488239288, 0.08138766139745712, 0.08773037791252136, 0.0940730944275856, 0.10041581094264984, 0.10675852000713348, 0.11310124397277832, 0.11944395303726196, 0.1257866621017456, 0.13212938606739044, 0.13847209513187408, 0.14481481909751892, 0.15115752816200256, 0.1575002521276474, 0.16384296119213104, 0.17018568515777588, 0.17652839422225952, 0.18287110328674316, 0.1892138123512268, 0.19555653631687164, 0.20189924538135529, 0.20824196934700012, 0.21458467841148376, 0.2209273874759674, 0.22727011144161224, 0.23361283540725708]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 2.0, 1.0, 4.0, 7.0, 11.0, 6.0, 17.0, 23.0, 37.0, 47.0, 77.0, 139.0, 214.0, 413.0, 901.0, 2019.0, 4665.0, 14316.0, 89750.0, 1042835.0, 2753834.0, 242958.0, 29410.0, 7360.0, 2829.0, 1238.0, 561.0, 237.0, 125.0, 93.0, 43.0, 33.0, 29.0, 22.0, 15.0, 7.0, 2.0, 3.0, 0.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.060546875, -0.05860710144042969, -0.056667327880859375, -0.05472755432128906, -0.05278778076171875, -0.05084800720214844, -0.048908233642578125, -0.04696846008300781, -0.0450286865234375, -0.04308891296386719, -0.041149139404296875, -0.03920936584472656, -0.03726959228515625, -0.03532981872558594, -0.033390045166015625, -0.03145027160644531, -0.029510498046875, -0.027570724487304688, -0.025630950927734375, -0.023691177368164062, -0.02175140380859375, -0.019811630249023438, -0.017871856689453125, -0.015932083129882812, -0.0139923095703125, -0.012052536010742188, -0.010112762451171875, -0.008172988891601562, -0.00623321533203125, -0.0042934417724609375, -0.002353668212890625, -0.0004138946533203125, 0.00152587890625, 0.0034656524658203125, 0.005405426025390625, 0.0073451995849609375, 0.00928497314453125, 0.011224746704101562, 0.013164520263671875, 0.015104293823242188, 0.0170440673828125, 0.018983840942382812, 0.020923614501953125, 0.022863388061523438, 0.02480316162109375, 0.026742935180664062, 
0.028682708740234375, 0.030622482299804688, 0.032562255859375, 0.03450202941894531, 0.036441802978515625, 0.03838157653808594, 0.04032135009765625, 0.04226112365722656, 0.044200897216796875, 0.04614067077636719, 0.0480804443359375, 0.05002021789550781, 0.051959991455078125, 0.05389976501464844, 0.05583953857421875, 0.05777931213378906, 0.059719085693359375, 0.06165885925292969, 0.0635986328125]}, "gradients/encoder.encoder.layers.13.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 3.0, 2.0, 5.0, 14.0, 15.0, 27.0, 46.0, 68.0, 86.0, 100.0, 105.0, 119.0, 101.0, 92.0, 85.0, 63.0, 36.0, 20.0, 6.0, 8.0, 5.0, 4.0, 5.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0985107421875, -0.0957174301147461, -0.09292411804199219, -0.09013080596923828, -0.08733749389648438, -0.08454418182373047, -0.08175086975097656, -0.07895755767822266, -0.07616424560546875, -0.07337093353271484, -0.07057762145996094, -0.06778430938720703, -0.06499099731445312, -0.06219768524169922, -0.05940437316894531, -0.056611061096191406, -0.0538177490234375, -0.051024436950683594, -0.04823112487792969, -0.04543781280517578, -0.042644500732421875, -0.03985118865966797, -0.03705787658691406, -0.034264564514160156, -0.03147125244140625, -0.028677940368652344, -0.025884628295898438, -0.02309131622314453, -0.020298004150390625, -0.01750469207763672, -0.014711380004882812, -0.011918067932128906, -0.009124755859375, -0.006331443786621094, -0.0035381317138671875, -0.0007448196411132812, 0.002048492431640625, 0.004841804504394531, 0.0076351165771484375, 0.010428428649902344, 0.01322174072265625, 0.016015052795410156, 0.018808364868164062, 0.02160167694091797, 0.024394989013671875, 0.02718830108642578, 0.029981613159179688, 0.032774925231933594, 0.0355682373046875, 0.038361549377441406, 0.04115486145019531, 0.04394817352294922, 0.046741485595703125, 0.04953479766845703, 0.05232810974121094, 0.055121421813964844, 0.05791473388671875, 0.060708045959472656, 0.06350135803222656, 0.06629467010498047, 0.06908798217773438, 0.07188129425048828, 0.07467460632324219, 0.0774679183959961, 0.08026123046875]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 1.0, 3.0, 2.0, 12.0, 21.0, 31.0, 58.0, 73.0, 132.0, 273.0, 535.0, 955.0, 2219.0, 5493.0, 15463.0, 54555.0, 269682.0, 2196587.0, 1409609.0, 178840.0, 39663.0, 11908.0, 4420.0, 1924.0, 876.0, 438.0, 226.0, 117.0, 70.0, 38.0, 29.0, 13.0, 10.0, 5.0, 5.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.041778564453125, -0.040425777435302734, -0.03907299041748047, -0.0377202033996582, -0.03636741638183594, -0.03501462936401367, -0.033661842346191406, -0.03230905532836914, -0.030956268310546875, -0.02960348129272461, -0.028250694274902344, -0.026897907257080078, -0.025545120239257812, -0.024192333221435547, -0.02283954620361328, -0.021486759185791016, -0.02013397216796875, -0.018781185150146484, -0.01742839813232422, -0.016075611114501953, -0.014722824096679688, -0.013370037078857422, -0.012017250061035156, -0.01066446304321289, -0.009311676025390625, -0.00795888900756836, -0.006606101989746094, -0.005253314971923828, -0.0039005279541015625, -0.002547740936279297, -0.0011949539184570312, 0.00015783309936523438, 0.0015106201171875, 
0.0028634071350097656, 0.004216194152832031, 0.005568981170654297, 0.0069217681884765625, 0.008274555206298828, 0.009627342224121094, 0.01098012924194336, 0.012332916259765625, 0.01368570327758789, 0.015038490295410156, 0.016391277313232422, 0.017744064331054688, 0.019096851348876953, 0.02044963836669922, 0.021802425384521484, 0.02315521240234375, 0.024507999420166016, 0.02586078643798828, 0.027213573455810547, 0.028566360473632812, 0.029919147491455078, 0.031271934509277344, 0.03262472152709961, 0.033977508544921875, 0.03533029556274414, 0.036683082580566406, 0.03803586959838867, 0.03938865661621094, 0.0407414436340332, 0.04209423065185547, 0.043447017669677734, 0.0447998046875]}, "gradients/encoder.encoder.layers.13.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 3.0, 6.0, 8.0, 6.0, 20.0, 16.0, 33.0, 34.0, 85.0, 153.0, 273.0, 537.0, 775.0, 820.0, 573.0, 300.0, 184.0, 98.0, 48.0, 36.0, 21.0, 19.0, 8.0, 6.0, 5.0, 8.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0440673828125, -0.04273366928100586, -0.04139995574951172, -0.04006624221801758, -0.03873252868652344, -0.0373988151550293, -0.036065101623535156, -0.034731388092041016, -0.033397674560546875, -0.032063961029052734, -0.030730247497558594, -0.029396533966064453, -0.028062820434570312, -0.026729106903076172, -0.02539539337158203, -0.02406167984008789, -0.02272796630859375, -0.02139425277709961, -0.02006053924560547, -0.018726825714111328, -0.017393112182617188, -0.016059398651123047, -0.014725685119628906, -0.013391971588134766, -0.012058258056640625, -0.010724544525146484, -0.009390830993652344, -0.008057117462158203, -0.0067234039306640625, -0.005389690399169922, -0.004055976867675781, -0.0027222633361816406, -0.0013885498046875, -5.4836273193359375e-05, 0.0012788772583007812, 0.002612590789794922, 0.0039463043212890625, 0.005280017852783203, 0.006613731384277344, 0.007947444915771484, 0.009281158447265625, 0.010614871978759766, 0.011948585510253906, 0.013282299041748047, 0.014616012573242188, 0.015949726104736328, 0.01728343963623047, 0.01861715316772461, 0.01995086669921875, 0.02128458023071289, 0.02261829376220703, 0.023952007293701172, 0.025285720825195312, 0.026619434356689453, 0.027953147888183594, 0.029286861419677734, 0.030620574951171875, 0.031954288482666016, 0.033288002014160156, 0.0346217155456543, 0.03595542907714844, 0.03728914260864258, 0.03862285614013672, 0.03995656967163086, 0.041290283203125]}, "gradients/encoder.encoder.layers.13.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 11.0, 19.0, 71.0, 144.0, 302.0, 239.0, 141.0, 48.0, 22.0, 7.0, 4.0, 1.0, 1.0, 0.0, 2.0, 2.0], "bins": [-0.5724175572395325, -0.5616039037704468, -0.5507901906967163, -0.5399765372276306, -0.5291628241539001, -0.5183491706848145, -0.507535457611084, -0.4967217743396759, -0.4859080910682678, -0.47509440779685974, -0.46428072452545166, -0.4534670412540436, -0.4426533579826355, -0.4318396747112274, -0.42102599143981934, -0.41021230816841125, -0.3993986248970032, -0.3885849416255951, -0.377771258354187, -0.36695757508277893, -0.35614389181137085, -0.34533020853996277, -0.3345165252685547, -0.3237028419971466, 
-0.3128891885280609, -0.30207550525665283, -0.29126182198524475, -0.28044813871383667, -0.2696344554424286, -0.2588207721710205, -0.24800708889961243, -0.23719340562820435, -0.22637970745563507, -0.215566024184227, -0.2047523409128189, -0.19393865764141083, -0.18312497437000275, -0.17231129109859467, -0.16149762272834778, -0.1506839394569397, -0.13987025618553162, -0.12905657291412354, -0.11824288964271545, -0.10742920637130737, -0.09661552309989929, -0.08580183982849121, -0.07498816400766373, -0.06417448073625565, -0.05336079001426697, -0.04254710674285889, -0.031733423471450806, -0.020919743925333023, -0.010106060653924942, 0.000707622617483139, 0.011521302163600922, 0.022334985435009003, 0.033148668706417084, 0.043962351977825165, 0.054776035249233246, 0.06558971107006073, 0.07640339434146881, 0.08721707761287689, 0.09803076088428497, 0.10884444415569305, 0.11965812742710114]}, "gradients/encoder.encoder.layers.13.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 4.0, 2.0, 5.0, 8.0, 8.0, 7.0, 10.0, 15.0, 13.0, 17.0, 28.0, 29.0, 31.0, 35.0, 35.0, 52.0, 42.0, 53.0, 48.0, 45.0, 53.0, 54.0, 47.0, 46.0, 36.0, 40.0, 37.0, 29.0, 33.0, 27.0, 17.0, 20.0, 23.0, 12.0, 13.0, 18.0, 6.0, 3.0, 5.0, 5.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10895055532455444, -0.10589355230331421, -0.10283655673265457, -0.09977956116199493, -0.0967225581407547, -0.09366555511951447, -0.09060855954885483, -0.08755156397819519, -0.08449456095695496, -0.08143755793571472, -0.07838056236505508, -0.07532356679439545, -0.07226656377315521, -0.06920956075191498, -0.06615256518125534, -0.0630955696105957, -0.06003856658935547, -0.05698156729340553, -0.0539245679974556, -0.05086756870150566, -0.047810569405555725, -0.04475357010960579, -0.04169657081365585, -0.03863957151770592, -0.03558257222175598, -0.032525572925806046, -0.02946857362985611, -0.026411574333906174, -0.023354575037956238, -0.020297575742006302, -0.017240576446056366, -0.01418357715010643, -0.011126577854156494, -0.008069578558206558, -0.005012579262256622, -0.0019555799663066864, 0.0011014193296432495, 0.004158418625593185, 0.007215417921543121, 0.010272417217493057, 0.013329416513442993, 0.01638641580939293, 0.019443415105342865, 0.0225004144012928, 0.025557413697242737, 0.028614412993192673, 0.03167141228914261, 0.034728411585092545, 0.03778541088104248, 0.040842410176992416, 0.04389940947294235, 0.04695640876889229, 0.050013408064842224, 0.05307040736079216, 0.056127406656742096, 0.05918440595269203, 0.06224140524864197, 0.0652984082698822, 0.06835540384054184, 0.07141239941120148, 0.07446940243244171, 0.07752640545368195, 0.08058340102434158, 0.08364039659500122, 0.08669739961624146]}, "gradients/encoder.encoder.layers.13.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 7.0, 6.0, 8.0, 8.0, 13.0, 19.0, 22.0, 35.0, 57.0, 68.0, 97.0, 164.0, 250.0, 334.0, 589.0, 1053.0, 2190.0, 5253.0, 15749.0, 53014.0, 188964.0, 439203.0, 240334.0, 69122.0, 20027.0, 6500.0, 2490.0, 1197.0, 659.0, 363.0, 234.0, 160.0, 110.0, 79.0, 59.0, 34.0, 24.0, 28.0, 10.0, 16.0, 3.0, 3.0, 4.0, 4.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.061187744140625, -0.05912065505981445, -0.057053565979003906, -0.05498647689819336, -0.05291938781738281, -0.050852298736572266, -0.04878520965576172, -0.04671812057495117, -0.044651031494140625, -0.04258394241333008, -0.04051685333251953, -0.038449764251708984, 
-0.03638267517089844, -0.03431558609008789, -0.032248497009277344, -0.030181407928466797, -0.02811431884765625, -0.026047229766845703, -0.023980140686035156, -0.02191305160522461, -0.019845962524414062, -0.017778873443603516, -0.01571178436279297, -0.013644695281982422, -0.011577606201171875, -0.009510517120361328, -0.007443428039550781, -0.005376338958740234, -0.0033092498779296875, -0.0012421607971191406, 0.0008249282836914062, 0.002892017364501953, 0.0049591064453125, 0.007026195526123047, 0.009093284606933594, 0.01116037368774414, 0.013227462768554688, 0.015294551849365234, 0.01736164093017578, 0.019428730010986328, 0.021495819091796875, 0.023562908172607422, 0.02562999725341797, 0.027697086334228516, 0.029764175415039062, 0.03183126449584961, 0.033898353576660156, 0.0359654426574707, 0.03803253173828125, 0.0400996208190918, 0.042166709899902344, 0.04423379898071289, 0.04630088806152344, 0.048367977142333984, 0.05043506622314453, 0.05250215530395508, 0.054569244384765625, 0.05663633346557617, 0.05870342254638672, 0.060770511627197266, 0.06283760070800781, 0.06490468978881836, 0.0669717788696289, 0.06903886795043945, 0.07110595703125]}, "gradients/encoder.encoder.layers.13.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 7.0, 11.0, 19.0, 32.0, 44.0, 58.0, 82.0, 90.0, 114.0, 118.0, 107.0, 95.0, 75.0, 73.0, 35.0, 18.0, 10.0, 7.0, 7.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.09564208984375, -0.09293556213378906, -0.09022903442382812, -0.08752250671386719, -0.08481597900390625, -0.08210945129394531, -0.07940292358398438, -0.07669639587402344, -0.0739898681640625, -0.07128334045410156, -0.06857681274414062, -0.06587028503417969, -0.06316375732421875, -0.06045722961425781, -0.057750701904296875, -0.05504417419433594, -0.052337646484375, -0.04963111877441406, -0.046924591064453125, -0.04421806335449219, -0.04151153564453125, -0.03880500793457031, -0.036098480224609375, -0.03339195251464844, -0.0306854248046875, -0.027978897094726562, -0.025272369384765625, -0.022565841674804688, -0.01985931396484375, -0.017152786254882812, -0.014446258544921875, -0.011739730834960938, -0.009033203125, -0.0063266754150390625, -0.003620147705078125, -0.0009136199951171875, 0.00179290771484375, 0.0044994354248046875, 0.007205963134765625, 0.009912490844726562, 0.0126190185546875, 0.015325546264648438, 0.018032073974609375, 0.020738601684570312, 0.02344512939453125, 0.026151657104492188, 0.028858184814453125, 0.03156471252441406, 0.034271240234375, 0.03697776794433594, 0.039684295654296875, 0.04239082336425781, 0.04509735107421875, 0.04780387878417969, 0.050510406494140625, 0.05321693420410156, 0.0559234619140625, 0.05862998962402344, 0.061336517333984375, 0.06404304504394531, 0.06674957275390625, 0.06945610046386719, 0.07216262817382812, 0.07486915588378906, 0.07757568359375]}, "gradients/encoder.encoder.layers.13.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0, 5.0, 6.0, 8.0, 6.0, 12.0, 19.0, 8.0, 16.0, 13.0, 23.0, 19.0, 20.0, 29.0, 39.0, 49.0, 86.0, 144.0, 321.0, 1016.0, 3907.0, 20769.0, 144657.0, 601424.0, 234116.0, 33809.0, 5758.0, 1338.0, 416.0, 165.0, 91.0, 50.0, 38.0, 21.0, 19.0, 13.0, 21.0, 26.0, 13.0, 13.0, 10.0, 14.0, 7.0, 6.0, 5.0, 3.0, 4.0, 2.0, 1.0, 5.0, 2.0, 1.0, 0.0, 2.0, 1.0], "bins": [-0.0858154296875, -0.08318519592285156, 
-0.08055496215820312, -0.07792472839355469, -0.07529449462890625, -0.07266426086425781, -0.07003402709960938, -0.06740379333496094, -0.0647735595703125, -0.06214332580566406, -0.059513092041015625, -0.05688285827636719, -0.05425262451171875, -0.05162239074707031, -0.048992156982421875, -0.04636192321777344, -0.043731689453125, -0.04110145568847656, -0.038471221923828125, -0.03584098815917969, -0.03321075439453125, -0.030580520629882812, -0.027950286865234375, -0.025320053100585938, -0.0226898193359375, -0.020059585571289062, -0.017429351806640625, -0.014799118041992188, -0.01216888427734375, -0.009538650512695312, -0.006908416748046875, -0.0042781829833984375, -0.00164794921875, 0.0009822845458984375, 0.003612518310546875, 0.0062427520751953125, 0.00887298583984375, 0.011503219604492188, 0.014133453369140625, 0.016763687133789062, 0.0193939208984375, 0.022024154663085938, 0.024654388427734375, 0.027284622192382812, 0.02991485595703125, 0.03254508972167969, 0.035175323486328125, 0.03780555725097656, 0.040435791015625, 0.04306602478027344, 0.045696258544921875, 0.04832649230957031, 0.05095672607421875, 0.05358695983886719, 0.056217193603515625, 0.05884742736816406, 0.0614776611328125, 0.06410789489746094, 0.06673812866210938, 0.06936836242675781, 0.07199859619140625, 0.07462882995605469, 0.07725906372070312, 0.07988929748535156, 0.08251953125]}, "gradients/encoder.encoder.layers.13.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 5.0, 1.0, 6.0, 3.0, 7.0, 5.0, 8.0, 9.0, 10.0, 11.0, 16.0, 22.0, 18.0, 25.0, 19.0, 17.0, 18.0, 21.0, 34.0, 45.0, 27.0, 32.0, 42.0, 37.0, 42.0, 31.0, 49.0, 31.0, 53.0, 33.0, 42.0, 28.0, 35.0, 29.0, 29.0, 21.0, 13.0, 26.0, 22.0, 16.0, 14.0, 13.0, 11.0, 12.0, 7.0, 4.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0], "bins": [-0.0665283203125, -0.06438732147216797, -0.06224632263183594, -0.060105323791503906, -0.057964324951171875, -0.055823326110839844, -0.05368232727050781, -0.05154132843017578, -0.04940032958984375, -0.04725933074951172, -0.04511833190917969, -0.042977333068847656, -0.040836334228515625, -0.038695335388183594, -0.03655433654785156, -0.03441333770751953, -0.0322723388671875, -0.03013134002685547, -0.027990341186523438, -0.025849342346191406, -0.023708343505859375, -0.021567344665527344, -0.019426345825195312, -0.01728534698486328, -0.01514434814453125, -0.013003349304199219, -0.010862350463867188, -0.008721351623535156, -0.006580352783203125, -0.004439353942871094, -0.0022983551025390625, -0.00015735626220703125, 0.001983642578125, 0.004124641418457031, 0.0062656402587890625, 0.008406639099121094, 0.010547637939453125, 0.012688636779785156, 0.014829635620117188, 0.01697063446044922, 0.01911163330078125, 0.02125263214111328, 0.023393630981445312, 0.025534629821777344, 0.027675628662109375, 0.029816627502441406, 0.03195762634277344, 0.03409862518310547, 0.0362396240234375, 0.03838062286376953, 0.04052162170410156, 0.042662620544433594, 0.044803619384765625, 0.046944618225097656, 0.04908561706542969, 0.05122661590576172, 0.05336761474609375, 0.05550861358642578, 0.05764961242675781, 0.059790611267089844, 0.061931610107421875, 0.0640726089477539, 0.06621360778808594, 0.06835460662841797, 0.07049560546875]}, "gradients/encoder.encoder.layers.13.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 7.0, 4.0, 4.0, 8.0, 10.0, 9.0, 14.0, 14.0, 18.0, 50.0, 48.0, 70.0, 106.0, 163.0, 233.0, 320.0, 473.0, 687.0, 1023.0, 1573.0, 2518.0, 4139.0, 7175.0, 13186.0, 24946.0, 50630.0, 
114269.0, 244149.0, 288670.0, 154662.0, 67782.0, 32174.0, 16628.0, 9065.0, 5127.0, 3037.0, 1880.0, 1182.0, 773.0, 523.0, 351.0, 238.0, 172.0, 113.0, 87.0, 82.0, 52.0, 35.0, 30.0, 19.0, 13.0, 11.0, 4.0, 5.0, 3.0, 2.0, 2.0, 1.0, 2.0], "bins": [-0.0116424560546875, -0.01129162311553955, -0.010940790176391602, -0.010589957237243652, -0.010239124298095703, -0.009888291358947754, -0.009537458419799805, -0.009186625480651855, -0.008835792541503906, -0.008484959602355957, -0.008134126663208008, -0.007783293724060059, -0.007432460784912109, -0.00708162784576416, -0.006730794906616211, -0.006379961967468262, -0.0060291290283203125, -0.005678296089172363, -0.005327463150024414, -0.004976630210876465, -0.004625797271728516, -0.004274964332580566, -0.003924131393432617, -0.003573298454284668, -0.0032224655151367188, -0.0028716325759887695, -0.0025207996368408203, -0.002169966697692871, -0.0018191337585449219, -0.0014683008193969727, -0.0011174678802490234, -0.0007666349411010742, -0.000415802001953125, -6.496906280517578e-05, 0.00028586387634277344, 0.0006366968154907227, 0.0009875297546386719, 0.001338362693786621, 0.0016891956329345703, 0.0020400285720825195, 0.0023908615112304688, 0.002741694450378418, 0.003092527389526367, 0.0034433603286743164, 0.0037941932678222656, 0.004145026206970215, 0.004495859146118164, 0.004846692085266113, 0.0051975250244140625, 0.005548357963562012, 0.005899190902709961, 0.00625002384185791, 0.006600856781005859, 0.006951689720153809, 0.007302522659301758, 0.007653355598449707, 0.008004188537597656, 0.008355021476745605, 0.008705854415893555, 0.009056687355041504, 0.009407520294189453, 0.009758353233337402, 0.010109186172485352, 0.0104600191116333, 0.01081085205078125]}, "gradients/encoder.encoder.layers.13.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 2.0, 1.0, 4.0, 4.0, 7.0, 9.0, 10.0, 9.0, 22.0, 21.0, 20.0, 28.0, 24.0, 44.0, 42.0, 38.0, 59.0, 56.0, 66.0, 50.0, 55.0, 64.0, 43.0, 56.0, 44.0, 40.0, 23.0, 25.0, 34.0, 13.0, 22.0, 14.0, 10.0, 14.0, 8.0, 12.0, 4.0, 1.0, 5.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.258487701416016e-06, -6.055459380149841e-06, -5.852431058883667e-06, -5.649402737617493e-06, -5.446374416351318e-06, -5.243346095085144e-06, -5.04031777381897e-06, -4.837289452552795e-06, -4.634261131286621e-06, -4.431232810020447e-06, -4.2282044887542725e-06, -4.025176167488098e-06, -3.822147846221924e-06, -3.6191195249557495e-06, -3.416091203689575e-06, -3.213062882423401e-06, -3.0100345611572266e-06, -2.8070062398910522e-06, -2.603977918624878e-06, -2.4009495973587036e-06, -2.1979212760925293e-06, -1.994892954826355e-06, -1.7918646335601807e-06, -1.5888363122940063e-06, -1.385807991027832e-06, -1.1827796697616577e-06, -9.797513484954834e-07, -7.767230272293091e-07, -5.736947059631348e-07, -3.7066638469696045e-07, -1.6763806343078613e-07, 3.5390257835388184e-08, 2.384185791015625e-07, 4.414469003677368e-07, 6.444752216339111e-07, 8.475035429000854e-07, 1.0505318641662598e-06, 1.253560185432434e-06, 1.4565885066986084e-06, 1.6596168279647827e-06, 1.862645149230957e-06, 2.0656734704971313e-06, 2.2687017917633057e-06, 2.47173011302948e-06, 2.6747584342956543e-06, 2.8777867555618286e-06, 3.080815076828003e-06, 3.2838433980941772e-06, 3.4868717193603516e-06, 3.689900040626526e-06, 3.8929283618927e-06, 4.0959566831588745e-06, 4.298985004425049e-06, 4.502013325691223e-06, 4.7050416469573975e-06, 4.908069968223572e-06, 5.111098289489746e-06, 5.31412661075592e-06, 
5.517154932022095e-06, 5.720183253288269e-06, 5.923211574554443e-06, 6.126239895820618e-06, 6.329268217086792e-06, 6.532296538352966e-06, 6.735324859619141e-06]}, "gradients/encoder.encoder.layers.13.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 5.0, 4.0, 8.0, 6.0, 9.0, 24.0, 27.0, 35.0, 68.0, 93.0, 137.0, 192.0, 278.0, 470.0, 827.0, 1474.0, 2912.0, 6262.0, 14774.0, 39082.0, 122641.0, 369659.0, 328365.0, 102909.0, 33920.0, 12871.0, 5519.0, 2579.0, 1374.0, 767.0, 433.0, 294.0, 153.0, 131.0, 87.0, 54.0, 42.0, 20.0, 22.0, 9.0, 11.0, 5.0, 2.0, 2.0, 2.0, 0.0, 5.0, 4.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0188140869140625, -0.018259525299072266, -0.01770496368408203, -0.017150402069091797, -0.016595840454101562, -0.016041278839111328, -0.015486717224121094, -0.01493215560913086, -0.014377593994140625, -0.01382303237915039, -0.013268470764160156, -0.012713909149169922, -0.012159347534179688, -0.011604785919189453, -0.011050224304199219, -0.010495662689208984, -0.00994110107421875, -0.009386539459228516, -0.008831977844238281, -0.008277416229248047, -0.0077228546142578125, -0.007168292999267578, -0.006613731384277344, -0.006059169769287109, -0.005504608154296875, -0.004950046539306641, -0.004395484924316406, -0.003840923309326172, -0.0032863616943359375, -0.002731800079345703, -0.0021772384643554688, -0.0016226768493652344, -0.001068115234375, -0.0005135536193847656, 4.100799560546875e-05, 0.0005955696105957031, 0.0011501312255859375, 0.0017046928405761719, 0.0022592544555664062, 0.0028138160705566406, 0.003368377685546875, 0.003922939300537109, 0.004477500915527344, 0.005032062530517578, 0.0055866241455078125, 0.006141185760498047, 0.006695747375488281, 0.007250308990478516, 0.00780487060546875, 0.008359432220458984, 0.008913993835449219, 0.009468555450439453, 0.010023117065429688, 0.010577678680419922, 0.011132240295410156, 0.01168680191040039, 0.012241363525390625, 0.01279592514038086, 0.013350486755371094, 0.013905048370361328, 0.014459609985351562, 0.015014171600341797, 0.015568733215332031, 0.016123294830322266, 0.0166778564453125]}, "gradients/encoder.encoder.layers.13.attention.q_proj.bias": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 4.0, 1.0, 2.0, 2.0, 9.0, 7.0, 9.0, 13.0, 9.0, 9.0, 18.0, 20.0, 32.0, 35.0, 43.0, 53.0, 80.0, 89.0, 69.0, 71.0, 83.0, 71.0, 56.0, 52.0, 37.0, 26.0, 16.0, 21.0, 18.0, 13.0, 10.0, 5.0, 3.0, 6.0, 4.0, 2.0, 5.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0137176513671875, -0.013248920440673828, -0.012780189514160156, -0.012311458587646484, -0.011842727661132812, -0.01137399673461914, -0.010905265808105469, -0.010436534881591797, -0.009967803955078125, -0.009499073028564453, -0.009030342102050781, -0.00856161117553711, -0.008092880249023438, -0.007624149322509766, -0.007155418395996094, -0.006686687469482422, -0.00621795654296875, -0.005749225616455078, -0.005280494689941406, -0.004811763763427734, -0.0043430328369140625, -0.0038743019104003906, -0.0034055709838867188, -0.002936840057373047, -0.002468109130859375, -0.001999378204345703, -0.0015306472778320312, -0.0010619163513183594, -0.0005931854248046875, -0.00012445449829101562, 0.00034427642822265625, 0.0008130073547363281, 0.00128173828125, 0.0017504692077636719, 0.0022192001342773438, 0.0026879310607910156, 0.0031566619873046875, 0.0036253929138183594, 0.004094123840332031, 0.004562854766845703, 0.005031585693359375, 0.005500316619873047, 
0.005969047546386719, 0.006437778472900391, 0.0069065093994140625, 0.007375240325927734, 0.007843971252441406, 0.008312702178955078, 0.00878143310546875, 0.009250164031982422, 0.009718894958496094, 0.010187625885009766, 0.010656356811523438, 0.01112508773803711, 0.011593818664550781, 0.012062549591064453, 0.012531280517578125, 0.013000011444091797, 0.013468742370605469, 0.01393747329711914, 0.014406204223632812, 0.014874935150146484, 0.015343666076660156, 0.015812397003173828, 0.0162811279296875]}, "gradients/encoder.encoder.layers.13.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 9.0, 14.0, 56.0, 225.0, 388.0, 226.0, 69.0, 28.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.1836555004119873, -0.16432428359985352, -0.14499308168888092, -0.12566187977790833, -0.10633066296577454, -0.08699945360422134, -0.06766824424266815, -0.04833704233169556, -0.029005825519561768, -0.009674616158008575, 0.009656593203544617, 0.02898780256509781, 0.048319011926651, 0.0676502212882042, 0.08698143064975739, 0.10631263256072998, 0.12564384937286377, 0.14497506618499756, 0.16430626809597015, 0.18363747000694275, 0.20296868681907654, 0.22229990363121033, 0.24163110554218292, 0.2609623074531555, 0.2802935242652893, 0.2996247410774231, 0.3189559578895569, 0.3382871448993683, 0.3576183617115021, 0.37694957852363586, 0.39628076553344727, 0.41561198234558105, 0.43494319915771484, 0.45427441596984863, 0.4736056327819824, 0.4929368197917938, 0.51226806640625, 0.531599223613739, 0.5509304404258728, 0.5702616572380066, 0.5895928740501404, 0.6089240908622742, 0.628255307674408, 0.6475865244865417, 0.6669176816940308, 0.6862488985061646, 0.7055801153182983, 0.7249113321304321, 0.7442425489425659, 0.7635737657546997, 0.7829049825668335, 0.8022361993789673, 0.8215674161911011, 0.8408985733985901, 0.8602297902107239, 0.8795610070228577, 0.8988922238349915, 0.9182234406471252, 0.937554657459259, 0.9568858742713928, 0.9762170314788818, 0.9955482482910156, 1.0148794651031494, 1.0342106819152832, 1.053541898727417]}, "gradients/encoder.encoder.layers.13.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 6.0, 0.0, 6.0, 2.0, 3.0, 4.0, 6.0, 5.0, 9.0, 6.0, 11.0, 20.0, 15.0, 20.0, 29.0, 27.0, 31.0, 31.0, 32.0, 30.0, 44.0, 31.0, 39.0, 41.0, 45.0, 37.0, 29.0, 47.0, 41.0, 30.0, 42.0, 38.0, 21.0, 39.0, 24.0, 17.0, 26.0, 16.0, 19.0, 20.0, 9.0, 11.0, 17.0, 9.0, 5.0, 5.0, 8.0, 1.0, 2.0, 3.0, 3.0, 5.0, 0.0, 1.0, 2.0], "bins": [-0.1948336362838745, -0.1891840249300003, -0.1835343986749649, -0.1778847873210907, -0.1722351610660553, -0.1665855497121811, -0.16093593835830688, -0.15528631210327148, -0.14963668584823608, -0.14398707449436188, -0.13833744823932648, -0.13268783688545227, -0.12703821063041687, -0.12138859927654266, -0.11573898047208786, -0.11008936166763306, -0.10443975031375885, -0.09879013150930405, -0.09314051270484924, -0.08749090135097504, -0.08184127509593964, -0.07619166374206543, -0.07054204493761063, -0.06489242613315582, -0.05924280732870102, -0.053593188524246216, -0.04794356971979141, -0.04229395464062691, -0.036644335836172104, -0.0309947170317173, -0.025345101952552795, -0.019695483148097992, -0.014045864343643188, -0.00839624647051096, -0.0027466285973787308, 0.0029029883444309235, 0.008552607148885727, 0.01420222595334053, 
0.019851841032505035, 0.02550145983695984, 0.031151078641414642, 0.036800697445869446, 0.04245031625032425, 0.048099931329488754, 0.05374955013394356, 0.05939916893839836, 0.06504878401756287, 0.07069840282201767, 0.07634802162647247, 0.08199764043092728, 0.08764725923538208, 0.09329687058925629, 0.09894649684429169, 0.1045961081981659, 0.1102457270026207, 0.1158953458070755, 0.1215449646115303, 0.1271945834159851, 0.13284419476985931, 0.13849382102489471, 0.14414343237876892, 0.14979305863380432, 0.15544266998767853, 0.16109228134155273, 0.16674190759658813]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 5.0, 8.0, 6.0, 13.0, 19.0, 19.0, 27.0, 36.0, 55.0, 60.0, 104.0, 99.0, 185.0, 334.0, 520.0, 1028.0, 1885.0, 4033.0, 10057.0, 39057.0, 250203.0, 2357748.0, 1330622.0, 154544.0, 27868.0, 8432.0, 3566.0, 1693.0, 910.0, 498.0, 261.0, 131.0, 83.0, 42.0, 37.0, 36.0, 21.0, 12.0, 9.0, 6.0, 4.0, 4.0, 3.0, 5.0, 4.0, 2.0], "bins": [-0.05963134765625, -0.058135986328125, -0.056640625, -0.055145263671875, -0.05364990234375, -0.052154541015625, -0.0506591796875, -0.049163818359375, -0.04766845703125, -0.046173095703125, -0.044677734375, -0.043182373046875, -0.04168701171875, -0.040191650390625, -0.0386962890625, -0.037200927734375, -0.03570556640625, -0.034210205078125, -0.03271484375, -0.031219482421875, -0.02972412109375, -0.028228759765625, -0.0267333984375, -0.025238037109375, -0.02374267578125, -0.022247314453125, -0.020751953125, -0.019256591796875, -0.01776123046875, -0.016265869140625, -0.0147705078125, -0.013275146484375, -0.01177978515625, -0.010284423828125, -0.0087890625, -0.007293701171875, -0.00579833984375, -0.004302978515625, -0.0028076171875, -0.001312255859375, 0.00018310546875, 0.001678466796875, 0.003173828125, 0.004669189453125, 0.00616455078125, 0.007659912109375, 0.0091552734375, 0.010650634765625, 0.01214599609375, 0.013641357421875, 0.01513671875, 0.016632080078125, 0.01812744140625, 0.019622802734375, 0.0211181640625, 0.022613525390625, 0.02410888671875, 0.025604248046875, 0.027099609375, 0.028594970703125, 0.03009033203125, 0.031585693359375, 0.0330810546875, 0.034576416015625, 0.03607177734375]}, "gradients/encoder.encoder.layers.12.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 3.0, 7.0, 12.0, 22.0, 33.0, 45.0, 71.0, 85.0, 106.0, 128.0, 130.0, 107.0, 87.0, 59.0, 39.0, 37.0, 20.0, 6.0, 6.0, 3.0, 4.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09796142578125, -0.09523487091064453, -0.09250831604003906, -0.0897817611694336, -0.08705520629882812, -0.08432865142822266, -0.08160209655761719, -0.07887554168701172, -0.07614898681640625, -0.07342243194580078, -0.07069587707519531, -0.06796932220458984, -0.06524276733398438, -0.0625162124633789, -0.05978965759277344, -0.05706310272216797, -0.0543365478515625, -0.05160999298095703, -0.04888343811035156, -0.046156883239746094, -0.043430328369140625, -0.040703773498535156, -0.03797721862792969, -0.03525066375732422, -0.03252410888671875, -0.02979755401611328, -0.027070999145507812, -0.024344444274902344, -0.021617889404296875, -0.018891334533691406, -0.016164779663085938, -0.013438224792480469, -0.010711669921875, -0.007985115051269531, -0.0052585601806640625, -0.0025320053100585938, 
0.000194549560546875, 0.0029211044311523438, 0.0056476593017578125, 0.008374214172363281, 0.01110076904296875, 0.013827323913574219, 0.016553878784179688, 0.019280433654785156, 0.022006988525390625, 0.024733543395996094, 0.027460098266601562, 0.03018665313720703, 0.0329132080078125, 0.03563976287841797, 0.03836631774902344, 0.041092872619628906, 0.043819427490234375, 0.046545982360839844, 0.04927253723144531, 0.05199909210205078, 0.05472564697265625, 0.05745220184326172, 0.06017875671386719, 0.06290531158447266, 0.06563186645507812, 0.0683584213256836, 0.07108497619628906, 0.07381153106689453, 0.0765380859375]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 3.0, 9.0, 8.0, 9.0, 29.0, 53.0, 72.0, 135.0, 280.0, 473.0, 920.0, 1696.0, 3536.0, 7874.0, 18428.0, 48201.0, 164174.0, 891894.0, 2410842.0, 480249.0, 105271.0, 34600.0, 13613.0, 5975.0, 2797.0, 1426.0, 801.0, 400.0, 234.0, 121.0, 75.0, 44.0, 24.0, 13.0, 8.0, 2.0, 2.0, 1.0, 2.0, 0.0, 3.0], "bins": [-0.044219970703125, -0.04315018653869629, -0.04208040237426758, -0.04101061820983887, -0.039940834045410156, -0.038871049880981445, -0.037801265716552734, -0.03673148155212402, -0.03566169738769531, -0.0345919132232666, -0.03352212905883789, -0.03245234489440918, -0.03138256072998047, -0.030312776565551758, -0.029242992401123047, -0.028173208236694336, -0.027103424072265625, -0.026033639907836914, -0.024963855743408203, -0.023894071578979492, -0.02282428741455078, -0.02175450325012207, -0.02068471908569336, -0.01961493492126465, -0.018545150756835938, -0.017475366592407227, -0.016405582427978516, -0.015335798263549805, -0.014266014099121094, -0.013196229934692383, -0.012126445770263672, -0.011056661605834961, -0.00998687744140625, -0.008917093276977539, -0.007847309112548828, -0.006777524948120117, -0.005707740783691406, -0.004637956619262695, -0.0035681724548339844, -0.0024983882904052734, -0.0014286041259765625, -0.00035881996154785156, 0.0007109642028808594, 0.0017807483673095703, 0.0028505325317382812, 0.003920316696166992, 0.004990100860595703, 0.006059885025024414, 0.007129669189453125, 0.008199453353881836, 0.009269237518310547, 0.010339021682739258, 0.011408805847167969, 0.01247859001159668, 0.01354837417602539, 0.014618158340454102, 0.015687942504882812, 0.016757726669311523, 0.017827510833740234, 0.018897294998168945, 0.019967079162597656, 0.021036863327026367, 0.022106647491455078, 0.02317643165588379, 0.0242462158203125]}, "gradients/encoder.encoder.layers.12.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 1.0, 0.0, 6.0, 5.0, 6.0, 5.0, 8.0, 5.0, 12.0, 17.0, 15.0, 36.0, 44.0, 44.0, 69.0, 129.0, 187.0, 263.0, 406.0, 574.0, 673.0, 467.0, 339.0, 264.0, 136.0, 112.0, 62.0, 48.0, 22.0, 30.0, 23.0, 16.0, 18.0, 5.0, 9.0, 4.0, 7.0, 5.0, 4.0, 2.0, 2.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0223541259765625, -0.021533489227294922, -0.020712852478027344, -0.019892215728759766, -0.019071578979492188, -0.01825094223022461, -0.01743030548095703, -0.016609668731689453, -0.015789031982421875, -0.014968395233154297, -0.014147758483886719, -0.01332712173461914, -0.012506484985351562, -0.011685848236083984, -0.010865211486816406, -0.010044574737548828, -0.00922393798828125, -0.008403301239013672, -0.007582664489746094, -0.006762027740478516, 
-0.0059413909912109375, -0.005120754241943359, -0.004300117492675781, -0.003479480743408203, -0.002658843994140625, -0.0018382072448730469, -0.0010175704956054688, -0.00019693374633789062, 0.0006237030029296875, 0.0014443397521972656, 0.0022649765014648438, 0.003085613250732422, 0.00390625, 0.004726886749267578, 0.005547523498535156, 0.006368160247802734, 0.0071887969970703125, 0.00800943374633789, 0.008830070495605469, 0.009650707244873047, 0.010471343994140625, 0.011291980743408203, 0.012112617492675781, 0.01293325424194336, 0.013753890991210938, 0.014574527740478516, 0.015395164489746094, 0.016215801239013672, 0.01703643798828125, 0.017857074737548828, 0.018677711486816406, 0.019498348236083984, 0.020318984985351562, 0.02113962173461914, 0.02196025848388672, 0.022780895233154297, 0.023601531982421875, 0.024422168731689453, 0.02524280548095703, 0.02606344223022461, 0.026884078979492188, 0.027704715728759766, 0.028525352478027344, 0.029345989227294922, 0.0301666259765625]}, "gradients/encoder.encoder.layers.12.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 5.0, 6.0, 11.0, 19.0, 22.0, 45.0, 62.0, 91.0, 83.0, 111.0, 116.0, 104.0, 108.0, 78.0, 50.0, 36.0, 21.0, 15.0, 6.0, 7.0, 7.0, 0.0, 1.0, 5.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08133012056350708, -0.077098049223423, -0.07286597788333893, -0.06863391399383545, -0.06440184265375137, -0.0601697713136673, -0.05593770369887352, -0.05170563608407974, -0.047473564743995667, -0.04324149340391159, -0.03900942578911781, -0.034777358174324036, -0.03054528683423996, -0.026313217356801033, -0.022081147879362106, -0.01784907840192318, -0.013617008924484253, -0.009384939447045326, -0.0051528699696063995, -0.0009208004921674728, 0.003311268985271454, 0.0075433384627103806, 0.011775407940149307, 0.016007477417588234, 0.02023954689502716, 0.024471616372466087, 0.028703685849905014, 0.03293575346469879, 0.03716782480478287, 0.04139989614486694, 0.04563196375966072, 0.0498640313744545, 0.054096102714538574, 0.05832817405462265, 0.06256024539470673, 0.0667923092842102, 0.07102438062429428, 0.07525645196437836, 0.07948851585388184, 0.08372058719396591, 0.08795265853404999, 0.09218472987413406, 0.09641680121421814, 0.10064886510372162, 0.1048809364438057, 0.10911300778388977, 0.11334507167339325, 0.11757714301347733, 0.1218092143535614, 0.12604127824306488, 0.13027335703372955, 0.13450542092323303, 0.1387374997138977, 0.14296956360340118, 0.14720162749290466, 0.15143370628356934, 0.15566577017307281, 0.1598978340625763, 0.16412991285324097, 0.16836197674274445, 0.17259404063224792, 0.1768261194229126, 0.18105818331241608, 0.18529026210308075, 0.18952232599258423]}, "gradients/encoder.encoder.layers.12.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 0.0, 3.0, 7.0, 4.0, 0.0, 2.0, 9.0, 10.0, 5.0, 21.0, 13.0, 17.0, 17.0, 18.0, 32.0, 27.0, 31.0, 23.0, 38.0, 32.0, 37.0, 46.0, 41.0, 48.0, 42.0, 34.0, 35.0, 35.0, 40.0, 36.0, 44.0, 35.0, 25.0, 32.0, 27.0, 25.0, 24.0, 18.0, 16.0, 9.0, 8.0, 13.0, 4.0, 7.0, 5.0, 4.0, 4.0, 6.0, 1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06265407800674438, -0.06050029397010803, -0.05834650993347168, -0.056192729622125626, -0.05403894558548927, -0.05188516154885292, -0.049731381237506866, -0.047577597200870514, -0.04542381316423416, -0.04327002912759781, -0.041116245090961456, 
-0.0389624647796154, -0.03680868074297905, -0.0346548967063427, -0.03250111639499664, -0.03034733235836029, -0.028193548321723938, -0.026039764285087585, -0.023885982111096382, -0.02173219993710518, -0.019578415900468826, -0.017424631863832474, -0.01527084968984127, -0.013117066584527493, -0.010963283479213715, -0.008809500373899937, -0.006655717268586159, -0.004501934163272381, -0.002348151057958603, -0.00019436795264482498, 0.001959415152668953, 0.004113198257982731, 0.006266981363296509, 0.008420764468610287, 0.010574547573924065, 0.012728330679237843, 0.01488211378455162, 0.017035897821187973, 0.019189679995179176, 0.02134346216917038, 0.023497246205806732, 0.025651030242443085, 0.027804812416434288, 0.02995859459042549, 0.032112378627061844, 0.034266162663698196, 0.03641994297504425, 0.0385737270116806, 0.040727511048316956, 0.04288129508495331, 0.04503507912158966, 0.047188859432935715, 0.04934264346957207, 0.05149642750620842, 0.053650207817554474, 0.055803991854190826, 0.05795777589082718, 0.06011155992746353, 0.062265343964099884, 0.06441912800073624, 0.06657290458679199, 0.06872668862342834, 0.0708804726600647, 0.07303425669670105, 0.0751880407333374]}, "gradients/encoder.encoder.layers.12.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 4.0, 7.0, 7.0, 12.0, 16.0, 22.0, 23.0, 37.0, 54.0, 74.0, 123.0, 138.0, 222.0, 323.0, 529.0, 1080.0, 2224.0, 5572.0, 16151.0, 53176.0, 189970.0, 434993.0, 241241.0, 69428.0, 20564.0, 6904.0, 2698.0, 1168.0, 607.0, 371.0, 269.0, 154.0, 127.0, 61.0, 60.0, 50.0, 29.0, 25.0, 15.0, 9.0, 6.0, 7.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05352783203125, -0.05171966552734375, -0.0499114990234375, -0.04810333251953125, -0.046295166015625, -0.04448699951171875, -0.0426788330078125, -0.04087066650390625, -0.0390625, -0.03725433349609375, -0.0354461669921875, -0.03363800048828125, -0.031829833984375, -0.03002166748046875, -0.0282135009765625, -0.02640533447265625, -0.02459716796875, -0.02278900146484375, -0.0209808349609375, -0.01917266845703125, -0.017364501953125, -0.01555633544921875, -0.0137481689453125, -0.01194000244140625, -0.0101318359375, -0.00832366943359375, -0.0065155029296875, -0.00470733642578125, -0.002899169921875, -0.00109100341796875, 0.0007171630859375, 0.00252532958984375, 0.00433349609375, 0.00614166259765625, 0.0079498291015625, 0.00975799560546875, 0.011566162109375, 0.01337432861328125, 0.0151824951171875, 0.01699066162109375, 0.018798828125, 0.02060699462890625, 0.0224151611328125, 0.02422332763671875, 0.026031494140625, 0.02783966064453125, 0.0296478271484375, 0.03145599365234375, 0.03326416015625, 0.03507232666015625, 0.0368804931640625, 0.03868865966796875, 0.040496826171875, 0.04230499267578125, 0.0441131591796875, 0.04592132568359375, 0.0477294921875, 0.04953765869140625, 0.0513458251953125, 0.05315399169921875, 0.054962158203125, 0.05677032470703125, 0.0585784912109375, 0.06038665771484375, 0.06219482421875]}, "gradients/encoder.encoder.layers.12.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 5.0, 7.0, 10.0, 19.0, 25.0, 35.0, 57.0, 97.0, 90.0, 117.0, 123.0, 112.0, 91.0, 78.0, 63.0, 28.0, 26.0, 11.0, 6.0, 9.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.09442138671875, -0.09174823760986328, -0.08907508850097656, -0.08640193939208984, 
-0.08372879028320312, -0.0810556411743164, -0.07838249206542969, -0.07570934295654297, -0.07303619384765625, -0.07036304473876953, -0.06768989562988281, -0.0650167465209961, -0.062343597412109375, -0.059670448303222656, -0.05699729919433594, -0.05432415008544922, -0.0516510009765625, -0.04897785186767578, -0.04630470275878906, -0.043631553649902344, -0.040958404541015625, -0.038285255432128906, -0.03561210632324219, -0.03293895721435547, -0.03026580810546875, -0.02759265899658203, -0.024919509887695312, -0.022246360778808594, -0.019573211669921875, -0.016900062561035156, -0.014226913452148438, -0.011553764343261719, -0.008880615234375, -0.006207466125488281, -0.0035343170166015625, -0.0008611679077148438, 0.001811981201171875, 0.004485130310058594, 0.0071582794189453125, 0.009831428527832031, 0.01250457763671875, 0.015177726745605469, 0.017850875854492188, 0.020524024963378906, 0.023197174072265625, 0.025870323181152344, 0.028543472290039062, 0.03121662139892578, 0.0338897705078125, 0.03656291961669922, 0.03923606872558594, 0.041909217834472656, 0.044582366943359375, 0.047255516052246094, 0.04992866516113281, 0.05260181427001953, 0.05527496337890625, 0.05794811248779297, 0.06062126159667969, 0.0632944107055664, 0.06596755981445312, 0.06864070892333984, 0.07131385803222656, 0.07398700714111328, 0.07666015625]}, "gradients/encoder.encoder.layers.12.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 1.0, 4.0, 3.0, 6.0, 4.0, 4.0, 7.0, 11.0, 7.0, 16.0, 6.0, 22.0, 22.0, 24.0, 28.0, 20.0, 37.0, 61.0, 75.0, 169.0, 570.0, 2604.0, 17957.0, 163678.0, 671110.0, 169820.0, 18459.0, 2728.0, 570.0, 187.0, 81.0, 52.0, 36.0, 15.0, 33.0, 27.0, 28.0, 17.0, 7.0, 6.0, 10.0, 12.0, 7.0, 11.0, 3.0, 4.0, 2.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08355712890625, -0.08081817626953125, -0.0780792236328125, -0.07534027099609375, -0.072601318359375, -0.06986236572265625, -0.0671234130859375, -0.06438446044921875, -0.0616455078125, -0.05890655517578125, -0.0561676025390625, -0.05342864990234375, -0.050689697265625, -0.04795074462890625, -0.0452117919921875, -0.04247283935546875, -0.03973388671875, -0.03699493408203125, -0.0342559814453125, -0.03151702880859375, -0.028778076171875, -0.02603912353515625, -0.0233001708984375, -0.02056121826171875, -0.017822265625, -0.01508331298828125, -0.0123443603515625, -0.00960540771484375, -0.006866455078125, -0.00412750244140625, -0.0013885498046875, 0.00135040283203125, 0.00408935546875, 0.00682830810546875, 0.0095672607421875, 0.01230621337890625, 0.015045166015625, 0.01778411865234375, 0.0205230712890625, 0.02326202392578125, 0.0260009765625, 0.02873992919921875, 0.0314788818359375, 0.03421783447265625, 0.036956787109375, 0.03969573974609375, 0.0424346923828125, 0.04517364501953125, 0.04791259765625, 0.05065155029296875, 0.0533905029296875, 0.05612945556640625, 0.058868408203125, 0.06160736083984375, 0.0643463134765625, 0.06708526611328125, 0.06982421875, 0.07256317138671875, 0.0753021240234375, 0.07804107666015625, 0.080780029296875, 0.08351898193359375, 0.0862579345703125, 0.08899688720703125, 0.09173583984375]}, "gradients/encoder.encoder.layers.12.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 0.0, 4.0, 1.0, 4.0, 1.0, 4.0, 9.0, 12.0, 12.0, 12.0, 9.0, 7.0, 14.0, 20.0, 22.0, 19.0, 34.0, 25.0, 29.0, 39.0, 38.0, 45.0, 30.0, 32.0, 40.0, 42.0, 30.0, 28.0, 50.0, 31.0, 21.0, 34.0, 37.0, 33.0, 37.0, 25.0, 21.0, 17.0, 30.0, 25.0, 18.0, 11.0, 16.0, 6.0, 7.0, 10.0, 4.0, 5.0, 2.0, 4.0, 3.0, 
2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.06036376953125, -0.05838775634765625, -0.0564117431640625, -0.05443572998046875, -0.052459716796875, -0.05048370361328125, -0.0485076904296875, -0.04653167724609375, -0.0445556640625, -0.04257965087890625, -0.0406036376953125, -0.03862762451171875, -0.036651611328125, -0.03467559814453125, -0.0326995849609375, -0.03072357177734375, -0.02874755859375, -0.02677154541015625, -0.0247955322265625, -0.02281951904296875, -0.020843505859375, -0.01886749267578125, -0.0168914794921875, -0.01491546630859375, -0.012939453125, -0.01096343994140625, -0.0089874267578125, -0.00701141357421875, -0.005035400390625, -0.00305938720703125, -0.0010833740234375, 0.00089263916015625, 0.00286865234375, 0.00484466552734375, 0.0068206787109375, 0.00879669189453125, 0.010772705078125, 0.01274871826171875, 0.0147247314453125, 0.01670074462890625, 0.0186767578125, 0.02065277099609375, 0.0226287841796875, 0.02460479736328125, 0.026580810546875, 0.02855682373046875, 0.0305328369140625, 0.03250885009765625, 0.03448486328125, 0.03646087646484375, 0.0384368896484375, 0.04041290283203125, 0.042388916015625, 0.04436492919921875, 0.0463409423828125, 0.04831695556640625, 0.05029296875, 0.05226898193359375, 0.0542449951171875, 0.05622100830078125, 0.058197021484375, 0.06017303466796875, 0.0621490478515625, 0.06412506103515625, 0.06610107421875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 6.0, 8.0, 16.0, 20.0, 24.0, 47.0, 80.0, 165.0, 247.0, 459.0, 745.0, 1442.0, 2759.0, 6160.0, 14946.0, 39069.0, 116844.0, 319712.0, 341777.0, 130905.0, 43782.0, 16198.0, 6605.0, 3110.0, 1497.0, 766.0, 486.0, 268.0, 156.0, 94.0, 58.0, 42.0, 34.0, 12.0, 10.0, 6.0, 2.0, 6.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.016021728515625, -0.015551209449768066, -0.015080690383911133, -0.0146101713180542, -0.014139652252197266, -0.013669133186340332, -0.013198614120483398, -0.012728095054626465, -0.012257575988769531, -0.011787056922912598, -0.011316537857055664, -0.01084601879119873, -0.010375499725341797, -0.009904980659484863, -0.00943446159362793, -0.008963942527770996, -0.008493423461914062, -0.008022904396057129, -0.007552385330200195, -0.007081866264343262, -0.006611347198486328, -0.0061408281326293945, -0.005670309066772461, -0.005199790000915527, -0.004729270935058594, -0.00425875186920166, -0.0037882328033447266, -0.003317713737487793, -0.0028471946716308594, -0.0023766756057739258, -0.0019061565399169922, -0.0014356374740600586, -0.000965118408203125, -0.0004945993423461914, -2.4080276489257812e-05, 0.0004464387893676758, 0.0009169578552246094, 0.001387476921081543, 0.0018579959869384766, 0.00232851505279541, 0.0027990341186523438, 0.0032695531845092773, 0.003740072250366211, 0.0042105913162231445, 0.004681110382080078, 0.005151629447937012, 0.005622148513793945, 0.006092667579650879, 0.0065631866455078125, 0.007033705711364746, 0.00750422477722168, 0.007974743843078613, 0.008445262908935547, 0.00891578197479248, 0.009386301040649414, 0.009856820106506348, 0.010327339172363281, 0.010797858238220215, 0.011268377304077148, 0.011738896369934082, 0.012209415435791016, 0.01267993450164795, 0.013150453567504883, 0.013620972633361816, 0.01409149169921875]}, "gradients/encoder.encoder.layers.12.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 4.0, 1.0, 6.0, 2.0, 18.0, 14.0, 23.0, 29.0, 50.0, 69.0, 
93.0, 90.0, 81.0, 135.0, 123.0, 82.0, 53.0, 29.0, 47.0, 24.0, 14.0, 12.0, 3.0, 6.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.106231689453125e-06, -7.701106369495392e-06, -7.295981049537659e-06, -6.8908557295799255e-06, -6.485730409622192e-06, -6.080605089664459e-06, -5.675479769706726e-06, -5.270354449748993e-06, -4.86522912979126e-06, -4.460103809833527e-06, -4.0549784898757935e-06, -3.6498531699180603e-06, -3.244727849960327e-06, -2.839602530002594e-06, -2.434477210044861e-06, -2.0293518900871277e-06, -1.6242265701293945e-06, -1.2191012501716614e-06, -8.139759302139282e-07, -4.0885061025619507e-07, -3.725290298461914e-09, 4.0140002965927124e-07, 8.065253496170044e-07, 1.2116506695747375e-06, 1.6167759895324707e-06, 2.021901309490204e-06, 2.427026629447937e-06, 2.83215194940567e-06, 3.2372772693634033e-06, 3.6424025893211365e-06, 4.04752790927887e-06, 4.452653229236603e-06, 4.857778549194336e-06, 5.262903869152069e-06, 5.668029189109802e-06, 6.073154509067535e-06, 6.4782798290252686e-06, 6.883405148983002e-06, 7.288530468940735e-06, 7.693655788898468e-06, 8.098781108856201e-06, 8.503906428813934e-06, 8.909031748771667e-06, 9.3141570687294e-06, 9.719282388687134e-06, 1.0124407708644867e-05, 1.05295330286026e-05, 1.0934658348560333e-05, 1.1339783668518066e-05, 1.17449089884758e-05, 1.2150034308433533e-05, 1.2555159628391266e-05, 1.2960284948348999e-05, 1.3365410268306732e-05, 1.3770535588264465e-05, 1.4175660908222198e-05, 1.4580786228179932e-05, 1.4985911548137665e-05, 1.5391036868095398e-05, 1.579616218805313e-05, 1.6201287508010864e-05, 1.6606412827968597e-05, 1.701153814792633e-05, 1.7416663467884064e-05, 1.7821788787841797e-05]}, "gradients/encoder.encoder.layers.12.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 3.0, 5.0, 8.0, 19.0, 16.0, 34.0, 36.0, 70.0, 131.0, 264.0, 707.0, 2609.0, 13987.0, 126211.0, 709160.0, 172909.0, 17802.0, 3086.0, 842.0, 316.0, 118.0, 74.0, 50.0, 28.0, 17.0, 14.0, 6.0, 10.0, 11.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.034088134765625, -0.03297281265258789, -0.03185749053955078, -0.030742168426513672, -0.029626846313476562, -0.028511524200439453, -0.027396202087402344, -0.026280879974365234, -0.025165557861328125, -0.024050235748291016, -0.022934913635253906, -0.021819591522216797, -0.020704269409179688, -0.019588947296142578, -0.01847362518310547, -0.01735830307006836, -0.01624298095703125, -0.01512765884399414, -0.014012336730957031, -0.012897014617919922, -0.011781692504882812, -0.010666370391845703, -0.009551048278808594, -0.008435726165771484, -0.007320404052734375, -0.006205081939697266, -0.005089759826660156, -0.003974437713623047, -0.0028591156005859375, -0.0017437934875488281, -0.0006284713745117188, 0.0004868507385253906, 0.0016021728515625, 0.0027174949645996094, 0.0038328170776367188, 0.004948139190673828, 0.0060634613037109375, 0.007178783416748047, 0.008294105529785156, 0.009409427642822266, 0.010524749755859375, 0.011640071868896484, 0.012755393981933594, 0.013870716094970703, 0.014986038208007812, 0.016101360321044922, 0.01721668243408203, 0.01833200454711914, 0.01944732666015625, 0.02056264877319336, 0.02167797088623047, 0.022793292999267578, 0.023908615112304688, 0.025023937225341797, 0.026139259338378906, 0.027254581451416016, 0.028369903564453125, 
0.029485225677490234, 0.030600547790527344, 0.03171586990356445, 0.03283119201660156, 0.03394651412963867, 0.03506183624267578, 0.03617715835571289, 0.03729248046875]}, "gradients/encoder.encoder.layers.12.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 4.0, 7.0, 5.0, 7.0, 14.0, 22.0, 29.0, 52.0, 97.0, 127.0, 163.0, 153.0, 126.0, 79.0, 47.0, 33.0, 23.0, 9.0, 4.0, 7.0, 2.0, 4.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0142059326171875, -0.013358831405639648, -0.012511730194091797, -0.011664628982543945, -0.010817527770996094, -0.009970426559448242, -0.00912332534790039, -0.008276224136352539, -0.0074291229248046875, -0.006582021713256836, -0.005734920501708984, -0.004887819290161133, -0.004040718078613281, -0.0031936168670654297, -0.002346515655517578, -0.0014994144439697266, -0.000652313232421875, 0.00019478797912597656, 0.0010418891906738281, 0.0018889904022216797, 0.0027360916137695312, 0.003583192825317383, 0.004430294036865234, 0.005277395248413086, 0.0061244964599609375, 0.006971597671508789, 0.00781869888305664, 0.008665800094604492, 0.009512901306152344, 0.010360002517700195, 0.011207103729248047, 0.012054204940795898, 0.01290130615234375, 0.013748407363891602, 0.014595508575439453, 0.015442609786987305, 0.016289710998535156, 0.017136812210083008, 0.01798391342163086, 0.01883101463317871, 0.019678115844726562, 0.020525217056274414, 0.021372318267822266, 0.022219419479370117, 0.02306652069091797, 0.02391362190246582, 0.024760723114013672, 0.025607824325561523, 0.026454925537109375, 0.027302026748657227, 0.028149127960205078, 0.02899622917175293, 0.02984333038330078, 0.030690431594848633, 0.031537532806396484, 0.032384634017944336, 0.03323173522949219, 0.03407883644104004, 0.03492593765258789, 0.03577303886413574, 0.036620140075683594, 0.037467241287231445, 0.0383143424987793, 0.03916144371032715, 0.040008544921875]}, "gradients/encoder.encoder.layers.12.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 12.0, 16.0, 46.0, 104.0, 188.0, 254.0, 201.0, 106.0, 48.0, 20.0, 8.0, 4.0, 1.0, 3.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.16989654302597046, -0.15891921520233154, -0.14794187247753143, -0.13696454465389252, -0.1259872019290924, -0.11500987410545349, -0.10403253883123398, -0.09305520355701447, -0.08207786828279495, -0.07110053300857544, -0.060123197734355927, -0.04914586618542671, -0.0381685309112072, -0.027191195636987686, -0.01621386408805847, -0.005236528813838959, 0.005740806460380554, 0.016718141734600067, 0.02769547514617443, 0.038672808557748795, 0.04965014383196831, 0.06062747910618782, 0.07160481065511703, 0.08258214592933655, 0.09355948120355606, 0.10453681647777557, 0.11551415175199509, 0.1264914870262146, 0.13746881484985352, 0.14844615757465363, 0.15942348539829254, 0.17040082812309265, 0.18137815594673157, 0.19235548377037048, 0.2033328264951706, 0.2143101543188095, 0.22528749704360962, 0.23626482486724854, 0.24724215269088745, 0.25821948051452637, 0.26919683814048767, 0.2801741659641266, 0.2911514937877655, 0.3021288514137268, 0.3131061792373657, 0.32408350706100464, 0.33506083488464355, 0.34603816270828247, 0.3570154905319214, 0.3679928183555603, 
0.3789701461791992, 0.3899475038051605, 0.40092483162879944, 0.41190215945243835, 0.42287948727607727, 0.4338568449020386, 0.4448341727256775, 0.4558115005493164, 0.4667888283729553, 0.4777661859989166, 0.48874351382255554, 0.49972084164619446, 0.5106981992721558, 0.5216755270957947, 0.5326528549194336]}, "gradients/encoder.encoder.layers.12.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 5.0, 6.0, 8.0, 10.0, 15.0, 12.0, 25.0, 17.0, 16.0, 21.0, 34.0, 37.0, 41.0, 32.0, 46.0, 33.0, 46.0, 48.0, 48.0, 52.0, 46.0, 38.0, 46.0, 43.0, 39.0, 44.0, 37.0, 29.0, 14.0, 30.0, 21.0, 7.0, 11.0, 6.0, 8.0, 3.0, 5.0, 9.0, 4.0, 5.0, 3.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.18048226833343506, -0.17455172538757324, -0.16862118244171143, -0.1626906394958496, -0.1567600965499878, -0.15082955360412598, -0.14489901065826416, -0.13896846771240234, -0.13303792476654053, -0.1271073818206787, -0.1211768388748169, -0.11524629592895508, -0.10931575298309326, -0.10338521003723145, -0.09745465964078903, -0.09152411669492722, -0.0855935662984848, -0.07966302335262299, -0.07373248040676117, -0.06780193746089935, -0.06187139078974724, -0.05594084784388542, -0.05001030117273331, -0.04407975822687149, -0.038149215281009674, -0.03221867233514786, -0.026288127526640892, -0.020357582718133926, -0.01442703977227211, -0.008496496826410294, -0.0025659501552581787, 0.0033645927906036377, 0.009295135736465454, 0.015225679613649845, 0.021156223490834236, 0.027086768299341202, 0.03301731124520302, 0.038947854191064835, 0.04487840086221695, 0.050808943808078766, 0.05673948675394058, 0.0626700296998024, 0.06860057264566422, 0.07453112304210663, 0.08046166598796844, 0.08639220893383026, 0.09232275187969208, 0.0982532948255539, 0.10418383777141571, 0.11011438071727753, 0.11604492366313934, 0.12197546660900116, 0.12790600955486298, 0.1338365525007248, 0.1397671103477478, 0.14569765329360962, 0.15162819623947144, 0.15755873918533325, 0.16348928213119507, 0.16941982507705688, 0.1753503680229187, 0.18128091096878052, 0.18721145391464233, 0.19314199686050415, 0.19907253980636597]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 7.0, 8.0, 10.0, 13.0, 39.0, 71.0, 115.0, 239.0, 571.0, 1744.0, 7637.0, 75680.0, 3629248.0, 458058.0, 16251.0, 3153.0, 819.0, 332.0, 153.0, 59.0, 41.0, 19.0, 13.0, 5.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.12322998046875, -0.1194620132446289, -0.11569404602050781, -0.11192607879638672, -0.10815811157226562, -0.10439014434814453, -0.10062217712402344, -0.09685420989990234, -0.09308624267578125, -0.08931827545166016, -0.08555030822753906, -0.08178234100341797, -0.07801437377929688, -0.07424640655517578, -0.07047843933105469, -0.0667104721069336, -0.0629425048828125, -0.059174537658691406, -0.05540657043457031, -0.05163860321044922, -0.047870635986328125, -0.04410266876220703, -0.04033470153808594, -0.036566734313964844, -0.03279876708984375, -0.029030799865722656, -0.025262832641601562, -0.02149486541748047, -0.017726898193359375, -0.013958930969238281, -0.010190963745117188, -0.006422996520996094, -0.002655029296875, 0.0011129379272460938, 0.0048809051513671875, 0.008648872375488281, 0.012416839599609375, 0.01618480682373047, 0.019952774047851562, 0.023720741271972656, 0.02748870849609375, 
0.031256675720214844, 0.03502464294433594, 0.03879261016845703, 0.042560577392578125, 0.04632854461669922, 0.05009651184082031, 0.053864479064941406, 0.0576324462890625, 0.061400413513183594, 0.06516838073730469, 0.06893634796142578, 0.07270431518554688, 0.07647228240966797, 0.08024024963378906, 0.08400821685791016, 0.08777618408203125, 0.09154415130615234, 0.09531211853027344, 0.09908008575439453, 0.10284805297851562, 0.10661602020263672, 0.11038398742675781, 0.1141519546508789, 0.117919921875]}, "gradients/encoder.encoder.layers.11.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 7.0, 10.0, 18.0, 39.0, 55.0, 59.0, 110.0, 112.0, 134.0, 111.0, 113.0, 76.0, 70.0, 40.0, 23.0, 16.0, 5.0, 6.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0958251953125, -0.09306144714355469, -0.09029769897460938, -0.08753395080566406, -0.08477020263671875, -0.08200645446777344, -0.07924270629882812, -0.07647895812988281, -0.0737152099609375, -0.07095146179199219, -0.06818771362304688, -0.06542396545410156, -0.06266021728515625, -0.05989646911621094, -0.057132720947265625, -0.05436897277832031, -0.051605224609375, -0.04884147644042969, -0.046077728271484375, -0.04331398010253906, -0.04055023193359375, -0.03778648376464844, -0.035022735595703125, -0.03225898742675781, -0.0294952392578125, -0.026731491088867188, -0.023967742919921875, -0.021203994750976562, -0.01844024658203125, -0.015676498413085938, -0.012912750244140625, -0.010149002075195312, -0.00738525390625, -0.0046215057373046875, -0.001857757568359375, 0.0009059906005859375, 0.00366973876953125, 0.0064334869384765625, 0.009197235107421875, 0.011960983276367188, 0.0147247314453125, 0.017488479614257812, 0.020252227783203125, 0.023015975952148438, 0.02577972412109375, 0.028543472290039062, 0.031307220458984375, 0.03407096862792969, 0.036834716796875, 0.03959846496582031, 0.042362213134765625, 0.04512596130371094, 0.04788970947265625, 0.05065345764160156, 0.053417205810546875, 0.05618095397949219, 0.0589447021484375, 0.06170845031738281, 0.06447219848632812, 0.06723594665527344, 0.06999969482421875, 0.07276344299316406, 0.07552719116210938, 0.07829093933105469, 0.0810546875]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, 7.0, 9.0, 9.0, 16.0, 26.0, 39.0, 69.0, 122.0, 210.0, 387.0, 805.0, 1585.0, 3594.0, 8435.0, 21887.0, 66748.0, 279377.0, 2138099.0, 1403838.0, 189511.0, 50095.0, 17042.0, 6593.0, 2939.0, 1330.0, 673.0, 356.0, 190.0, 125.0, 67.0, 34.0, 19.0, 17.0, 10.0, 12.0, 8.0, 6.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.033111572265625, -0.03187990188598633, -0.030648231506347656, -0.029416561126708984, -0.028184890747070312, -0.02695322036743164, -0.02572154998779297, -0.024489879608154297, -0.023258209228515625, -0.022026538848876953, -0.02079486846923828, -0.01956319808959961, -0.018331527709960938, -0.017099857330322266, -0.015868186950683594, -0.014636516571044922, -0.01340484619140625, -0.012173175811767578, -0.010941505432128906, -0.009709835052490234, -0.008478164672851562, -0.007246494293212891, -0.006014823913574219, -0.004783153533935547, -0.003551483154296875, -0.002319812774658203, -0.0010881423950195312, 0.00014352798461914062, 0.0013751983642578125, 
0.0026068687438964844, 0.0038385391235351562, 0.005070209503173828, 0.0063018798828125, 0.007533550262451172, 0.008765220642089844, 0.009996891021728516, 0.011228561401367188, 0.01246023178100586, 0.013691902160644531, 0.014923572540283203, 0.016155242919921875, 0.017386913299560547, 0.01861858367919922, 0.01985025405883789, 0.021081924438476562, 0.022313594818115234, 0.023545265197753906, 0.024776935577392578, 0.02600860595703125, 0.027240276336669922, 0.028471946716308594, 0.029703617095947266, 0.030935287475585938, 0.03216695785522461, 0.03339862823486328, 0.03463029861450195, 0.035861968994140625, 0.0370936393737793, 0.03832530975341797, 0.03955698013305664, 0.04078865051269531, 0.042020320892333984, 0.043251991271972656, 0.04448366165161133, 0.04571533203125]}, "gradients/encoder.encoder.layers.11.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 5.0, 8.0, 7.0, 21.0, 21.0, 30.0, 37.0, 58.0, 97.0, 162.0, 332.0, 599.0, 917.0, 763.0, 464.0, 242.0, 110.0, 70.0, 43.0, 28.0, 25.0, 14.0, 6.0, 1.0, 4.0, 6.0, 3.0, 0.0, 3.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0], "bins": [-0.065185546875, -0.06362342834472656, -0.062061309814453125, -0.06049919128417969, -0.05893707275390625, -0.05737495422363281, -0.055812835693359375, -0.05425071716308594, -0.0526885986328125, -0.05112648010253906, -0.049564361572265625, -0.04800224304199219, -0.04644012451171875, -0.04487800598144531, -0.043315887451171875, -0.04175376892089844, -0.040191650390625, -0.03862953186035156, -0.037067413330078125, -0.03550529479980469, -0.03394317626953125, -0.03238105773925781, -0.030818939208984375, -0.029256820678710938, -0.0276947021484375, -0.026132583618164062, -0.024570465087890625, -0.023008346557617188, -0.02144622802734375, -0.019884109497070312, -0.018321990966796875, -0.016759872436523438, -0.01519775390625, -0.013635635375976562, -0.012073516845703125, -0.010511398315429688, -0.00894927978515625, -0.0073871612548828125, -0.005825042724609375, -0.0042629241943359375, -0.0027008056640625, -0.0011386871337890625, 0.000423431396484375, 0.0019855499267578125, 0.00354766845703125, 0.0051097869873046875, 0.006671905517578125, 0.008234024047851562, 0.009796142578125, 0.011358261108398438, 0.012920379638671875, 0.014482498168945312, 0.01604461669921875, 0.017606735229492188, 0.019168853759765625, 0.020730972290039062, 0.0222930908203125, 0.023855209350585938, 0.025417327880859375, 0.026979446411132812, 0.02854156494140625, 0.030103683471679688, 0.031665802001953125, 0.03322792053222656, 0.0347900390625]}, "gradients/encoder.encoder.layers.11.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 1.0, 1.0, 0.0, 4.0, 3.0, 5.0, 4.0, 19.0, 31.0, 49.0, 50.0, 119.0, 127.0, 139.0, 141.0, 119.0, 77.0, 50.0, 33.0, 19.0, 9.0, 7.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.210373193025589, -0.20381109416484833, -0.19724899530410767, -0.1906868815422058, -0.18412478268146515, -0.1775626838207245, -0.17100058495998383, -0.16443848609924316, -0.1578763723373413, -0.15131427347660065, -0.14475217461585999, -0.13819006085395813, -0.13162796199321747, -0.1250658631324768, -0.11850376427173615, -0.11194166541099548, -0.10537956655025482, -0.09881746768951416, -0.0922553613781929, -0.08569326251745224, 
-0.07913115620613098, -0.07256905734539032, -0.06600695848464966, -0.0594448558986187, -0.05288275331258774, -0.04632065072655678, -0.03975854814052582, -0.033196449279785156, -0.026634346693754196, -0.020072244107723236, -0.013510145246982574, -0.006948042660951614, -0.00038592517375946045, 0.006176176480948925, 0.01273827813565731, 0.01930037885904312, 0.02586248144507408, 0.03242458403110504, 0.0389866828918457, 0.04554878547787666, 0.05211088806390762, 0.05867299064993858, 0.06523509323596954, 0.0717971920967102, 0.07835929095745087, 0.08492139726877213, 0.09148349612951279, 0.09804560244083405, 0.10460770130157471, 0.11116980016231537, 0.11773190647363663, 0.12429400533437729, 0.13085611164569855, 0.1374182105064392, 0.14398030936717987, 0.15054240822792053, 0.1571045219898224, 0.16366662085056305, 0.1702287197113037, 0.17679083347320557, 0.18335293233394623, 0.1899150311946869, 0.19647713005542755, 0.2030392289161682, 0.20960132777690887]}, "gradients/encoder.encoder.layers.11.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 7.0, 14.0, 10.0, 11.0, 22.0, 12.0, 24.0, 26.0, 26.0, 27.0, 33.0, 37.0, 34.0, 44.0, 41.0, 41.0, 33.0, 39.0, 46.0, 39.0, 38.0, 44.0, 35.0, 41.0, 31.0, 31.0, 25.0, 28.0, 19.0, 14.0, 24.0, 22.0, 5.0, 15.0, 7.0, 12.0, 8.0, 5.0, 6.0, 4.0, 1.0, 3.0, 0.0, 1.0, 3.0], "bins": [-0.10558658838272095, -0.1027383953332901, -0.09989019483327866, -0.09704200178384781, -0.09419380128383636, -0.09134560823440552, -0.08849741518497467, -0.08564922213554382, -0.08280102163553238, -0.07995282858610153, -0.07710462808609009, -0.07425643503665924, -0.0714082419872284, -0.06856004148721695, -0.0657118484377861, -0.06286364793777466, -0.06001545488834381, -0.057167258113622665, -0.05431906133890152, -0.05147086828947067, -0.04862267151474953, -0.04577447474002838, -0.042926281690597534, -0.04007808491587639, -0.03722988814115524, -0.0343816913664341, -0.03153349459171295, -0.028685301542282104, -0.02583710476756096, -0.022988907992839813, -0.020140713080763817, -0.01729251816868782, -0.014444321393966675, -0.011596125550568104, -0.008747929707169533, -0.005899733863770962, -0.0030515380203723907, -0.00020334217697381973, 0.0026448536664247513, 0.005493048578500748, 0.008341245353221893, 0.011189441196620464, 0.014037637040019035, 0.016885831952095032, 0.019734028726816177, 0.022582225501537323, 0.02543042041361332, 0.028278615325689316, 0.03112681210041046, 0.03397500887513161, 0.03682320564985275, 0.0396713986992836, 0.042519595474004745, 0.04536779224872589, 0.04821598529815674, 0.051064182072877884, 0.05391237884759903, 0.056760575622320175, 0.05960877239704132, 0.06245696544647217, 0.06530515849590302, 0.06815335899591446, 0.0710015520453453, 0.07384975254535675, 0.0766979455947876]}, "gradients/encoder.encoder.layers.11.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 3.0, 2.0, 1.0, 3.0, 11.0, 10.0, 9.0, 12.0, 20.0, 18.0, 28.0, 49.0, 41.0, 81.0, 82.0, 138.0, 194.0, 253.0, 393.0, 608.0, 1230.0, 2674.0, 6836.0, 20380.0, 68141.0, 237961.0, 439391.0, 189392.0, 53577.0, 16400.0, 5539.0, 2174.0, 1074.0, 623.0, 364.0, 226.0, 175.0, 99.0, 78.0, 60.0, 45.0, 44.0, 33.0, 26.0, 22.0, 11.0, 6.0, 7.0, 5.0, 5.0, 3.0, 6.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.068603515625, -0.06648635864257812, -0.06436920166015625, -0.062252044677734375, -0.0601348876953125, -0.058017730712890625, -0.05590057373046875, -0.053783416748046875, -0.051666259765625, 
-0.049549102783203125, -0.04743194580078125, -0.045314788818359375, -0.0431976318359375, -0.041080474853515625, -0.03896331787109375, -0.036846160888671875, -0.03472900390625, -0.032611846923828125, -0.03049468994140625, -0.028377532958984375, -0.0262603759765625, -0.024143218994140625, -0.02202606201171875, -0.019908905029296875, -0.017791748046875, -0.015674591064453125, -0.01355743408203125, -0.011440277099609375, -0.0093231201171875, -0.007205963134765625, -0.00508880615234375, -0.002971649169921875, -0.0008544921875, 0.001262664794921875, 0.00337982177734375, 0.005496978759765625, 0.0076141357421875, 0.009731292724609375, 0.01184844970703125, 0.013965606689453125, 0.016082763671875, 0.018199920654296875, 0.02031707763671875, 0.022434234619140625, 0.0245513916015625, 0.026668548583984375, 0.02878570556640625, 0.030902862548828125, 0.03302001953125, 0.035137176513671875, 0.03725433349609375, 0.039371490478515625, 0.0414886474609375, 0.043605804443359375, 0.04572296142578125, 0.047840118408203125, 0.049957275390625, 0.052074432373046875, 0.05419158935546875, 0.056308746337890625, 0.0584259033203125, 0.060543060302734375, 0.06266021728515625, 0.06477737426757812, 0.06689453125]}, "gradients/encoder.encoder.layers.11.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 8.0, 15.0, 23.0, 38.0, 52.0, 77.0, 144.0, 125.0, 135.0, 126.0, 94.0, 76.0, 43.0, 26.0, 13.0, 6.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1051025390625, -0.10219192504882812, -0.09928131103515625, -0.09637069702148438, -0.0934600830078125, -0.09054946899414062, -0.08763885498046875, -0.08472824096679688, -0.081817626953125, -0.07890701293945312, -0.07599639892578125, -0.07308578491210938, -0.0701751708984375, -0.06726455688476562, -0.06435394287109375, -0.061443328857421875, -0.05853271484375, -0.055622100830078125, -0.05271148681640625, -0.049800872802734375, -0.0468902587890625, -0.043979644775390625, -0.04106903076171875, -0.038158416748046875, -0.035247802734375, -0.032337188720703125, -0.02942657470703125, -0.026515960693359375, -0.0236053466796875, -0.020694732666015625, -0.01778411865234375, -0.014873504638671875, -0.011962890625, -0.009052276611328125, -0.00614166259765625, -0.003231048583984375, -0.0003204345703125, 0.002590179443359375, 0.00550079345703125, 0.008411407470703125, 0.011322021484375, 0.014232635498046875, 0.01714324951171875, 0.020053863525390625, 0.0229644775390625, 0.025875091552734375, 0.02878570556640625, 0.031696319580078125, 0.03460693359375, 0.037517547607421875, 0.04042816162109375, 0.043338775634765625, 0.0462493896484375, 0.049160003662109375, 0.05207061767578125, 0.054981231689453125, 0.057891845703125, 0.060802459716796875, 0.06371307373046875, 0.06662368774414062, 0.0695343017578125, 0.07244491577148438, 0.07535552978515625, 0.07826614379882812, 0.0811767578125]}, "gradients/encoder.encoder.layers.11.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 9.0, 9.0, 9.0, 16.0, 21.0, 32.0, 37.0, 67.0, 105.0, 170.0, 285.0, 608.0, 1234.0, 3533.0, 11435.0, 40618.0, 153582.0, 427789.0, 294560.0, 81828.0, 22134.0, 6532.0, 2145.0, 786.0, 395.0, 216.0, 136.0, 87.0, 53.0, 33.0, 24.0, 21.0, 10.0, 8.0, 11.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0526123046875, 
-0.050844669342041016, -0.04907703399658203, -0.04730939865112305, -0.04554176330566406, -0.04377412796020508, -0.042006492614746094, -0.04023885726928711, -0.038471221923828125, -0.03670358657836914, -0.034935951232910156, -0.03316831588745117, -0.03140068054199219, -0.029633045196533203, -0.02786540985107422, -0.026097774505615234, -0.02433013916015625, -0.022562503814697266, -0.02079486846923828, -0.019027233123779297, -0.017259597778320312, -0.015491962432861328, -0.013724327087402344, -0.01195669174194336, -0.010189056396484375, -0.00842142105102539, -0.006653785705566406, -0.004886150360107422, -0.0031185150146484375, -0.0013508796691894531, 0.00041675567626953125, 0.0021843910217285156, 0.0039520263671875, 0.005719661712646484, 0.007487297058105469, 0.009254932403564453, 0.011022567749023438, 0.012790203094482422, 0.014557838439941406, 0.01632547378540039, 0.018093109130859375, 0.01986074447631836, 0.021628379821777344, 0.023396015167236328, 0.025163650512695312, 0.026931285858154297, 0.02869892120361328, 0.030466556549072266, 0.03223419189453125, 0.034001827239990234, 0.03576946258544922, 0.0375370979309082, 0.03930473327636719, 0.04107236862182617, 0.042840003967285156, 0.04460763931274414, 0.046375274658203125, 0.04814291000366211, 0.049910545349121094, 0.05167818069458008, 0.05344581604003906, 0.05521345138549805, 0.05698108673095703, 0.058748722076416016, 0.060516357421875]}, "gradients/encoder.encoder.layers.11.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 4.0, 3.0, 2.0, 1.0, 3.0, 4.0, 6.0, 6.0, 8.0, 8.0, 11.0, 8.0, 13.0, 17.0, 16.0, 22.0, 22.0, 30.0, 40.0, 29.0, 29.0, 39.0, 40.0, 45.0, 39.0, 43.0, 54.0, 35.0, 57.0, 48.0, 43.0, 34.0, 27.0, 28.0, 43.0, 26.0, 25.0, 19.0, 14.0, 20.0, 8.0, 15.0, 8.0, 9.0, 7.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.08685302734375, -0.08446407318115234, -0.08207511901855469, -0.07968616485595703, -0.07729721069335938, -0.07490825653076172, -0.07251930236816406, -0.0701303482055664, -0.06774139404296875, -0.0653524398803711, -0.06296348571777344, -0.06057453155517578, -0.058185577392578125, -0.05579662322998047, -0.05340766906738281, -0.051018714904785156, -0.0486297607421875, -0.046240806579589844, -0.04385185241699219, -0.04146289825439453, -0.039073944091796875, -0.03668498992919922, -0.03429603576660156, -0.031907081604003906, -0.02951812744140625, -0.027129173278808594, -0.024740219116210938, -0.02235126495361328, -0.019962310791015625, -0.01757335662841797, -0.015184402465820312, -0.012795448303222656, -0.010406494140625, -0.008017539978027344, -0.0056285858154296875, -0.0032396316528320312, -0.000850677490234375, 0.0015382766723632812, 0.0039272308349609375, 0.006316184997558594, 0.00870513916015625, 0.011094093322753906, 0.013483047485351562, 0.01587200164794922, 0.018260955810546875, 0.02064990997314453, 0.023038864135742188, 0.025427818298339844, 0.0278167724609375, 0.030205726623535156, 0.03259468078613281, 0.03498363494873047, 0.037372589111328125, 0.03976154327392578, 0.04215049743652344, 0.044539451599121094, 0.04692840576171875, 0.049317359924316406, 0.05170631408691406, 0.05409526824951172, 0.056484222412109375, 0.05887317657470703, 0.06126213073730469, 0.06365108489990234, 0.0660400390625]}, "gradients/encoder.encoder.layers.11.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 2.0, 7.0, 7.0, 16.0, 15.0, 23.0, 25.0, 31.0, 52.0, 71.0, 89.0, 135.0, 199.0, 257.0, 344.0, 547.0, 825.0, 1180.0, 
1922.0, 3275.0, 6374.0, 14905.0, 42559.0, 149879.0, 407259.0, 287573.0, 83551.0, 25501.0, 9828.0, 4760.0, 2564.0, 1579.0, 982.0, 644.0, 416.0, 344.0, 213.0, 171.0, 112.0, 90.0, 62.0, 44.0, 30.0, 32.0, 18.0, 10.0, 11.0, 6.0, 9.0, 6.0, 2.0, 1.0, 3.0, 1.0], "bins": [-0.0198974609375, -0.01932525634765625, -0.0187530517578125, -0.01818084716796875, -0.017608642578125, -0.01703643798828125, -0.0164642333984375, -0.01589202880859375, -0.01531982421875, -0.01474761962890625, -0.0141754150390625, -0.01360321044921875, -0.013031005859375, -0.01245880126953125, -0.0118865966796875, -0.01131439208984375, -0.0107421875, -0.01016998291015625, -0.0095977783203125, -0.00902557373046875, -0.008453369140625, -0.00788116455078125, -0.0073089599609375, -0.00673675537109375, -0.00616455078125, -0.00559234619140625, -0.0050201416015625, -0.00444793701171875, -0.003875732421875, -0.00330352783203125, -0.0027313232421875, -0.00215911865234375, -0.0015869140625, -0.00101470947265625, -0.0004425048828125, 0.00012969970703125, 0.000701904296875, 0.00127410888671875, 0.0018463134765625, 0.00241851806640625, 0.00299072265625, 0.00356292724609375, 0.0041351318359375, 0.00470733642578125, 0.005279541015625, 0.00585174560546875, 0.0064239501953125, 0.00699615478515625, 0.007568359375, 0.00814056396484375, 0.0087127685546875, 0.00928497314453125, 0.009857177734375, 0.01042938232421875, 0.0110015869140625, 0.01157379150390625, 0.01214599609375, 0.01271820068359375, 0.0132904052734375, 0.01386260986328125, 0.014434814453125, 0.01500701904296875, 0.0155792236328125, 0.01615142822265625, 0.0167236328125]}, "gradients/encoder.encoder.layers.11.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 3.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 4.0, 7.0, 8.0, 12.0, 21.0, 10.0, 21.0, 23.0, 28.0, 44.0, 36.0, 50.0, 47.0, 64.0, 75.0, 71.0, 44.0, 53.0, 65.0, 38.0, 57.0, 38.0, 26.0, 56.0, 15.0, 13.0, 16.0, 15.0, 9.0, 3.0, 3.0, 10.0, 5.0, 4.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-8.463859558105469e-06, -8.21240246295929e-06, -7.96094536781311e-06, -7.709488272666931e-06, -7.458031177520752e-06, -7.206574082374573e-06, -6.9551169872283936e-06, -6.703659892082214e-06, -6.452202796936035e-06, -6.200745701789856e-06, -5.949288606643677e-06, -5.6978315114974976e-06, -5.446374416351318e-06, -5.194917321205139e-06, -4.94346022605896e-06, -4.692003130912781e-06, -4.4405460357666016e-06, -4.189088940620422e-06, -3.937631845474243e-06, -3.686174750328064e-06, -3.4347176551818848e-06, -3.1832605600357056e-06, -2.9318034648895264e-06, -2.680346369743347e-06, -2.428889274597168e-06, -2.1774321794509888e-06, -1.9259750843048096e-06, -1.6745179891586304e-06, -1.4230608940124512e-06, -1.171603798866272e-06, -9.201467037200928e-07, -6.686896085739136e-07, -4.172325134277344e-07, -1.6577541828155518e-07, 8.568167686462402e-08, 3.371387720108032e-07, 5.885958671569824e-07, 8.400529623031616e-07, 1.0915100574493408e-06, 1.34296715259552e-06, 1.5944242477416992e-06, 1.8458813428878784e-06, 2.0973384380340576e-06, 2.348795533180237e-06, 2.600252628326416e-06, 2.8517097234725952e-06, 3.1031668186187744e-06, 3.3546239137649536e-06, 3.606081008911133e-06, 3.857538104057312e-06, 4.108995199203491e-06, 4.36045229434967e-06, 4.61190938949585e-06, 4.863366484642029e-06, 5.114823579788208e-06, 5.366280674934387e-06, 5.617737770080566e-06, 5.869194865226746e-06, 6.120651960372925e-06, 6.372109055519104e-06, 6.623566150665283e-06, 6.875023245811462e-06, 7.126480340957642e-06, 
7.377937436103821e-06, 7.62939453125e-06]}, "gradients/encoder.encoder.layers.11.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 2.0, 4.0, 1.0, 11.0, 10.0, 15.0, 12.0, 14.0, 27.0, 48.0, 52.0, 98.0, 151.0, 240.0, 394.0, 575.0, 1037.0, 1971.0, 4339.0, 12062.0, 51806.0, 325383.0, 519783.0, 99323.0, 19342.0, 6046.0, 2529.0, 1297.0, 775.0, 403.0, 287.0, 160.0, 112.0, 76.0, 55.0, 38.0, 25.0, 20.0, 15.0, 8.0, 6.0, 3.0, 6.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0299530029296875, -0.028992414474487305, -0.02803182601928711, -0.027071237564086914, -0.02611064910888672, -0.025150060653686523, -0.024189472198486328, -0.023228883743286133, -0.022268295288085938, -0.021307706832885742, -0.020347118377685547, -0.01938652992248535, -0.018425941467285156, -0.01746535301208496, -0.016504764556884766, -0.01554417610168457, -0.014583587646484375, -0.01362299919128418, -0.012662410736083984, -0.011701822280883789, -0.010741233825683594, -0.009780645370483398, -0.008820056915283203, -0.007859468460083008, -0.0068988800048828125, -0.005938291549682617, -0.004977703094482422, -0.0040171146392822266, -0.0030565261840820312, -0.002095937728881836, -0.0011353492736816406, -0.0001747608184814453, 0.00078582763671875, 0.0017464160919189453, 0.0027070045471191406, 0.003667593002319336, 0.004628181457519531, 0.0055887699127197266, 0.006549358367919922, 0.007509946823120117, 0.008470535278320312, 0.009431123733520508, 0.010391712188720703, 0.011352300643920898, 0.012312889099121094, 0.013273477554321289, 0.014234066009521484, 0.01519465446472168, 0.016155242919921875, 0.01711583137512207, 0.018076419830322266, 0.01903700828552246, 0.019997596740722656, 0.02095818519592285, 0.021918773651123047, 0.022879362106323242, 0.023839950561523438, 0.024800539016723633, 0.025761127471923828, 0.026721715927124023, 0.02768230438232422, 0.028642892837524414, 0.02960348129272461, 0.030564069747924805, 0.031524658203125]}, "gradients/encoder.encoder.layers.11.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 5.0, 3.0, 6.0, 4.0, 11.0, 7.0, 16.0, 31.0, 40.0, 56.0, 87.0, 101.0, 119.0, 114.0, 99.0, 71.0, 68.0, 55.0, 30.0, 28.0, 9.0, 12.0, 9.0, 7.0, 3.0, 3.0, 4.0, 4.0, 4.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.02423095703125, -0.023371219635009766, -0.02251148223876953, -0.021651744842529297, -0.020792007446289062, -0.019932270050048828, -0.019072532653808594, -0.01821279525756836, -0.017353057861328125, -0.01649332046508789, -0.015633583068847656, -0.014773845672607422, -0.013914108276367188, -0.013054370880126953, -0.012194633483886719, -0.011334896087646484, -0.01047515869140625, -0.009615421295166016, -0.008755683898925781, -0.007895946502685547, -0.0070362091064453125, -0.006176471710205078, -0.005316734313964844, -0.004456996917724609, -0.003597259521484375, -0.0027375221252441406, -0.0018777847290039062, -0.0010180473327636719, -0.0001583099365234375, 0.0007014274597167969, 0.0015611648559570312, 0.0024209022521972656, 0.0032806396484375, 0.004140377044677734, 0.005000114440917969, 0.005859851837158203, 0.0067195892333984375, 0.007579326629638672, 0.008439064025878906, 0.00929880142211914, 0.010158538818359375, 0.01101827621459961, 0.011878013610839844, 0.012737751007080078, 0.013597488403320312, 0.014457225799560547, 0.015316963195800781, 0.016176700592041016, 0.01703643798828125, 
0.017896175384521484, 0.01875591278076172, 0.019615650177001953, 0.020475387573242188, 0.021335124969482422, 0.022194862365722656, 0.02305459976196289, 0.023914337158203125, 0.02477407455444336, 0.025633811950683594, 0.026493549346923828, 0.027353286743164062, 0.028213024139404297, 0.02907276153564453, 0.029932498931884766, 0.030792236328125]}, "gradients/encoder.encoder.layers.11.layer_norm.weight": {"_type": "histogram", "values": [2.0, 3.0, 8.0, 33.0, 531.0, 402.0, 40.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.22229546308517456, -0.17703565955162048, -0.1317758411169052, -0.08651602268218994, -0.041256219148635864, 0.004003584384918213, 0.04926341772079468, 0.09452322125434875, 0.13978302478790283, 0.1850428283214569, 0.23030264675617218, 0.27556246519088745, 0.32082226872444153, 0.3660820722579956, 0.41134190559387207, 0.45660170912742615, 0.5018615126609802, 0.5471213459968567, 0.5923811197280884, 0.6376409530639648, 0.6829007863998413, 0.728160560131073, 0.7734203934669495, 0.8186801671981812, 0.8639400005340576, 0.9091998338699341, 0.9544596076011658, 0.9997194409370422, 1.044979214668274, 1.0902390480041504, 1.1354988813400269, 1.1807587146759033, 1.2260184288024902, 1.2712782621383667, 1.3165380954742432, 1.36179780960083, 1.4070576429367065, 1.452317476272583, 1.4975773096084595, 1.542837142944336, 1.5880968570709229, 1.6333566904067993, 1.6786165237426758, 1.7238762378692627, 1.7691360712051392, 1.8143959045410156, 1.859655737876892, 1.9049155712127686, 1.950175404548645, 1.9954352378845215, 2.0406949520111084, 2.0859549045562744, 2.1312146186828613, 2.1764745712280273, 2.2217342853546143, 2.266993999481201, 2.312253952026367, 2.357513666152954, 2.40277361869812, 2.448033332824707, 2.493293285369873, 2.53855299949646, 2.583812713623047, 2.629072666168213, 2.6743323802948]}, "gradients/encoder.encoder.layers.11.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 3.0, 3.0, 3.0, 7.0, 3.0, 6.0, 11.0, 16.0, 12.0, 16.0, 24.0, 17.0, 30.0, 24.0, 33.0, 32.0, 28.0, 46.0, 46.0, 39.0, 36.0, 36.0, 43.0, 44.0, 53.0, 43.0, 27.0, 58.0, 35.0, 34.0, 23.0, 33.0, 23.0, 18.0, 17.0, 12.0, 18.0, 12.0, 18.0, 8.0, 6.0, 7.0, 4.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.20782434940338135, -0.2012813687324524, -0.19473838806152344, -0.1881953924894333, -0.18165241181850433, -0.17510943114757538, -0.16856643557548523, -0.16202345490455627, -0.15548047423362732, -0.14893749356269836, -0.1423945128917694, -0.13585151731967926, -0.1293085366487503, -0.12276555597782135, -0.1162225678563118, -0.10967957973480225, -0.10313659906387329, -0.09659361839294434, -0.09005063027143478, -0.08350764214992523, -0.07696466147899628, -0.07042168080806732, -0.06387869268655777, -0.057335708290338516, -0.05079272389411926, -0.04424973949790001, -0.037706755101680756, -0.031163770705461502, -0.02462078630924225, -0.018077801913022995, -0.011534817516803741, -0.004991833120584488, 0.0015511512756347656, 0.00809413567185402, 0.014637120068073273, 0.021180104464292526, 0.02772308886051178, 0.03426607325673103, 0.04080905765295029, 0.04735204204916954, 0.053895026445388794, 0.06043801084160805, 0.0669809952378273, 0.07352398335933685, 0.08006696403026581, 0.08660994470119476, 0.09315293282270432, 
0.09969592094421387, 0.10623890161514282, 0.11278188228607178, 0.11932487040758133, 0.12586785852909088, 0.13241083920001984, 0.1389538198709488, 0.14549681544303894, 0.1520397961139679, 0.15858277678489685, 0.1651257574558258, 0.17166873812675476, 0.1782117336988449, 0.18475471436977386, 0.19129769504070282, 0.19784069061279297, 0.20438367128372192, 0.21092665195465088]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 3.0, 5.0, 12.0, 15.0, 20.0, 32.0, 44.0, 74.0, 133.0, 225.0, 432.0, 1048.0, 2806.0, 9931.0, 61728.0, 1665469.0, 2346663.0, 88857.0, 11611.0, 3207.0, 1144.0, 399.0, 196.0, 104.0, 50.0, 29.0, 21.0, 10.0, 9.0, 4.0, 3.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.10980224609375, -0.10706138610839844, -0.10432052612304688, -0.10157966613769531, -0.09883880615234375, -0.09609794616699219, -0.09335708618164062, -0.09061622619628906, -0.0878753662109375, -0.08513450622558594, -0.08239364624023438, -0.07965278625488281, -0.07691192626953125, -0.07417106628417969, -0.07143020629882812, -0.06868934631347656, -0.065948486328125, -0.06320762634277344, -0.060466766357421875, -0.05772590637207031, -0.05498504638671875, -0.05224418640136719, -0.049503326416015625, -0.04676246643066406, -0.0440216064453125, -0.04128074645996094, -0.038539886474609375, -0.03579902648925781, -0.03305816650390625, -0.030317306518554688, -0.027576446533203125, -0.024835586547851562, -0.0220947265625, -0.019353866577148438, -0.016613006591796875, -0.013872146606445312, -0.01113128662109375, -0.008390426635742188, -0.005649566650390625, -0.0029087066650390625, -0.0001678466796875, 0.0025730133056640625, 0.005313873291015625, 0.008054733276367188, 0.01079559326171875, 0.013536453247070312, 0.016277313232421875, 0.019018173217773438, 0.021759033203125, 0.024499893188476562, 0.027240753173828125, 0.029981613159179688, 0.03272247314453125, 0.03546333312988281, 0.038204193115234375, 0.04094505310058594, 0.0436859130859375, 0.04642677307128906, 0.049167633056640625, 0.05190849304199219, 0.05464935302734375, 0.05739021301269531, 0.060131072998046875, 0.06287193298339844, 0.06561279296875]}, "gradients/encoder.encoder.layers.10.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 4.0, 10.0, 16.0, 26.0, 36.0, 51.0, 65.0, 113.0, 116.0, 148.0, 129.0, 107.0, 73.0, 44.0, 32.0, 21.0, 10.0, 6.0, 2.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0941162109375, -0.09137535095214844, -0.08863449096679688, -0.08589363098144531, -0.08315277099609375, -0.08041191101074219, -0.07767105102539062, -0.07493019104003906, -0.0721893310546875, -0.06944847106933594, -0.06670761108398438, -0.06396675109863281, -0.06122589111328125, -0.05848503112792969, -0.055744171142578125, -0.05300331115722656, -0.050262451171875, -0.04752159118652344, -0.044780731201171875, -0.04203987121582031, -0.03929901123046875, -0.03655815124511719, -0.033817291259765625, -0.031076431274414062, -0.0283355712890625, -0.025594711303710938, -0.022853851318359375, -0.020112991333007812, -0.01737213134765625, -0.014631271362304688, -0.011890411376953125, -0.009149551391601562, -0.00640869140625, -0.0036678314208984375, -0.000926971435546875, 0.0018138885498046875, 
0.00455474853515625, 0.0072956085205078125, 0.010036468505859375, 0.012777328491210938, 0.0155181884765625, 0.018259048461914062, 0.020999908447265625, 0.023740768432617188, 0.02648162841796875, 0.029222488403320312, 0.031963348388671875, 0.03470420837402344, 0.037445068359375, 0.04018592834472656, 0.042926788330078125, 0.04566764831542969, 0.04840850830078125, 0.05114936828613281, 0.053890228271484375, 0.05663108825683594, 0.0593719482421875, 0.06211280822753906, 0.06485366821289062, 0.06759452819824219, 0.07033538818359375, 0.07307624816894531, 0.07581710815429688, 0.07855796813964844, 0.081298828125]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 2.0, 6.0, 2.0, 3.0, 6.0, 6.0, 4.0, 8.0, 7.0, 12.0, 16.0, 21.0, 21.0, 30.0, 42.0, 52.0, 84.0, 166.0, 294.0, 590.0, 1303.0, 3418.0, 9741.0, 32727.0, 142444.0, 1116036.0, 2494462.0, 308095.0, 59483.0, 16116.0, 5309.0, 1950.0, 829.0, 409.0, 219.0, 111.0, 64.0, 42.0, 25.0, 27.0, 22.0, 10.0, 21.0, 6.0, 5.0, 13.0, 7.0, 5.0, 4.0, 7.0, 4.0, 6.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.04425048828125, -0.04268217086791992, -0.041113853454589844, -0.039545536041259766, -0.03797721862792969, -0.03640890121459961, -0.03484058380126953, -0.03327226638793945, -0.031703948974609375, -0.030135631561279297, -0.02856731414794922, -0.02699899673461914, -0.025430679321289062, -0.023862361907958984, -0.022294044494628906, -0.020725727081298828, -0.01915740966796875, -0.017589092254638672, -0.016020774841308594, -0.014452457427978516, -0.012884140014648438, -0.01131582260131836, -0.009747505187988281, -0.008179187774658203, -0.006610870361328125, -0.005042552947998047, -0.0034742355346679688, -0.0019059181213378906, -0.0003376007080078125, 0.0012307167053222656, 0.0027990341186523438, 0.004367351531982422, 0.0059356689453125, 0.007503986358642578, 0.009072303771972656, 0.010640621185302734, 0.012208938598632812, 0.01377725601196289, 0.015345573425292969, 0.016913890838623047, 0.018482208251953125, 0.020050525665283203, 0.02161884307861328, 0.02318716049194336, 0.024755477905273438, 0.026323795318603516, 0.027892112731933594, 0.029460430145263672, 0.03102874755859375, 0.03259706497192383, 0.034165382385253906, 0.035733699798583984, 0.03730201721191406, 0.03887033462524414, 0.04043865203857422, 0.0420069694519043, 0.043575286865234375, 0.04514360427856445, 0.04671192169189453, 0.04828023910522461, 0.04984855651855469, 0.051416873931884766, 0.052985191345214844, 0.05455350875854492, 0.056121826171875]}, "gradients/encoder.encoder.layers.10.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 5.0, 6.0, 6.0, 6.0, 8.0, 19.0, 19.0, 39.0, 61.0, 95.0, 165.0, 247.0, 503.0, 754.0, 789.0, 565.0, 342.0, 170.0, 95.0, 57.0, 37.0, 26.0, 27.0, 14.0, 12.0, 4.0, 3.0, 5.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.048309326171875, -0.04670381546020508, -0.045098304748535156, -0.043492794036865234, -0.04188728332519531, -0.04028177261352539, -0.03867626190185547, -0.03707075119018555, -0.035465240478515625, -0.0338597297668457, -0.03225421905517578, -0.03064870834350586, -0.029043197631835938, -0.027437686920166016, -0.025832176208496094, -0.024226665496826172, -0.02262115478515625, -0.021015644073486328, -0.019410133361816406, -0.017804622650146484, -0.016199111938476562, -0.01459360122680664, -0.012988090515136719, 
-0.011382579803466797, -0.009777069091796875, -0.008171558380126953, -0.006566047668457031, -0.004960536956787109, -0.0033550262451171875, -0.0017495155334472656, -0.00014400482177734375, 0.0014615058898925781, 0.0030670166015625, 0.004672527313232422, 0.006278038024902344, 0.007883548736572266, 0.009489059448242188, 0.01109457015991211, 0.012700080871582031, 0.014305591583251953, 0.015911102294921875, 0.017516613006591797, 0.01912212371826172, 0.02072763442993164, 0.022333145141601562, 0.023938655853271484, 0.025544166564941406, 0.027149677276611328, 0.02875518798828125, 0.030360698699951172, 0.031966209411621094, 0.033571720123291016, 0.03517723083496094, 0.03678274154663086, 0.03838825225830078, 0.0399937629699707, 0.041599273681640625, 0.04320478439331055, 0.04481029510498047, 0.04641580581665039, 0.04802131652832031, 0.049626827239990234, 0.051232337951660156, 0.05283784866333008, 0.054443359375]}, "gradients/encoder.encoder.layers.10.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 6.0, 12.0, 21.0, 50.0, 81.0, 132.0, 168.0, 207.0, 127.0, 98.0, 40.0, 39.0, 12.0, 7.0, 4.0, 0.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12716475129127502, -0.11738310754299164, -0.10760146379470825, -0.09781982004642487, -0.08803817629814148, -0.0782565325498581, -0.06847488880157471, -0.05869324505329132, -0.048911601305007935, -0.03912995755672455, -0.029348313808441162, -0.019566670060157776, -0.00978502631187439, -3.382563591003418e-06, 0.009778261184692383, 0.01955990493297577, 0.029341548681259155, 0.03912319242954254, 0.04890483617782593, 0.058686479926109314, 0.0684681236743927, 0.07824976742267609, 0.08803141117095947, 0.09781305491924286, 0.10759469866752625, 0.11737634241580963, 0.12715798616409302, 0.1369396299123764, 0.1467212736606598, 0.15650291740894318, 0.16628456115722656, 0.17606620490550995, 0.18584787845611572, 0.1956295222043991, 0.2054111659526825, 0.21519280970096588, 0.22497445344924927, 0.23475609719753265, 0.24453774094581604, 0.25431936979293823, 0.2641010284423828, 0.2738826870918274, 0.2836643159389496, 0.2934459447860718, 0.30322760343551636, 0.31300926208496094, 0.32279089093208313, 0.3325725197792053, 0.3423541784286499, 0.3521358370780945, 0.3619174659252167, 0.37169909477233887, 0.38148075342178345, 0.391262412071228, 0.4010440409183502, 0.4108256697654724, 0.420607328414917, 0.4303889870643616, 0.44017061591148376, 0.44995224475860596, 0.45973390340805054, 0.4695155620574951, 0.4792971909046173, 0.4890788197517395, 0.4988604784011841]}, "gradients/encoder.encoder.layers.10.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 1.0, 3.0, 5.0, 9.0, 6.0, 13.0, 20.0, 13.0, 27.0, 31.0, 30.0, 27.0, 28.0, 36.0, 42.0, 36.0, 39.0, 47.0, 54.0, 34.0, 47.0, 37.0, 38.0, 45.0, 43.0, 40.0, 40.0, 37.0, 27.0, 23.0, 23.0, 25.0, 18.0, 14.0, 8.0, 8.0, 7.0, 7.0, 4.0, 8.0, 3.0, 7.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.13960421085357666, -0.13575610518455505, -0.13190799951553345, -0.12805989384651184, -0.12421178072690964, -0.12036367505788803, -0.11651556193828583, -0.11266745626926422, -0.10881935060024261, -0.10497124493122101, -0.1011231392621994, -0.0972750261425972, -0.09342692047357559, -0.08957881480455399, -0.08573070168495178, -0.08188259601593018, -0.07803449034690857, 
-0.07418638467788696, -0.07033827900886536, -0.06649016588926315, -0.06264206022024155, -0.05879395455121994, -0.054945845156908035, -0.05109773576259613, -0.047249630093574524, -0.04340152442455292, -0.03955341503024101, -0.03570530563592911, -0.0318571999669075, -0.028009092435240746, -0.02416098490357399, -0.020312877371907234, -0.01646476984024048, -0.012616662308573723, -0.008768554776906967, -0.0049204472452402115, -0.0010723397135734558, 0.0027757678180933, 0.0066238753497600555, 0.010471982881426811, 0.014320090413093567, 0.018168197944760323, 0.022016305476427078, 0.025864413008093834, 0.02971252053976059, 0.033560626208782196, 0.0374087356030941, 0.041256844997406006, 0.04510495066642761, 0.04895305633544922, 0.052801165729761124, 0.05664927512407303, 0.060497380793094635, 0.06434548646211624, 0.06819359958171844, 0.07204170525074005, 0.07588981091976166, 0.07973791658878326, 0.08358602225780487, 0.08743413537740707, 0.09128224104642868, 0.09513034671545029, 0.09897845983505249, 0.1028265655040741, 0.1066746711730957]}, "gradients/encoder.encoder.layers.10.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 2.0, 3.0, 2.0, 6.0, 10.0, 12.0, 12.0, 26.0, 26.0, 51.0, 72.0, 110.0, 206.0, 326.0, 662.0, 1210.0, 3247.0, 11116.0, 53974.0, 277804.0, 520376.0, 141765.0, 27151.0, 6196.0, 2098.0, 893.0, 491.0, 269.0, 143.0, 80.0, 77.0, 56.0, 37.0, 21.0, 7.0, 8.0, 6.0, 2.0, 7.0, 3.0, 2.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0927734375, -0.09006786346435547, -0.08736228942871094, -0.0846567153930664, -0.08195114135742188, -0.07924556732177734, -0.07653999328613281, -0.07383441925048828, -0.07112884521484375, -0.06842327117919922, -0.06571769714355469, -0.06301212310791016, -0.060306549072265625, -0.057600975036621094, -0.05489540100097656, -0.05218982696533203, -0.0494842529296875, -0.04677867889404297, -0.04407310485839844, -0.041367530822753906, -0.038661956787109375, -0.035956382751464844, -0.03325080871582031, -0.03054523468017578, -0.02783966064453125, -0.02513408660888672, -0.022428512573242188, -0.019722938537597656, -0.017017364501953125, -0.014311790466308594, -0.011606216430664062, -0.008900642395019531, -0.006195068359375, -0.0034894943237304688, -0.0007839202880859375, 0.0019216537475585938, 0.004627227783203125, 0.007332801818847656, 0.010038375854492188, 0.012743949890136719, 0.01544952392578125, 0.01815509796142578, 0.020860671997070312, 0.023566246032714844, 0.026271820068359375, 0.028977394104003906, 0.03168296813964844, 0.03438854217529297, 0.0370941162109375, 0.03979969024658203, 0.04250526428222656, 0.045210838317871094, 0.047916412353515625, 0.050621986389160156, 0.05332756042480469, 0.05603313446044922, 0.05873870849609375, 0.06144428253173828, 0.06414985656738281, 0.06685543060302734, 0.06956100463867188, 0.0722665786743164, 0.07497215270996094, 0.07767772674560547, 0.08038330078125]}, "gradients/encoder.encoder.layers.10.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 4.0, 8.0, 4.0, 26.0, 19.0, 39.0, 69.0, 93.0, 113.0, 132.0, 131.0, 124.0, 91.0, 63.0, 41.0, 23.0, 17.0, 9.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.09454345703125, -0.09183883666992188, -0.08913421630859375, -0.08642959594726562, -0.0837249755859375, -0.08102035522460938, -0.07831573486328125, 
-0.07561111450195312, -0.072906494140625, -0.07020187377929688, -0.06749725341796875, -0.06479263305664062, -0.0620880126953125, -0.059383392333984375, -0.05667877197265625, -0.053974151611328125, -0.05126953125, -0.048564910888671875, -0.04586029052734375, -0.043155670166015625, -0.0404510498046875, -0.037746429443359375, -0.03504180908203125, -0.032337188720703125, -0.029632568359375, -0.026927947998046875, -0.02422332763671875, -0.021518707275390625, -0.0188140869140625, -0.016109466552734375, -0.01340484619140625, -0.010700225830078125, -0.00799560546875, -0.005290985107421875, -0.00258636474609375, 0.000118255615234375, 0.0028228759765625, 0.005527496337890625, 0.00823211669921875, 0.010936737060546875, 0.013641357421875, 0.016345977783203125, 0.01905059814453125, 0.021755218505859375, 0.0244598388671875, 0.027164459228515625, 0.02986907958984375, 0.032573699951171875, 0.0352783203125, 0.037982940673828125, 0.04068756103515625, 0.043392181396484375, 0.0460968017578125, 0.048801422119140625, 0.05150604248046875, 0.054210662841796875, 0.056915283203125, 0.059619903564453125, 0.06232452392578125, 0.06502914428710938, 0.0677337646484375, 0.07043838500976562, 0.07314300537109375, 0.07584762573242188, 0.07855224609375]}, "gradients/encoder.encoder.layers.10.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 2.0, 4.0, 4.0, 3.0, 5.0, 8.0, 8.0, 13.0, 14.0, 18.0, 28.0, 29.0, 33.0, 68.0, 75.0, 99.0, 161.0, 285.0, 517.0, 927.0, 2046.0, 4418.0, 9672.0, 22618.0, 53806.0, 129792.0, 278613.0, 295489.0, 144130.0, 60407.0, 25107.0, 10760.0, 4802.0, 2111.0, 1052.0, 512.0, 331.0, 153.0, 121.0, 82.0, 49.0, 48.0, 39.0, 19.0, 17.0, 14.0, 11.0, 8.0, 7.0, 3.0, 6.0, 6.0, 5.0, 2.0, 1.0, 1.0, 3.0, 2.0, 2.0, 0.0, 1.0], "bins": [-0.03570556640625, -0.034517765045166016, -0.03332996368408203, -0.03214216232299805, -0.030954360961914062, -0.029766559600830078, -0.028578758239746094, -0.02739095687866211, -0.026203155517578125, -0.02501535415649414, -0.023827552795410156, -0.022639751434326172, -0.021451950073242188, -0.020264148712158203, -0.01907634735107422, -0.017888545989990234, -0.01670074462890625, -0.015512943267822266, -0.014325141906738281, -0.013137340545654297, -0.011949539184570312, -0.010761737823486328, -0.009573936462402344, -0.00838613510131836, -0.007198333740234375, -0.006010532379150391, -0.004822731018066406, -0.003634929656982422, -0.0024471282958984375, -0.0012593269348144531, -7.152557373046875e-05, 0.0011162757873535156, 0.0023040771484375, 0.0034918785095214844, 0.004679679870605469, 0.005867481231689453, 0.0070552825927734375, 0.008243083953857422, 0.009430885314941406, 0.01061868667602539, 0.011806488037109375, 0.01299428939819336, 0.014182090759277344, 0.015369892120361328, 0.016557693481445312, 0.017745494842529297, 0.01893329620361328, 0.020121097564697266, 0.02130889892578125, 0.022496700286865234, 0.02368450164794922, 0.024872303009033203, 0.026060104370117188, 0.027247905731201172, 0.028435707092285156, 0.02962350845336914, 0.030811309814453125, 0.03199911117553711, 0.033186912536621094, 0.03437471389770508, 0.03556251525878906, 0.03675031661987305, 0.03793811798095703, 0.039125919342041016, 0.040313720703125]}, "gradients/encoder.encoder.layers.10.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 7.0, 9.0, 4.0, 5.0, 6.0, 6.0, 6.0, 13.0, 19.0, 13.0, 15.0, 16.0, 19.0, 17.0, 29.0, 17.0, 25.0, 37.0, 31.0, 35.0, 38.0, 35.0, 30.0, 50.0, 32.0, 34.0, 37.0, 30.0, 43.0, 37.0, 25.0, 30.0, 32.0, 27.0, 25.0, 32.0, 8.0, 23.0, 
19.0, 12.0, 12.0, 14.0, 8.0, 11.0, 6.0, 4.0, 5.0, 2.0, 3.0, 3.0, 2.0, 5.0, 1.0, 1.0, 3.0, 0.0, 1.0], "bins": [-0.0556640625, -0.053888797760009766, -0.05211353302001953, -0.0503382682800293, -0.04856300354003906, -0.04678773880004883, -0.045012474060058594, -0.04323720932006836, -0.041461944580078125, -0.03968667984008789, -0.037911415100097656, -0.03613615036010742, -0.03436088562011719, -0.03258562088012695, -0.03081035614013672, -0.029035091400146484, -0.02725982666015625, -0.025484561920166016, -0.02370929718017578, -0.021934032440185547, -0.020158767700195312, -0.018383502960205078, -0.016608238220214844, -0.01483297348022461, -0.013057708740234375, -0.01128244400024414, -0.009507179260253906, -0.007731914520263672, -0.0059566497802734375, -0.004181385040283203, -0.0024061203002929688, -0.0006308555603027344, 0.0011444091796875, 0.0029196739196777344, 0.004694938659667969, 0.006470203399658203, 0.008245468139648438, 0.010020732879638672, 0.011795997619628906, 0.01357126235961914, 0.015346527099609375, 0.01712179183959961, 0.018897056579589844, 0.020672321319580078, 0.022447586059570312, 0.024222850799560547, 0.02599811553955078, 0.027773380279541016, 0.02954864501953125, 0.031323909759521484, 0.03309917449951172, 0.03487443923950195, 0.03664970397949219, 0.03842496871948242, 0.040200233459472656, 0.04197549819946289, 0.043750762939453125, 0.04552602767944336, 0.047301292419433594, 0.04907655715942383, 0.05085182189941406, 0.0526270866394043, 0.05440235137939453, 0.056177616119384766, 0.057952880859375]}, "gradients/encoder.encoder.layers.10.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 6.0, 7.0, 9.0, 7.0, 10.0, 14.0, 23.0, 30.0, 60.0, 74.0, 122.0, 147.0, 263.0, 359.0, 639.0, 906.0, 1534.0, 2624.0, 4543.0, 8238.0, 16782.0, 36384.0, 85892.0, 212921.0, 348512.0, 187170.0, 75675.0, 32710.0, 14842.0, 7443.0, 4274.0, 2299.0, 1447.0, 901.0, 553.0, 386.0, 249.0, 156.0, 113.0, 55.0, 51.0, 35.0, 27.0, 20.0, 13.0, 14.0, 4.0, 4.0, 3.0, 2.0, 3.0, 5.0, 4.0, 6.0, 0.0, 1.0], "bins": [-0.0135040283203125, -0.013087630271911621, -0.012671232223510742, -0.012254834175109863, -0.011838436126708984, -0.011422038078308105, -0.011005640029907227, -0.010589241981506348, -0.010172843933105469, -0.00975644588470459, -0.009340047836303711, -0.008923649787902832, -0.008507251739501953, -0.008090853691101074, -0.007674455642700195, -0.007258057594299316, -0.0068416595458984375, -0.006425261497497559, -0.00600886344909668, -0.005592465400695801, -0.005176067352294922, -0.004759669303894043, -0.004343271255493164, -0.003926873207092285, -0.0035104751586914062, -0.0030940771102905273, -0.0026776790618896484, -0.0022612810134887695, -0.0018448829650878906, -0.0014284849166870117, -0.0010120868682861328, -0.0005956888198852539, -0.000179290771484375, 0.0002371072769165039, 0.0006535053253173828, 0.0010699033737182617, 0.0014863014221191406, 0.0019026994705200195, 0.0023190975189208984, 0.0027354955673217773, 0.0031518936157226562, 0.003568291664123535, 0.003984689712524414, 0.004401087760925293, 0.004817485809326172, 0.005233883857727051, 0.00565028190612793, 0.006066679954528809, 0.0064830780029296875, 0.006899476051330566, 0.007315874099731445, 0.007732272148132324, 0.008148670196533203, 0.008565068244934082, 0.008981466293334961, 0.00939786434173584, 0.009814262390136719, 0.010230660438537598, 0.010647058486938477, 0.011063456535339355, 0.011479854583740234, 0.011896252632141113, 0.012312650680541992, 0.012729048728942871, 0.01314544677734375]}, 
"gradients/encoder.encoder.layers.10.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 4.0, 2.0, 1.0, 2.0, 4.0, 9.0, 2.0, 7.0, 4.0, 15.0, 15.0, 18.0, 19.0, 21.0, 30.0, 23.0, 36.0, 26.0, 43.0, 72.0, 37.0, 51.0, 59.0, 40.0, 55.0, 50.0, 40.0, 42.0, 35.0, 39.0, 34.0, 27.0, 25.0, 30.0, 17.0, 19.0, 19.0, 6.0, 9.0, 2.0, 6.0, 2.0, 9.0, 2.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-6.854534149169922e-06, -6.654299795627594e-06, -6.454065442085266e-06, -6.253831088542938e-06, -6.05359673500061e-06, -5.8533623814582825e-06, -5.653128027915955e-06, -5.452893674373627e-06, -5.252659320831299e-06, -5.052424967288971e-06, -4.852190613746643e-06, -4.651956260204315e-06, -4.451721906661987e-06, -4.2514875531196594e-06, -4.0512531995773315e-06, -3.851018846035004e-06, -3.6507844924926758e-06, -3.450550138950348e-06, -3.25031578540802e-06, -3.050081431865692e-06, -2.8498470783233643e-06, -2.6496127247810364e-06, -2.4493783712387085e-06, -2.2491440176963806e-06, -2.0489096641540527e-06, -1.8486753106117249e-06, -1.648440957069397e-06, -1.448206603527069e-06, -1.2479722499847412e-06, -1.0477378964424133e-06, -8.475035429000854e-07, -6.472691893577576e-07, -4.470348358154297e-07, -2.468004822731018e-07, -4.6566128730773926e-08, 1.5366822481155396e-07, 3.5390257835388184e-07, 5.541369318962097e-07, 7.543712854385376e-07, 9.546056389808655e-07, 1.1548399925231934e-06, 1.3550743460655212e-06, 1.5553086996078491e-06, 1.755543053150177e-06, 1.955777406692505e-06, 2.1560117602348328e-06, 2.3562461137771606e-06, 2.5564804673194885e-06, 2.7567148208618164e-06, 2.9569491744041443e-06, 3.157183527946472e-06, 3.3574178814888e-06, 3.557652235031128e-06, 3.757886588573456e-06, 3.958120942115784e-06, 4.1583552956581116e-06, 4.3585896492004395e-06, 4.558824002742767e-06, 4.759058356285095e-06, 4.959292709827423e-06, 5.159527063369751e-06, 5.359761416912079e-06, 5.559995770454407e-06, 5.760230123996735e-06, 5.9604644775390625e-06]}, "gradients/encoder.encoder.layers.10.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 4.0, 8.0, 3.0, 12.0, 14.0, 30.0, 59.0, 72.0, 147.0, 244.0, 519.0, 955.0, 2191.0, 5756.0, 18261.0, 82148.0, 486444.0, 368943.0, 59779.0, 14467.0, 4684.0, 1923.0, 926.0, 425.0, 220.0, 132.0, 81.0, 44.0, 23.0, 14.0, 9.0, 9.0, 5.0, 1.0, 5.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.0338134765625, -0.03284478187561035, -0.0318760871887207, -0.030907392501831055, -0.029938697814941406, -0.028970003128051758, -0.02800130844116211, -0.02703261375427246, -0.026063919067382812, -0.025095224380493164, -0.024126529693603516, -0.023157835006713867, -0.02218914031982422, -0.02122044563293457, -0.020251750946044922, -0.019283056259155273, -0.018314361572265625, -0.017345666885375977, -0.016376972198486328, -0.01540827751159668, -0.014439582824707031, -0.013470888137817383, -0.012502193450927734, -0.011533498764038086, -0.010564804077148438, -0.009596109390258789, -0.00862741470336914, -0.007658720016479492, -0.006690025329589844, -0.005721330642700195, -0.004752635955810547, -0.0037839412689208984, -0.00281524658203125, -0.0018465518951416016, -0.0008778572082519531, 9.083747863769531e-05, 0.0010595321655273438, 0.002028226852416992, 0.0029969215393066406, 0.003965616226196289, 0.0049343109130859375, 0.005903005599975586, 0.006871700286865234, 0.007840394973754883, 0.008809089660644531, 0.00977778434753418, 
0.010746479034423828, 0.011715173721313477, 0.012683868408203125, 0.013652563095092773, 0.014621257781982422, 0.01558995246887207, 0.01655864715576172, 0.017527341842651367, 0.018496036529541016, 0.019464731216430664, 0.020433425903320312, 0.02140212059020996, 0.02237081527709961, 0.023339509963989258, 0.024308204650878906, 0.025276899337768555, 0.026245594024658203, 0.02721428871154785, 0.0281829833984375]}, "gradients/encoder.encoder.layers.10.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 6.0, 7.0, 4.0, 18.0, 8.0, 18.0, 26.0, 25.0, 43.0, 50.0, 69.0, 97.0, 133.0, 149.0, 109.0, 85.0, 54.0, 30.0, 26.0, 14.0, 7.0, 11.0, 4.0, 6.0, 3.0, 1.0, 2.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0238800048828125, -0.023084640502929688, -0.022289276123046875, -0.021493911743164062, -0.02069854736328125, -0.019903182983398438, -0.019107818603515625, -0.018312454223632812, -0.01751708984375, -0.016721725463867188, -0.015926361083984375, -0.015130996704101562, -0.01433563232421875, -0.013540267944335938, -0.012744903564453125, -0.011949539184570312, -0.0111541748046875, -0.010358810424804688, -0.009563446044921875, -0.008768081665039062, -0.00797271728515625, -0.0071773529052734375, -0.006381988525390625, -0.0055866241455078125, -0.004791259765625, -0.0039958953857421875, -0.003200531005859375, -0.0024051666259765625, -0.00160980224609375, -0.0008144378662109375, -1.9073486328125e-05, 0.0007762908935546875, 0.0015716552734375, 0.0023670196533203125, 0.003162384033203125, 0.0039577484130859375, 0.00475311279296875, 0.0055484771728515625, 0.006343841552734375, 0.0071392059326171875, 0.0079345703125, 0.008729934692382812, 0.009525299072265625, 0.010320663452148438, 0.01111602783203125, 0.011911392211914062, 0.012706756591796875, 0.013502120971679688, 0.0142974853515625, 0.015092849731445312, 0.015888214111328125, 0.016683578491210938, 0.01747894287109375, 0.018274307250976562, 0.019069671630859375, 0.019865036010742188, 0.020660400390625, 0.021455764770507812, 0.022251129150390625, 0.023046493530273438, 0.02384185791015625, 0.024637222290039062, 0.025432586669921875, 0.026227951049804688, 0.0270233154296875]}, "gradients/encoder.encoder.layers.10.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 12.0, 165.0, 577.0, 205.0, 38.0, 9.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0], "bins": [-1.649794578552246, -1.6189637184143066, -1.5881328582763672, -1.5573018789291382, -1.5264710187911987, -1.4956401586532593, -1.4648092985153198, -1.4339783191680908, -1.4031474590301514, -1.372316598892212, -1.3414857387542725, -1.3106547594070435, -1.279823899269104, -1.2489930391311646, -1.218162178993225, -1.187331199645996, -1.1565003395080566, -1.1256694793701172, -1.0948386192321777, -1.0640076398849487, -1.0331767797470093, -1.0023459196090698, -0.9715149998664856, -0.9406841397285461, -0.9098532795906067, -0.8790224194526672, -0.848191499710083, -0.8173606395721436, -0.7865297198295593, -0.7556988596916199, -0.7248679399490356, -0.6940370798110962, -0.663206160068512, -0.6323752999305725, -0.6015443801879883, -0.5707135200500488, -0.5398826003074646, -0.5090517401695251, -0.4782208204269409, 
-0.44738996028900146, -0.41655904054641724, -0.3857281506061554, -0.35489726066589355, -0.3240663707256317, -0.2932354807853699, -0.2624046206474304, -0.23157371580600739, -0.20074282586574554, -0.1699119508266449, -0.13908106088638306, -0.10825017094612122, -0.07741928845643997, -0.04658839851617813, -0.015757516026496887, 0.015073373913764954, 0.045904263854026794, 0.07673515379428864, 0.10756604373455048, 0.13839693367481232, 0.16922780871391296, 0.2000586986541748, 0.23088958859443665, 0.2617204785346985, 0.2925513684749603, 0.32338225841522217]}, "gradients/encoder.encoder.layers.10.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 4.0, 4.0, 10.0, 7.0, 9.0, 13.0, 7.0, 14.0, 19.0, 19.0, 20.0, 20.0, 31.0, 17.0, 24.0, 25.0, 44.0, 47.0, 42.0, 32.0, 44.0, 42.0, 40.0, 46.0, 33.0, 42.0, 38.0, 42.0, 32.0, 34.0, 25.0, 23.0, 25.0, 29.0, 14.0, 16.0, 11.0, 11.0, 10.0, 9.0, 6.0, 9.0, 6.0, 6.0, 4.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14031648635864258, -0.1355638951063156, -0.13081130385398865, -0.12605871260166168, -0.12130612879991531, -0.11655353754758835, -0.11180095374584198, -0.10704836249351501, -0.10229577124118805, -0.09754317998886108, -0.09279058873653412, -0.08803800493478775, -0.08328541368246078, -0.07853282243013382, -0.07378023862838745, -0.06902764737606049, -0.06427505612373352, -0.059522464871406555, -0.05476987734436989, -0.05001728981733322, -0.045264698565006256, -0.04051210731267929, -0.035759519785642624, -0.031006932258605957, -0.02625434100627899, -0.021501751616597176, -0.01674916222691536, -0.011996572837233543, -0.007243983447551727, -0.002491394057869911, 0.002261195331811905, 0.007013782858848572, 0.011766374111175537, 0.016518963500857353, 0.02127155289053917, 0.026024142280220985, 0.0307767316699028, 0.03552932292222977, 0.040281910449266434, 0.0450344979763031, 0.049787089228630066, 0.05453968048095703, 0.0592922680079937, 0.06404485553503036, 0.06879744678735733, 0.0735500380396843, 0.07830262184143066, 0.08305521309375763, 0.0878078043460846, 0.09256039559841156, 0.09731298685073853, 0.1020655706524849, 0.10681816190481186, 0.11157075315713882, 0.11632333695888519, 0.12107592821121216, 0.12582851946353912, 0.1305811107158661, 0.13533370196819305, 0.14008629322052002, 0.1448388695716858, 0.14959146082401276, 0.15434405207633972, 0.1590966433286667, 0.16384923458099365]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 11.0, 13.0, 21.0, 35.0, 64.0, 128.0, 293.0, 890.0, 4281.0, 49371.0, 3723156.0, 402732.0, 10952.0, 1661.0, 393.0, 147.0, 66.0, 35.0, 14.0, 12.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": [-0.1915283203125, -0.18693828582763672, -0.18234825134277344, -0.17775821685791016, -0.17316818237304688, -0.1685781478881836, -0.1639881134033203, -0.15939807891845703, -0.15480804443359375, -0.15021800994873047, -0.1456279754638672, -0.1410379409790039, -0.13644790649414062, -0.13185787200927734, -0.12726783752441406, -0.12267780303955078, -0.1180877685546875, -0.11349773406982422, -0.10890769958496094, -0.10431766510009766, -0.09972763061523438, -0.0951375961303711, -0.09054756164550781, -0.08595752716064453, -0.08136749267578125, -0.07677745819091797, -0.07218742370605469, -0.0675973892211914, -0.06300735473632812, -0.058417320251464844, 
-0.05382728576660156, -0.04923725128173828, -0.044647216796875, -0.04005718231201172, -0.03546714782714844, -0.030877113342285156, -0.026287078857421875, -0.021697044372558594, -0.017107009887695312, -0.012516975402832031, -0.00792694091796875, -0.0033369064331054688, 0.0012531280517578125, 0.005843162536621094, 0.010433197021484375, 0.015023231506347656, 0.019613265991210938, 0.02420330047607422, 0.0287933349609375, 0.03338336944580078, 0.03797340393066406, 0.042563438415527344, 0.047153472900390625, 0.051743507385253906, 0.05633354187011719, 0.06092357635498047, 0.06551361083984375, 0.07010364532470703, 0.07469367980957031, 0.0792837142944336, 0.08387374877929688, 0.08846378326416016, 0.09305381774902344, 0.09764385223388672, 0.10223388671875]}, "gradients/encoder.encoder.layers.9.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 4.0, 14.0, 8.0, 16.0, 47.0, 46.0, 88.0, 97.0, 140.0, 141.0, 127.0, 105.0, 71.0, 49.0, 29.0, 15.0, 9.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0946044921875, -0.09192085266113281, -0.08923721313476562, -0.08655357360839844, -0.08386993408203125, -0.08118629455566406, -0.07850265502929688, -0.07581901550292969, -0.0731353759765625, -0.07045173645019531, -0.06776809692382812, -0.06508445739746094, -0.06240081787109375, -0.05971717834472656, -0.057033538818359375, -0.05434989929199219, -0.051666259765625, -0.04898262023925781, -0.046298980712890625, -0.04361534118652344, -0.04093170166015625, -0.03824806213378906, -0.035564422607421875, -0.03288078308105469, -0.0301971435546875, -0.027513504028320312, -0.024829864501953125, -0.022146224975585938, -0.01946258544921875, -0.016778945922851562, -0.014095306396484375, -0.011411666870117188, -0.00872802734375, -0.0060443878173828125, -0.003360748291015625, -0.0006771087646484375, 0.00200653076171875, 0.0046901702880859375, 0.007373809814453125, 0.010057449340820312, 0.0127410888671875, 0.015424728393554688, 0.018108367919921875, 0.020792007446289062, 0.02347564697265625, 0.026159286499023438, 0.028842926025390625, 0.03152656555175781, 0.034210205078125, 0.03689384460449219, 0.039577484130859375, 0.04226112365722656, 0.04494476318359375, 0.04762840270996094, 0.050312042236328125, 0.05299568176269531, 0.0556793212890625, 0.05836296081542969, 0.061046600341796875, 0.06373023986816406, 0.06641387939453125, 0.06909751892089844, 0.07178115844726562, 0.07446479797363281, 0.0771484375]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 4.0, 9.0, 8.0, 8.0, 20.0, 25.0, 27.0, 35.0, 56.0, 105.0, 122.0, 209.0, 362.0, 560.0, 998.0, 1708.0, 3367.0, 6843.0, 15079.0, 35697.0, 94423.0, 316004.0, 1686934.0, 1580772.0, 297856.0, 90677.0, 34404.0, 14158.0, 6662.0, 3040.0, 1652.0, 968.0, 526.0, 343.0, 210.0, 147.0, 93.0, 67.0, 40.0, 21.0, 17.0, 13.0, 10.0, 5.0, 2.0, 4.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.040924072265625, -0.03981733322143555, -0.038710594177246094, -0.03760385513305664, -0.03649711608886719, -0.035390377044677734, -0.03428363800048828, -0.03317689895629883, -0.032070159912109375, -0.030963420867919922, -0.02985668182373047, -0.028749942779541016, -0.027643203735351562, -0.02653646469116211, -0.025429725646972656, -0.024322986602783203, -0.02321624755859375, 
-0.022109508514404297, -0.021002769470214844, -0.01989603042602539, -0.018789291381835938, -0.017682552337646484, -0.01657581329345703, -0.015469074249267578, -0.014362335205078125, -0.013255596160888672, -0.012148857116699219, -0.011042118072509766, -0.009935379028320312, -0.00882863998413086, -0.007721900939941406, -0.006615161895751953, -0.0055084228515625, -0.004401683807373047, -0.0032949447631835938, -0.0021882057189941406, -0.0010814666748046875, 2.5272369384765625e-05, 0.0011320114135742188, 0.002238750457763672, 0.003345489501953125, 0.004452228546142578, 0.005558967590332031, 0.006665706634521484, 0.0077724456787109375, 0.00887918472290039, 0.009985923767089844, 0.011092662811279297, 0.01219940185546875, 0.013306140899658203, 0.014412879943847656, 0.01551961898803711, 0.016626358032226562, 0.017733097076416016, 0.01883983612060547, 0.019946575164794922, 0.021053314208984375, 0.022160053253173828, 0.02326679229736328, 0.024373531341552734, 0.025480270385742188, 0.02658700942993164, 0.027693748474121094, 0.028800487518310547, 0.0299072265625]}, "gradients/encoder.encoder.layers.9.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 4.0, 2.0, 4.0, 4.0, 3.0, 7.0, 11.0, 11.0, 25.0, 20.0, 35.0, 59.0, 84.0, 116.0, 193.0, 339.0, 607.0, 819.0, 725.0, 389.0, 218.0, 125.0, 76.0, 65.0, 55.0, 28.0, 16.0, 11.0, 14.0, 10.0, 1.0, 3.0, 2.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.045074462890625, -0.0435490608215332, -0.042023658752441406, -0.04049825668334961, -0.03897285461425781, -0.037447452545166016, -0.03592205047607422, -0.03439664840698242, -0.032871246337890625, -0.03134584426879883, -0.02982044219970703, -0.028295040130615234, -0.026769638061523438, -0.02524423599243164, -0.023718833923339844, -0.022193431854248047, -0.02066802978515625, -0.019142627716064453, -0.017617225646972656, -0.01609182357788086, -0.014566421508789062, -0.013041019439697266, -0.011515617370605469, -0.009990215301513672, -0.008464813232421875, -0.006939411163330078, -0.005414009094238281, -0.0038886070251464844, -0.0023632049560546875, -0.0008378028869628906, 0.0006875991821289062, 0.002213001251220703, 0.0037384033203125, 0.005263805389404297, 0.006789207458496094, 0.00831460952758789, 0.009840011596679688, 0.011365413665771484, 0.012890815734863281, 0.014416217803955078, 0.015941619873046875, 0.017467021942138672, 0.01899242401123047, 0.020517826080322266, 0.022043228149414062, 0.02356863021850586, 0.025094032287597656, 0.026619434356689453, 0.02814483642578125, 0.029670238494873047, 0.031195640563964844, 0.03272104263305664, 0.03424644470214844, 0.035771846771240234, 0.03729724884033203, 0.03882265090942383, 0.040348052978515625, 0.04187345504760742, 0.04339885711669922, 0.044924259185791016, 0.04644966125488281, 0.04797506332397461, 0.049500465393066406, 0.0510258674621582, 0.05255126953125]}, "gradients/encoder.encoder.layers.9.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 18.0, 28.0, 49.0, 112.0, 116.0, 179.0, 193.0, 124.0, 85.0, 47.0, 26.0, 10.0, 6.0, 4.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.4807577133178711, -0.47068750858306885, -0.4606173038482666, -0.45054709911346436, -0.4404768943786621, -0.43040668964385986, 
-0.4203364849090576, -0.41026628017425537, -0.4001960754394531, -0.3901258707046509, -0.38005566596984863, -0.3699854612350464, -0.35991525650024414, -0.3498450517654419, -0.33977484703063965, -0.3297046422958374, -0.31963446736335754, -0.3095642626285553, -0.29949405789375305, -0.2894238531589508, -0.27935364842414856, -0.2692834436893463, -0.25921323895454407, -0.24914304912090302, -0.23907284438610077, -0.22900263965129852, -0.21893243491649628, -0.20886223018169403, -0.19879204034805298, -0.18872183561325073, -0.1786516308784485, -0.16858142614364624, -0.15851125121116638, -0.14844104647636414, -0.1383708417415619, -0.12830063700675964, -0.118230439722538, -0.10816023498773575, -0.0980900377035141, -0.08801983296871185, -0.0779496282339096, -0.06787942349910736, -0.05780922248959541, -0.047739021480083466, -0.03766881674528122, -0.027598612010478973, -0.017528411000967026, -0.007458209991455078, 0.002611994743347168, 0.012682197615504265, 0.02275240048766136, 0.03282260149717331, 0.042892806231975555, 0.0529630109667778, 0.06303320825099945, 0.0731034129858017, 0.08317361772060394, 0.09324382245540619, 0.10331402719020844, 0.11338422447443008, 0.12345442920923233, 0.13352462649345398, 0.14359483122825623, 0.15366503596305847, 0.16373524069786072]}, "gradients/encoder.encoder.layers.9.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 1.0, 4.0, 2.0, 1.0, 4.0, 6.0, 3.0, 7.0, 4.0, 8.0, 12.0, 20.0, 14.0, 13.0, 22.0, 20.0, 21.0, 26.0, 35.0, 36.0, 25.0, 35.0, 37.0, 40.0, 42.0, 47.0, 37.0, 39.0, 46.0, 29.0, 43.0, 37.0, 37.0, 21.0, 19.0, 20.0, 25.0, 35.0, 20.0, 14.0, 12.0, 12.0, 12.0, 16.0, 11.0, 3.0, 8.0, 4.0, 3.0, 8.0, 6.0, 4.0, 1.0, 3.0, 7.0, 0.0, 0.0, 1.0], "bins": [-0.10535866022109985, -0.10217900574207306, -0.09899934381246567, -0.09581968188285828, -0.09264002740383148, -0.08946037292480469, -0.0862807109951973, -0.0831010490655899, -0.07992139458656311, -0.07674174010753632, -0.07356207817792892, -0.07038241624832153, -0.06720276176929474, -0.06402310729026794, -0.06084344536066055, -0.05766378715634346, -0.05448412895202637, -0.051304470747709274, -0.04812481254339218, -0.04494515433907509, -0.041765496134757996, -0.0385858379304409, -0.03540617972612381, -0.03222652152180672, -0.029046863317489624, -0.02586720511317253, -0.022687546908855438, -0.019507888704538345, -0.016328230500221252, -0.01314857229590416, -0.009968914091587067, -0.006789255887269974, -0.003609597682952881, -0.00042993947863578796, 0.002749718725681305, 0.005929376929998398, 0.00910903513431549, 0.012288693338632584, 0.015468351542949677, 0.01864800974726677, 0.021827667951583862, 0.025007326155900955, 0.028186984360218048, 0.03136664256453514, 0.034546300768852234, 0.03772595897316933, 0.04090561717748642, 0.04408527538180351, 0.047264933586120605, 0.0504445917904377, 0.05362424999475479, 0.056803908199071884, 0.05998356640338898, 0.06316322088241577, 0.06634288281202316, 0.06952254474163055, 0.07270219922065735, 0.07588185369968414, 0.07906151562929153, 0.08224117755889893, 0.08542083203792572, 0.08860048651695251, 0.0917801484465599, 0.0949598103761673, 0.09813946485519409]}, "gradients/encoder.encoder.layers.9.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 7.0, 7.0, 5.0, 12.0, 10.0, 18.0, 25.0, 38.0, 50.0, 61.0, 85.0, 124.0, 194.0, 363.0, 640.0, 1365.0, 3456.0, 11067.0, 45645.0, 223218.0, 539473.0, 171999.0, 35838.0, 9221.0, 2957.0, 1167.0, 559.0, 317.0, 202.0, 139.0, 82.0, 75.0, 
41.0, 24.0, 21.0, 15.0, 8.0, 8.0, 9.0, 5.0, 5.0, 0.0, 5.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0], "bins": [-0.09039306640625, -0.08784103393554688, -0.08528900146484375, -0.08273696899414062, -0.0801849365234375, -0.07763290405273438, -0.07508087158203125, -0.07252883911132812, -0.069976806640625, -0.06742477416992188, -0.06487274169921875, -0.062320709228515625, -0.0597686767578125, -0.057216644287109375, -0.05466461181640625, -0.052112579345703125, -0.049560546875, -0.047008514404296875, -0.04445648193359375, -0.041904449462890625, -0.0393524169921875, -0.036800384521484375, -0.03424835205078125, -0.031696319580078125, -0.029144287109375, -0.026592254638671875, -0.02404022216796875, -0.021488189697265625, -0.0189361572265625, -0.016384124755859375, -0.01383209228515625, -0.011280059814453125, -0.00872802734375, -0.006175994873046875, -0.00362396240234375, -0.001071929931640625, 0.0014801025390625, 0.004032135009765625, 0.00658416748046875, 0.009136199951171875, 0.011688232421875, 0.014240264892578125, 0.01679229736328125, 0.019344329833984375, 0.0218963623046875, 0.024448394775390625, 0.02700042724609375, 0.029552459716796875, 0.0321044921875, 0.034656524658203125, 0.03720855712890625, 0.039760589599609375, 0.0423126220703125, 0.044864654541015625, 0.04741668701171875, 0.049968719482421875, 0.052520751953125, 0.055072784423828125, 0.05762481689453125, 0.060176849365234375, 0.0627288818359375, 0.06528091430664062, 0.06783294677734375, 0.07038497924804688, 0.07293701171875]}, "gradients/encoder.encoder.layers.9.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 4.0, 1.0, 5.0, 9.0, 9.0, 28.0, 50.0, 73.0, 97.0, 116.0, 137.0, 133.0, 109.0, 97.0, 69.0, 33.0, 21.0, 9.0, 9.0, 3.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.09429931640625, -0.09159183502197266, -0.08888435363769531, -0.08617687225341797, -0.08346939086914062, -0.08076190948486328, -0.07805442810058594, -0.0753469467163086, -0.07263946533203125, -0.0699319839477539, -0.06722450256347656, -0.06451702117919922, -0.061809539794921875, -0.05910205841064453, -0.05639457702636719, -0.053687095642089844, -0.0509796142578125, -0.048272132873535156, -0.04556465148925781, -0.04285717010498047, -0.040149688720703125, -0.03744220733642578, -0.03473472595214844, -0.032027244567871094, -0.02931976318359375, -0.026612281799316406, -0.023904800415039062, -0.02119731903076172, -0.018489837646484375, -0.01578235626220703, -0.013074874877929688, -0.010367393493652344, -0.007659912109375, -0.004952430725097656, -0.0022449493408203125, 0.00046253204345703125, 0.003170013427734375, 0.005877494812011719, 0.008584976196289062, 0.011292457580566406, 0.01399993896484375, 0.016707420349121094, 0.019414901733398438, 0.02212238311767578, 0.024829864501953125, 0.02753734588623047, 0.030244827270507812, 0.032952308654785156, 0.0356597900390625, 0.038367271423339844, 0.04107475280761719, 0.04378223419189453, 0.046489715576171875, 0.04919719696044922, 0.05190467834472656, 0.054612159729003906, 0.05731964111328125, 0.060027122497558594, 0.06273460388183594, 0.06544208526611328, 0.06814956665039062, 0.07085704803466797, 0.07356452941894531, 0.07627201080322266, 0.0789794921875]}, "gradients/encoder.encoder.layers.9.attention.v_proj.weight": {"_type": "histogram", "values": [3.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 10.0, 16.0, 8.0, 20.0, 27.0, 42.0, 38.0, 
98.0, 153.0, 226.0, 400.0, 830.0, 1558.0, 3228.0, 6654.0, 14178.0, 31474.0, 72291.0, 166977.0, 300309.0, 244754.0, 114725.0, 49367.0, 21707.0, 9879.0, 4673.0, 2286.0, 1114.0, 625.0, 354.0, 193.0, 103.0, 67.0, 44.0, 34.0, 29.0, 14.0, 11.0, 10.0, 6.0, 5.0, 4.0, 3.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.03570556640625, -0.034581661224365234, -0.03345775604248047, -0.0323338508605957, -0.031209945678710938, -0.030086040496826172, -0.028962135314941406, -0.02783823013305664, -0.026714324951171875, -0.02559041976928711, -0.024466514587402344, -0.023342609405517578, -0.022218704223632812, -0.021094799041748047, -0.01997089385986328, -0.018846988677978516, -0.01772308349609375, -0.016599178314208984, -0.015475273132324219, -0.014351367950439453, -0.013227462768554688, -0.012103557586669922, -0.010979652404785156, -0.00985574722290039, -0.008731842041015625, -0.007607936859130859, -0.006484031677246094, -0.005360126495361328, -0.0042362213134765625, -0.003112316131591797, -0.0019884109497070312, -0.0008645057678222656, 0.0002593994140625, 0.0013833045959472656, 0.0025072097778320312, 0.003631114959716797, 0.0047550201416015625, 0.005878925323486328, 0.007002830505371094, 0.00812673568725586, 0.009250640869140625, 0.01037454605102539, 0.011498451232910156, 0.012622356414794922, 0.013746261596679688, 0.014870166778564453, 0.01599407196044922, 0.017117977142333984, 0.01824188232421875, 0.019365787506103516, 0.02048969268798828, 0.021613597869873047, 0.022737503051757812, 0.023861408233642578, 0.024985313415527344, 0.02610921859741211, 0.027233123779296875, 0.02835702896118164, 0.029480934143066406, 0.030604839324951172, 0.03172874450683594, 0.0328526496887207, 0.03397655487060547, 0.035100460052490234, 0.036224365234375]}, "gradients/encoder.encoder.layers.9.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 3.0, 7.0, 8.0, 9.0, 8.0, 4.0, 9.0, 15.0, 17.0, 28.0, 25.0, 19.0, 37.0, 35.0, 32.0, 49.0, 51.0, 57.0, 59.0, 56.0, 56.0, 52.0, 53.0, 37.0, 31.0, 29.0, 32.0, 38.0, 33.0, 21.0, 16.0, 22.0, 15.0, 11.0, 7.0, 8.0, 7.0, 4.0, 3.0, 0.0, 5.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0], "bins": [-0.095703125, -0.09309959411621094, -0.09049606323242188, -0.08789253234863281, -0.08528900146484375, -0.08268547058105469, -0.08008193969726562, -0.07747840881347656, -0.0748748779296875, -0.07227134704589844, -0.06966781616210938, -0.06706428527832031, -0.06446075439453125, -0.06185722351074219, -0.059253692626953125, -0.05665016174316406, -0.054046630859375, -0.05144309997558594, -0.048839569091796875, -0.04623603820800781, -0.04363250732421875, -0.04102897644042969, -0.038425445556640625, -0.03582191467285156, -0.0332183837890625, -0.030614852905273438, -0.028011322021484375, -0.025407791137695312, -0.02280426025390625, -0.020200729370117188, -0.017597198486328125, -0.014993667602539062, -0.01239013671875, -0.009786605834960938, -0.007183074951171875, -0.0045795440673828125, -0.00197601318359375, 0.0006275177001953125, 0.003231048583984375, 0.0058345794677734375, 0.0084381103515625, 0.011041641235351562, 0.013645172119140625, 0.016248703002929688, 0.01885223388671875, 0.021455764770507812, 0.024059295654296875, 0.026662826538085938, 0.029266357421875, 0.03186988830566406, 0.034473419189453125, 0.03707695007324219, 0.03968048095703125, 0.04228401184082031, 0.044887542724609375, 0.04749107360839844, 0.0500946044921875, 0.05269813537597656, 0.055301666259765625, 0.05790519714355469, 0.06050872802734375, 
0.06311225891113281, 0.06571578979492188, 0.06831932067871094, 0.0709228515625]}, "gradients/encoder.encoder.layers.9.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 5.0, 7.0, 4.0, 14.0, 19.0, 30.0, 36.0, 54.0, 76.0, 109.0, 160.0, 269.0, 421.0, 628.0, 1059.0, 1733.0, 3150.0, 6334.0, 14763.0, 38593.0, 125577.0, 366264.0, 326676.0, 104342.0, 32670.0, 12824.0, 5686.0, 2784.0, 1598.0, 919.0, 543.0, 381.0, 279.0, 176.0, 121.0, 92.0, 55.0, 39.0, 25.0, 17.0, 9.0, 9.0, 7.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01873779296875, -0.01816868782043457, -0.01759958267211914, -0.01703047752380371, -0.01646137237548828, -0.01589226722717285, -0.015323162078857422, -0.014754056930541992, -0.014184951782226562, -0.013615846633911133, -0.013046741485595703, -0.012477636337280273, -0.011908531188964844, -0.011339426040649414, -0.010770320892333984, -0.010201215744018555, -0.009632110595703125, -0.009063005447387695, -0.008493900299072266, -0.007924795150756836, -0.007355690002441406, -0.0067865848541259766, -0.006217479705810547, -0.005648374557495117, -0.0050792694091796875, -0.004510164260864258, -0.003941059112548828, -0.0033719539642333984, -0.0028028488159179688, -0.002233743667602539, -0.0016646385192871094, -0.0010955333709716797, -0.00052642822265625, 4.267692565917969e-05, 0.0006117820739746094, 0.001180887222290039, 0.0017499923706054688, 0.0023190975189208984, 0.002888202667236328, 0.003457307815551758, 0.0040264129638671875, 0.004595518112182617, 0.005164623260498047, 0.0057337284088134766, 0.006302833557128906, 0.006871938705444336, 0.007441043853759766, 0.008010149002075195, 0.008579254150390625, 0.009148359298706055, 0.009717464447021484, 0.010286569595336914, 0.010855674743652344, 0.011424779891967773, 0.011993885040283203, 0.012562990188598633, 0.013132095336914062, 0.013701200485229492, 0.014270305633544922, 0.014839410781860352, 0.015408515930175781, 0.01597762107849121, 0.01654672622680664, 0.01711583137512207, 0.0176849365234375]}, "gradients/encoder.encoder.layers.9.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 4.0, 4.0, 5.0, 4.0, 10.0, 14.0, 19.0, 18.0, 21.0, 27.0, 25.0, 34.0, 46.0, 56.0, 65.0, 78.0, 86.0, 61.0, 64.0, 41.0, 60.0, 59.0, 41.0, 30.0, 31.0, 31.0, 14.0, 16.0, 9.0, 11.0, 12.0, 5.0, 3.0, 0.0, 5.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.298324584960938e-06, -9.046867489814758e-06, -8.795410394668579e-06, -8.5439532995224e-06, -8.29249620437622e-06, -8.041039109230042e-06, -7.789582014083862e-06, -7.538124918937683e-06, -7.286667823791504e-06, -7.035210728645325e-06, -6.7837536334991455e-06, -6.532296538352966e-06, -6.280839443206787e-06, -6.029382348060608e-06, -5.777925252914429e-06, -5.5264681577682495e-06, -5.27501106262207e-06, -5.023553967475891e-06, -4.772096872329712e-06, -4.520639777183533e-06, -4.2691826820373535e-06, -4.017725586891174e-06, -3.766268491744995e-06, -3.514811396598816e-06, -3.2633543014526367e-06, -3.0118972063064575e-06, -2.7604401111602783e-06, -2.508983016014099e-06, -2.25752592086792e-06, -2.0060688257217407e-06, -1.7546117305755615e-06, -1.5031546354293823e-06, -1.2516975402832031e-06, -1.000240445137024e-06, -7.487833499908447e-07, -4.973262548446655e-07, -2.4586915969848633e-07, 5.587935447692871e-09, 2.5704503059387207e-07, 5.085021257400513e-07, 7.599592208862305e-07, 1.0114163160324097e-06, 1.2628734111785889e-06, 
1.514330506324768e-06, 1.7657876014709473e-06, 2.0172446966171265e-06, 2.2687017917633057e-06, 2.520158886909485e-06, 2.771615982055664e-06, 3.0230730772018433e-06, 3.2745301723480225e-06, 3.5259872674942017e-06, 3.777444362640381e-06, 4.02890145778656e-06, 4.280358552932739e-06, 4.5318156480789185e-06, 4.783272743225098e-06, 5.034729838371277e-06, 5.286186933517456e-06, 5.537644028663635e-06, 5.7891011238098145e-06, 6.040558218955994e-06, 6.292015314102173e-06, 6.543472409248352e-06, 6.794929504394531e-06]}, "gradients/encoder.encoder.layers.9.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 5.0, 4.0, 4.0, 13.0, 12.0, 27.0, 26.0, 29.0, 47.0, 58.0, 84.0, 125.0, 306.0, 648.0, 1569.0, 4116.0, 13488.0, 54405.0, 273200.0, 521353.0, 136816.0, 29432.0, 8010.0, 2678.0, 1036.0, 433.0, 237.0, 133.0, 89.0, 53.0, 43.0, 25.0, 19.0, 17.0, 9.0, 3.0, 6.0, 5.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0231475830078125, -0.02226710319519043, -0.02138662338256836, -0.02050614356994629, -0.01962566375732422, -0.01874518394470215, -0.017864704132080078, -0.016984224319458008, -0.016103744506835938, -0.015223264694213867, -0.014342784881591797, -0.013462305068969727, -0.012581825256347656, -0.011701345443725586, -0.010820865631103516, -0.009940385818481445, -0.009059906005859375, -0.008179426193237305, -0.007298946380615234, -0.006418466567993164, -0.005537986755371094, -0.0046575069427490234, -0.003777027130126953, -0.002896547317504883, -0.0020160675048828125, -0.0011355876922607422, -0.0002551078796386719, 0.0006253719329833984, 0.0015058517456054688, 0.002386331558227539, 0.0032668113708496094, 0.00414729118347168, 0.00502777099609375, 0.00590825080871582, 0.006788730621337891, 0.007669210433959961, 0.008549690246582031, 0.009430170059204102, 0.010310649871826172, 0.011191129684448242, 0.012071609497070312, 0.012952089309692383, 0.013832569122314453, 0.014713048934936523, 0.015593528747558594, 0.016474008560180664, 0.017354488372802734, 0.018234968185424805, 0.019115447998046875, 0.019995927810668945, 0.020876407623291016, 0.021756887435913086, 0.022637367248535156, 0.023517847061157227, 0.024398326873779297, 0.025278806686401367, 0.026159286499023438, 0.027039766311645508, 0.027920246124267578, 0.02880072593688965, 0.02968120574951172, 0.03056168556213379, 0.03144216537475586, 0.03232264518737793, 0.033203125]}, "gradients/encoder.encoder.layers.9.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 5.0, 7.0, 11.0, 21.0, 26.0, 45.0, 63.0, 72.0, 90.0, 100.0, 94.0, 103.0, 89.0, 66.0, 59.0, 52.0, 31.0, 12.0, 23.0, 10.0, 4.0, 7.0, 4.0, 5.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0204925537109375, -0.019776582717895508, -0.019060611724853516, -0.018344640731811523, -0.01762866973876953, -0.01691269874572754, -0.016196727752685547, -0.015480756759643555, -0.014764785766601562, -0.01404881477355957, -0.013332843780517578, -0.012616872787475586, -0.011900901794433594, -0.011184930801391602, -0.01046895980834961, -0.009752988815307617, -0.009037017822265625, -0.008321046829223633, -0.007605075836181641, -0.0068891048431396484, -0.006173133850097656, -0.005457162857055664, -0.004741191864013672, -0.00402522087097168, -0.0033092498779296875, -0.0025932788848876953, -0.0018773078918457031, -0.001161336898803711, 
-0.00044536590576171875, 0.00027060508728027344, 0.0009865760803222656, 0.0017025470733642578, 0.00241851806640625, 0.003134489059448242, 0.0038504600524902344, 0.0045664310455322266, 0.005282402038574219, 0.005998373031616211, 0.006714344024658203, 0.007430315017700195, 0.008146286010742188, 0.00886225700378418, 0.009578227996826172, 0.010294198989868164, 0.011010169982910156, 0.011726140975952148, 0.01244211196899414, 0.013158082962036133, 0.013874053955078125, 0.014590024948120117, 0.01530599594116211, 0.0160219669342041, 0.016737937927246094, 0.017453908920288086, 0.018169879913330078, 0.01888585090637207, 0.019601821899414062, 0.020317792892456055, 0.021033763885498047, 0.02174973487854004, 0.02246570587158203, 0.023181676864624023, 0.023897647857666016, 0.024613618850708008, 0.02532958984375]}, "gradients/encoder.encoder.layers.9.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 6.0, 16.0, 33.0, 47.0, 95.0, 159.0, 172.0, 190.0, 129.0, 76.0, 44.0, 24.0, 5.0, 3.0, 4.0, 2.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.14397837221622467, -0.13476581871509552, -0.12555326521396637, -0.11634071916341782, -0.10712817311286926, -0.09791561961174011, -0.08870306611061096, -0.07949052006006241, -0.07027796655893326, -0.061065416783094406, -0.051852867007255554, -0.042640313506126404, -0.03342776373028755, -0.0242152139544487, -0.01500266045331955, -0.005790114402770996, 0.0034224390983581543, 0.01263498980551958, 0.021847540512681007, 0.03106009215116501, 0.04027264192700386, 0.04948519170284271, 0.05869774520397186, 0.06791029125452042, 0.07712284475564957, 0.08633539825677872, 0.09554794430732727, 0.10476049780845642, 0.11397305130958557, 0.12318559736013412, 0.13239815831184387, 0.14161069691181183, 0.15082326531410217, 0.16003581881523132, 0.16924837231636047, 0.17846092581748962, 0.18767346441745758, 0.19688601791858673, 0.20609857141971588, 0.21531111001968384, 0.224523663520813, 0.23373621702194214, 0.2429487705230713, 0.25216132402420044, 0.2613738775253296, 0.27058643102645874, 0.2797989845275879, 0.28901150822639465, 0.2982240915298462, 0.30743664503097534, 0.3166491985321045, 0.32586175203323364, 0.3350743055343628, 0.34428685903549194, 0.3534994125366211, 0.36271193623542786, 0.371924489736557, 0.38113704323768616, 0.3903495967388153, 0.39956215023994446, 0.4087747037410736, 0.41798722743988037, 0.4271997809410095, 0.43641233444213867, 0.4456248879432678]}, "gradients/encoder.encoder.layers.9.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 7.0, 6.0, 6.0, 9.0, 10.0, 12.0, 15.0, 16.0, 21.0, 21.0, 25.0, 19.0, 23.0, 38.0, 19.0, 35.0, 33.0, 41.0, 41.0, 24.0, 29.0, 37.0, 32.0, 39.0, 38.0, 30.0, 33.0, 36.0, 29.0, 27.0, 35.0, 25.0, 26.0, 25.0, 24.0, 22.0, 13.0, 10.0, 11.0, 15.0, 8.0, 11.0, 11.0, 5.0, 3.0, 4.0, 3.0, 4.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.15370112657546997, -0.14907239377498627, -0.14444366097450256, -0.13981491327285767, -0.13518618047237396, -0.13055744767189026, -0.12592869997024536, -0.12129996716976166, -0.11667123436927795, -0.11204250156879425, -0.10741376131772995, -0.10278502106666565, -0.09815628826618195, -0.09352755546569824, -0.08889881521463394, -0.08427007496356964, -0.07964134216308594, -0.07501260936260223, -0.07038386911153793, -0.06575512886047363, -0.06112639605998993, 
-0.05649765953421593, -0.051868923008441925, -0.04724018648266792, -0.04261144995689392, -0.03798271343111992, -0.03335397690534592, -0.028725240379571915, -0.024096503853797913, -0.01946776732802391, -0.014839030802249908, -0.010210294276475906, -0.005581557750701904, -0.0009528212249279022, 0.0036759153008461, 0.008304651826620102, 0.012933388352394104, 0.017562124878168106, 0.022190861403942108, 0.02681959792971611, 0.03144833445549011, 0.036077070981264114, 0.040705807507038116, 0.04533454403281212, 0.04996328055858612, 0.05459201708436012, 0.059220753610134125, 0.06384949386119843, 0.06847822666168213, 0.07310695946216583, 0.07773569971323013, 0.08236443996429443, 0.08699317276477814, 0.09162190556526184, 0.09625064581632614, 0.10087938606739044, 0.10550811886787415, 0.11013685166835785, 0.11476559191942215, 0.11939433217048645, 0.12402306497097015, 0.12865179777145386, 0.13328054547309875, 0.13790927827358246, 0.14253801107406616]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.0, 2.0, 2.0, 6.0, 2.0, 9.0, 7.0, 11.0, 19.0, 36.0, 67.0, 98.0, 159.0, 293.0, 604.0, 1237.0, 3992.0, 15322.0, 112259.0, 2558642.0, 1423130.0, 62734.0, 10713.0, 3072.0, 962.0, 420.0, 226.0, 115.0, 60.0, 29.0, 22.0, 17.0, 10.0, 6.0, 5.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0777587890625, -0.07486629486083984, -0.07197380065917969, -0.06908130645751953, -0.06618881225585938, -0.06329631805419922, -0.06040382385253906, -0.057511329650878906, -0.05461883544921875, -0.051726341247558594, -0.04883384704589844, -0.04594135284423828, -0.043048858642578125, -0.04015636444091797, -0.03726387023925781, -0.034371376037597656, -0.0314788818359375, -0.028586387634277344, -0.025693893432617188, -0.02280139923095703, -0.019908905029296875, -0.01701641082763672, -0.014123916625976562, -0.011231422424316406, -0.00833892822265625, -0.005446434020996094, -0.0025539398193359375, 0.00033855438232421875, 0.003231048583984375, 0.006123542785644531, 0.009016036987304688, 0.011908531188964844, 0.014801025390625, 0.017693519592285156, 0.020586013793945312, 0.02347850799560547, 0.026371002197265625, 0.02926349639892578, 0.03215599060058594, 0.035048484802246094, 0.03794097900390625, 0.040833473205566406, 0.04372596740722656, 0.04661846160888672, 0.049510955810546875, 0.05240345001220703, 0.05529594421386719, 0.058188438415527344, 0.0610809326171875, 0.06397342681884766, 0.06686592102050781, 0.06975841522216797, 0.07265090942382812, 0.07554340362548828, 0.07843589782714844, 0.0813283920288086, 0.08422088623046875, 0.0871133804321289, 0.09000587463378906, 0.09289836883544922, 0.09579086303710938, 0.09868335723876953, 0.10157585144042969, 0.10446834564208984, 0.10736083984375]}, "gradients/encoder.encoder.layers.8.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 10.0, 13.0, 18.0, 34.0, 48.0, 81.0, 101.0, 125.0, 143.0, 104.0, 115.0, 78.0, 49.0, 42.0, 25.0, 10.0, 8.0, 3.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.09112548828125, -0.08849334716796875, -0.0858612060546875, -0.08322906494140625, -0.080596923828125, -0.07796478271484375, -0.0753326416015625, -0.07270050048828125, -0.070068359375, -0.06743621826171875, -0.0648040771484375, 
-0.06217193603515625, -0.059539794921875, -0.05690765380859375, -0.0542755126953125, -0.05164337158203125, -0.04901123046875, -0.04637908935546875, -0.0437469482421875, -0.04111480712890625, -0.038482666015625, -0.03585052490234375, -0.0332183837890625, -0.03058624267578125, -0.0279541015625, -0.02532196044921875, -0.0226898193359375, -0.02005767822265625, -0.017425537109375, -0.01479339599609375, -0.0121612548828125, -0.00952911376953125, -0.00689697265625, -0.00426483154296875, -0.0016326904296875, 0.00099945068359375, 0.003631591796875, 0.00626373291015625, 0.0088958740234375, 0.01152801513671875, 0.01416015625, 0.01679229736328125, 0.0194244384765625, 0.02205657958984375, 0.024688720703125, 0.02732086181640625, 0.0299530029296875, 0.03258514404296875, 0.03521728515625, 0.03784942626953125, 0.0404815673828125, 0.04311370849609375, 0.045745849609375, 0.04837799072265625, 0.0510101318359375, 0.05364227294921875, 0.0562744140625, 0.05890655517578125, 0.0615386962890625, 0.06417083740234375, 0.066802978515625, 0.06943511962890625, 0.0720672607421875, 0.07469940185546875, 0.07733154296875]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 4.0, 2.0, 5.0, 17.0, 21.0, 24.0, 44.0, 55.0, 118.0, 160.0, 263.0, 457.0, 817.0, 1423.0, 2561.0, 4749.0, 9051.0, 18261.0, 38954.0, 89774.0, 240190.0, 866365.0, 1909140.0, 669741.0, 198357.0, 76798.0, 33818.0, 15798.0, 7832.0, 4166.0, 2278.0, 1222.0, 752.0, 398.0, 249.0, 152.0, 105.0, 49.0, 46.0, 15.0, 13.0, 19.0, 7.0, 7.0, 5.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0294647216796875, -0.02858281135559082, -0.02770090103149414, -0.02681899070739746, -0.02593708038330078, -0.0250551700592041, -0.024173259735107422, -0.023291349411010742, -0.022409439086914062, -0.021527528762817383, -0.020645618438720703, -0.019763708114624023, -0.018881797790527344, -0.017999887466430664, -0.017117977142333984, -0.016236066818237305, -0.015354156494140625, -0.014472246170043945, -0.013590335845947266, -0.012708425521850586, -0.011826515197753906, -0.010944604873657227, -0.010062694549560547, -0.009180784225463867, -0.008298873901367188, -0.007416963577270508, -0.006535053253173828, -0.0056531429290771484, -0.004771232604980469, -0.003889322280883789, -0.0030074119567871094, -0.0021255016326904297, -0.00124359130859375, -0.0003616809844970703, 0.0005202293395996094, 0.001402139663696289, 0.0022840499877929688, 0.0031659603118896484, 0.004047870635986328, 0.004929780960083008, 0.0058116912841796875, 0.006693601608276367, 0.007575511932373047, 0.008457422256469727, 0.009339332580566406, 0.010221242904663086, 0.011103153228759766, 0.011985063552856445, 0.012866973876953125, 0.013748884201049805, 0.014630794525146484, 0.015512704849243164, 0.016394615173339844, 0.017276525497436523, 0.018158435821533203, 0.019040346145629883, 0.019922256469726562, 0.020804166793823242, 0.021686077117919922, 0.0225679874420166, 0.02344989776611328, 0.02433180809020996, 0.02521371841430664, 0.02609562873840332, 0.0269775390625]}, "gradients/encoder.encoder.layers.8.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [3.0, 2.0, 4.0, 0.0, 4.0, 1.0, 4.0, 6.0, 3.0, 5.0, 2.0, 8.0, 11.0, 6.0, 11.0, 14.0, 10.0, 12.0, 15.0, 29.0, 24.0, 47.0, 49.0, 68.0, 84.0, 101.0, 116.0, 184.0, 243.0, 309.0, 379.0, 436.0, 403.0, 338.0, 289.0, 194.0, 144.0, 118.0, 92.0, 63.0, 42.0, 34.0, 27.0, 28.0, 25.0, 11.0, 18.0, 13.0, 13.0, 9.0, 10.0, 4.0, 8.0, 2.0, 4.0, 2.0, 2.0, 2.0, 
4.0, 0.0, 1.0, 3.0, 1.0, 2.0], "bins": [-0.0303802490234375, -0.02942371368408203, -0.028467178344726562, -0.027510643005371094, -0.026554107666015625, -0.025597572326660156, -0.024641036987304688, -0.02368450164794922, -0.02272796630859375, -0.02177143096923828, -0.020814895629882812, -0.019858360290527344, -0.018901824951171875, -0.017945289611816406, -0.016988754272460938, -0.01603221893310547, -0.01507568359375, -0.014119148254394531, -0.013162612915039062, -0.012206077575683594, -0.011249542236328125, -0.010293006896972656, -0.009336471557617188, -0.008379936218261719, -0.00742340087890625, -0.006466865539550781, -0.0055103302001953125, -0.004553794860839844, -0.003597259521484375, -0.0026407241821289062, -0.0016841888427734375, -0.0007276535034179688, 0.0002288818359375, 0.0011854171752929688, 0.0021419525146484375, 0.0030984878540039062, 0.004055023193359375, 0.005011558532714844, 0.0059680938720703125, 0.006924629211425781, 0.00788116455078125, 0.008837699890136719, 0.009794235229492188, 0.010750770568847656, 0.011707305908203125, 0.012663841247558594, 0.013620376586914062, 0.014576911926269531, 0.015533447265625, 0.01648998260498047, 0.017446517944335938, 0.018403053283691406, 0.019359588623046875, 0.020316123962402344, 0.021272659301757812, 0.02222919464111328, 0.02318572998046875, 0.02414226531982422, 0.025098800659179688, 0.026055335998535156, 0.027011871337890625, 0.027968406677246094, 0.028924942016601562, 0.02988147735595703, 0.0308380126953125]}, "gradients/encoder.encoder.layers.8.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 1.0, 2.0, 5.0, 4.0, 10.0, 23.0, 34.0, 64.0, 92.0, 163.0, 152.0, 157.0, 117.0, 82.0, 46.0, 23.0, 16.0, 4.0, 3.0, 2.0, 6.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.404960036277771, -0.39456427097320557, -0.38416847586631775, -0.37377268075942993, -0.3633769154548645, -0.3529811501502991, -0.34258535504341125, -0.33218955993652344, -0.321793794631958, -0.3113980293273926, -0.30100223422050476, -0.29060643911361694, -0.2802106738090515, -0.2698149085044861, -0.25941911339759827, -0.24902333319187164, -0.23862755298614502, -0.2282317727804184, -0.21783599257469177, -0.20744021236896515, -0.19704443216323853, -0.1866486519575119, -0.17625287175178528, -0.16585709154605865, -0.15546131134033203, -0.1450655311346054, -0.13466975092887878, -0.12427397072315216, -0.11387819051742554, -0.10348241031169891, -0.09308663010597229, -0.08269084990024567, -0.07229506969451904, -0.06189928948879242, -0.051503509283065796, -0.04110772907733917, -0.03071194887161255, -0.020316168665885925, -0.009920388460159302, 0.0004753917455673218, 0.010871171951293945, 0.02126695215702057, 0.03166273236274719, 0.042058512568473816, 0.05245429277420044, 0.06285007297992706, 0.07324585318565369, 0.08364163339138031, 0.09403741359710693, 0.10443319380283356, 0.11482897400856018, 0.1252247542142868, 0.13562053442001343, 0.14601631462574005, 0.15641209483146667, 0.1668078750371933, 0.17720365524291992, 0.18759943544864655, 0.19799521565437317, 0.2083909958600998, 0.21878677606582642, 0.22918255627155304, 0.23957833647727966, 0.2499741166830063, 0.2603698968887329]}, "gradients/encoder.encoder.layers.8.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 4.0, 2.0, 2.0, 3.0, 4.0, 6.0, 10.0, 8.0, 10.0, 13.0, 9.0, 11.0, 27.0, 31.0, 19.0, 40.0, 25.0, 37.0, 
48.0, 57.0, 52.0, 50.0, 52.0, 54.0, 43.0, 48.0, 44.0, 39.0, 35.0, 31.0, 34.0, 26.0, 25.0, 22.0, 16.0, 21.0, 12.0, 10.0, 6.0, 7.0, 3.0, 9.0, 6.0, 2.0, 3.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.15037477016448975, -0.14538130164146423, -0.14038784801959991, -0.1353943794965744, -0.13040092587471008, -0.12540745735168457, -0.12041399627923965, -0.11542053520679474, -0.11042707413434982, -0.10543361306190491, -0.10044015198945999, -0.09544669091701508, -0.09045322239398956, -0.08545976877212524, -0.08046630024909973, -0.07547283917665482, -0.0704793781042099, -0.06548591703176498, -0.06049245595932007, -0.055498991161584854, -0.05050553008913994, -0.04551206901669502, -0.04051860421895981, -0.03552514314651489, -0.030531682074069977, -0.02553822100162506, -0.020544758066534996, -0.015551296062767506, -0.010557834059000015, -0.0055643729865550995, -0.0005709100514650345, 0.0044225528836250305, 0.009416013956069946, 0.014409475959837437, 0.019402937963604927, 0.024396400898694992, 0.029389861971139908, 0.034383323043584824, 0.03937678784132004, 0.044370248913764954, 0.04936370998620987, 0.054357171058654785, 0.0593506321310997, 0.06434409320354462, 0.06933756172657013, 0.07433101534843445, 0.07932448387145996, 0.08431794494390488, 0.08931140601634979, 0.09430486708879471, 0.09929832816123962, 0.10429178923368454, 0.10928525030612946, 0.11427871882915497, 0.11927217990159988, 0.1242656409740448, 0.1292591094970703, 0.13425257802009583, 0.13924603164196014, 0.14423950016498566, 0.14923295378684998, 0.1542264223098755, 0.1592198759317398, 0.16421334445476532, 0.16920679807662964]}, "gradients/encoder.encoder.layers.8.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 0.0, 2.0, 2.0, 5.0, 5.0, 6.0, 9.0, 11.0, 13.0, 12.0, 25.0, 35.0, 51.0, 62.0, 78.0, 114.0, 163.0, 270.0, 417.0, 713.0, 1444.0, 3394.0, 9181.0, 29230.0, 108000.0, 393421.0, 363885.0, 96667.0, 26510.0, 8451.0, 3150.0, 1348.0, 678.0, 391.0, 252.0, 159.0, 106.0, 88.0, 46.0, 62.0, 30.0, 19.0, 19.0, 16.0, 8.0, 7.0, 4.0, 2.0, 0.0, 1.0, 5.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.068603515625, -0.06623458862304688, -0.06386566162109375, -0.061496734619140625, -0.0591278076171875, -0.056758880615234375, -0.05438995361328125, -0.052021026611328125, -0.049652099609375, -0.047283172607421875, -0.04491424560546875, -0.042545318603515625, -0.0401763916015625, -0.037807464599609375, -0.03543853759765625, -0.033069610595703125, -0.03070068359375, -0.028331756591796875, -0.02596282958984375, -0.023593902587890625, -0.0212249755859375, -0.018856048583984375, -0.01648712158203125, -0.014118194580078125, -0.011749267578125, -0.009380340576171875, -0.00701141357421875, -0.004642486572265625, -0.0022735595703125, 9.5367431640625e-05, 0.00246429443359375, 0.004833221435546875, 0.0072021484375, 0.009571075439453125, 0.01194000244140625, 0.014308929443359375, 0.0166778564453125, 0.019046783447265625, 0.02141571044921875, 0.023784637451171875, 0.026153564453125, 0.028522491455078125, 0.03089141845703125, 0.033260345458984375, 0.0356292724609375, 0.037998199462890625, 0.04036712646484375, 0.042736053466796875, 0.04510498046875, 0.047473907470703125, 0.04984283447265625, 0.052211761474609375, 0.0545806884765625, 0.056949615478515625, 0.05931854248046875, 0.061687469482421875, 0.064056396484375, 0.06642532348632812, 0.06879425048828125, 0.07116317749023438, 0.0735321044921875, 0.07590103149414062, 0.07826995849609375, 0.08063888549804688, 0.0830078125]}, 
"gradients/encoder.encoder.layers.8.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 7.0, 13.0, 33.0, 46.0, 76.0, 100.0, 132.0, 148.0, 149.0, 109.0, 76.0, 57.0, 34.0, 9.0, 10.0, 5.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.100830078125, -0.09799766540527344, -0.09516525268554688, -0.09233283996582031, -0.08950042724609375, -0.08666801452636719, -0.08383560180664062, -0.08100318908691406, -0.0781707763671875, -0.07533836364746094, -0.07250595092773438, -0.06967353820800781, -0.06684112548828125, -0.06400871276855469, -0.061176300048828125, -0.05834388732910156, -0.055511474609375, -0.05267906188964844, -0.049846649169921875, -0.04701423645019531, -0.04418182373046875, -0.04134941101074219, -0.038516998291015625, -0.03568458557128906, -0.0328521728515625, -0.030019760131835938, -0.027187347412109375, -0.024354934692382812, -0.02152252197265625, -0.018690109252929688, -0.015857696533203125, -0.013025283813476562, -0.01019287109375, -0.0073604583740234375, -0.004528045654296875, -0.0016956329345703125, 0.00113677978515625, 0.0039691925048828125, 0.006801605224609375, 0.009634017944335938, 0.0124664306640625, 0.015298843383789062, 0.018131256103515625, 0.020963668823242188, 0.02379608154296875, 0.026628494262695312, 0.029460906982421875, 0.03229331970214844, 0.035125732421875, 0.03795814514160156, 0.040790557861328125, 0.04362297058105469, 0.04645538330078125, 0.04928779602050781, 0.052120208740234375, 0.05495262145996094, 0.0577850341796875, 0.06061744689941406, 0.06344985961914062, 0.06628227233886719, 0.06911468505859375, 0.07194709777832031, 0.07477951049804688, 0.07761192321777344, 0.0804443359375]}, "gradients/encoder.encoder.layers.8.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 0.0, 1.0, 3.0, 3.0, 1.0, 4.0, 3.0, 4.0, 8.0, 13.0, 11.0, 29.0, 32.0, 48.0, 71.0, 98.0, 156.0, 230.0, 376.0, 756.0, 1417.0, 3014.0, 7447.0, 19253.0, 54556.0, 166748.0, 399657.0, 260196.0, 85541.0, 29312.0, 10887.0, 4427.0, 2046.0, 912.0, 512.0, 262.0, 171.0, 108.0, 62.0, 52.0, 46.0, 22.0, 23.0, 15.0, 10.0, 7.0, 6.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.04901123046875, -0.04730367660522461, -0.04559612274169922, -0.04388856887817383, -0.04218101501464844, -0.04047346115112305, -0.038765907287597656, -0.037058353424072266, -0.035350799560546875, -0.033643245697021484, -0.031935691833496094, -0.030228137969970703, -0.028520584106445312, -0.026813030242919922, -0.02510547637939453, -0.02339792251586914, -0.02169036865234375, -0.01998281478881836, -0.01827526092529297, -0.016567707061767578, -0.014860153198242188, -0.013152599334716797, -0.011445045471191406, -0.009737491607666016, -0.008029937744140625, -0.006322383880615234, -0.004614830017089844, -0.002907276153564453, -0.0011997222900390625, 0.0005078315734863281, 0.0022153854370117188, 0.003922939300537109, 0.0056304931640625, 0.007338047027587891, 0.009045600891113281, 0.010753154754638672, 0.012460708618164062, 0.014168262481689453, 0.015875816345214844, 0.017583370208740234, 0.019290924072265625, 0.020998477935791016, 0.022706031799316406, 0.024413585662841797, 0.026121139526367188, 0.027828693389892578, 0.02953624725341797, 0.03124380111694336, 0.03295135498046875, 0.03465890884399414, 0.03636646270751953, 0.03807401657104492, 0.03978157043457031, 0.0414891242980957, 
0.043196678161621094, 0.044904232025146484, 0.046611785888671875, 0.048319339752197266, 0.050026893615722656, 0.05173444747924805, 0.05344200134277344, 0.05514955520629883, 0.05685710906982422, 0.05856466293334961, 0.060272216796875]}, "gradients/encoder.encoder.layers.8.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 8.0, 7.0, 3.0, 9.0, 13.0, 13.0, 17.0, 19.0, 24.0, 39.0, 46.0, 45.0, 39.0, 54.0, 57.0, 56.0, 60.0, 53.0, 52.0, 47.0, 56.0, 49.0, 44.0, 32.0, 26.0, 25.0, 29.0, 23.0, 10.0, 8.0, 12.0, 8.0, 7.0, 6.0, 2.0, 4.0, 2.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.08526611328125, -0.08250713348388672, -0.07974815368652344, -0.07698917388916016, -0.07423019409179688, -0.0714712142944336, -0.06871223449707031, -0.06595325469970703, -0.06319427490234375, -0.06043529510498047, -0.05767631530761719, -0.054917335510253906, -0.052158355712890625, -0.049399375915527344, -0.04664039611816406, -0.04388141632080078, -0.0411224365234375, -0.03836345672607422, -0.03560447692871094, -0.032845497131347656, -0.030086517333984375, -0.027327537536621094, -0.024568557739257812, -0.02180957794189453, -0.01905059814453125, -0.01629161834716797, -0.013532638549804688, -0.010773658752441406, -0.008014678955078125, -0.005255699157714844, -0.0024967193603515625, 0.00026226043701171875, 0.003021240234375, 0.005780220031738281, 0.008539199829101562, 0.011298179626464844, 0.014057159423828125, 0.016816139221191406, 0.019575119018554688, 0.02233409881591797, 0.02509307861328125, 0.02785205841064453, 0.030611038208007812, 0.033370018005371094, 0.036128997802734375, 0.038887977600097656, 0.04164695739746094, 0.04440593719482422, 0.0471649169921875, 0.04992389678955078, 0.05268287658691406, 0.055441856384277344, 0.058200836181640625, 0.060959815979003906, 0.06371879577636719, 0.06647777557373047, 0.06923675537109375, 0.07199573516845703, 0.07475471496582031, 0.0775136947631836, 0.08027267456054688, 0.08303165435791016, 0.08579063415527344, 0.08854961395263672, 0.09130859375]}, "gradients/encoder.encoder.layers.8.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 4.0, 4.0, 5.0, 6.0, 12.0, 19.0, 25.0, 35.0, 60.0, 103.0, 174.0, 314.0, 642.0, 1301.0, 2798.0, 6553.0, 15506.0, 39503.0, 107172.0, 292836.0, 350948.0, 144310.0, 51410.0, 19896.0, 8103.0, 3550.0, 1613.0, 788.0, 377.0, 200.0, 110.0, 67.0, 47.0, 27.0, 13.0, 14.0, 8.0, 5.0, 5.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.019012451171875, -0.01845550537109375, -0.0178985595703125, -0.01734161376953125, -0.01678466796875, -0.01622772216796875, -0.0156707763671875, -0.01511383056640625, -0.014556884765625, -0.01399993896484375, -0.0134429931640625, -0.01288604736328125, -0.0123291015625, -0.01177215576171875, -0.0112152099609375, -0.01065826416015625, -0.010101318359375, -0.00954437255859375, -0.0089874267578125, -0.00843048095703125, -0.00787353515625, -0.00731658935546875, -0.0067596435546875, -0.00620269775390625, -0.005645751953125, -0.00508880615234375, -0.0045318603515625, -0.00397491455078125, -0.00341796875, -0.00286102294921875, -0.0023040771484375, -0.00174713134765625, -0.001190185546875, -0.00063323974609375, -7.62939453125e-05, 0.00048065185546875, 0.00103759765625, 0.00159454345703125, 0.0021514892578125, 0.00270843505859375, 0.003265380859375, 0.00382232666015625, 0.0043792724609375, 0.00493621826171875, 0.0054931640625, 
0.00605010986328125, 0.0066070556640625, 0.00716400146484375, 0.007720947265625, 0.00827789306640625, 0.0088348388671875, 0.00939178466796875, 0.00994873046875, 0.01050567626953125, 0.0110626220703125, 0.01161956787109375, 0.012176513671875, 0.01273345947265625, 0.0132904052734375, 0.01384735107421875, 0.014404296875, 0.01496124267578125, 0.0155181884765625, 0.01607513427734375, 0.016632080078125]}, "gradients/encoder.encoder.layers.8.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 5.0, 2.0, 3.0, 3.0, 7.0, 10.0, 7.0, 12.0, 14.0, 22.0, 32.0, 55.0, 50.0, 69.0, 106.0, 80.0, 99.0, 90.0, 90.0, 51.0, 44.0, 36.0, 34.0, 23.0, 15.0, 11.0, 3.0, 6.0, 4.0, 4.0, 4.0, 4.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.138448715209961e-05, -1.1050142347812653e-05, -1.0715797543525696e-05, -1.0381452739238739e-05, -1.0047107934951782e-05, -9.712763130664825e-06, -9.378418326377869e-06, -9.044073522090912e-06, -8.709728717803955e-06, -8.375383913516998e-06, -8.041039109230042e-06, -7.706694304943085e-06, -7.372349500656128e-06, -7.038004696369171e-06, -6.703659892082214e-06, -6.3693150877952576e-06, -6.034970283508301e-06, -5.700625479221344e-06, -5.366280674934387e-06, -5.03193587064743e-06, -4.697591066360474e-06, -4.363246262073517e-06, -4.02890145778656e-06, -3.6945566534996033e-06, -3.3602118492126465e-06, -3.0258670449256897e-06, -2.691522240638733e-06, -2.357177436351776e-06, -2.0228326320648193e-06, -1.6884878277778625e-06, -1.3541430234909058e-06, -1.019798219203949e-06, -6.854534149169922e-07, -3.511086106300354e-07, -1.6763806343078613e-08, 3.175809979438782e-07, 6.51925802230835e-07, 9.862706065177917e-07, 1.3206154108047485e-06, 1.6549602150917053e-06, 1.989305019378662e-06, 2.323649823665619e-06, 2.6579946279525757e-06, 2.9923394322395325e-06, 3.3266842365264893e-06, 3.661029040813446e-06, 3.995373845100403e-06, 4.32971864938736e-06, 4.664063453674316e-06, 4.998408257961273e-06, 5.33275306224823e-06, 5.667097866535187e-06, 6.0014426708221436e-06, 6.3357874751091e-06, 6.670132279396057e-06, 7.004477083683014e-06, 7.338821887969971e-06, 7.673166692256927e-06, 8.007511496543884e-06, 8.341856300830841e-06, 8.676201105117798e-06, 9.010545909404755e-06, 9.344890713691711e-06, 9.679235517978668e-06, 1.0013580322265625e-05]}, "gradients/encoder.encoder.layers.8.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 4.0, 6.0, 9.0, 9.0, 11.0, 29.0, 41.0, 46.0, 73.0, 83.0, 161.0, 254.0, 466.0, 934.0, 1726.0, 3468.0, 7000.0, 14656.0, 32842.0, 78309.0, 183682.0, 306706.0, 232483.0, 104999.0, 43721.0, 18927.0, 8804.0, 4317.0, 2163.0, 1116.0, 624.0, 365.0, 185.0, 110.0, 70.0, 48.0, 34.0, 22.0, 14.0, 12.0, 9.0, 5.0, 6.0, 3.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0], "bins": [-0.016998291015625, -0.016521692276000977, -0.016045093536376953, -0.01556849479675293, -0.015091896057128906, -0.014615297317504883, -0.01413869857788086, -0.013662099838256836, -0.013185501098632812, -0.012708902359008789, -0.012232303619384766, -0.011755704879760742, -0.011279106140136719, -0.010802507400512695, -0.010325908660888672, -0.009849309921264648, -0.009372711181640625, -0.008896112442016602, -0.008419513702392578, -0.007942914962768555, -0.007466316223144531, -0.006989717483520508, -0.006513118743896484, -0.006036520004272461, -0.0055599212646484375, -0.005083322525024414, -0.004606723785400391, -0.004130125045776367, 
-0.0036535263061523438, -0.0031769275665283203, -0.002700328826904297, -0.0022237300872802734, -0.00174713134765625, -0.0012705326080322266, -0.0007939338684082031, -0.0003173351287841797, 0.00015926361083984375, 0.0006358623504638672, 0.0011124610900878906, 0.001589059829711914, 0.0020656585693359375, 0.002542257308959961, 0.0030188560485839844, 0.003495454788208008, 0.003972053527832031, 0.004448652267456055, 0.004925251007080078, 0.0054018497467041016, 0.005878448486328125, 0.0063550472259521484, 0.006831645965576172, 0.007308244705200195, 0.007784843444824219, 0.008261442184448242, 0.008738040924072266, 0.009214639663696289, 0.009691238403320312, 0.010167837142944336, 0.01064443588256836, 0.011121034622192383, 0.011597633361816406, 0.01207423210144043, 0.012550830841064453, 0.013027429580688477, 0.0135040283203125]}, "gradients/encoder.encoder.layers.8.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 3.0, 4.0, 6.0, 5.0, 11.0, 11.0, 18.0, 21.0, 29.0, 34.0, 44.0, 62.0, 68.0, 72.0, 76.0, 88.0, 69.0, 65.0, 79.0, 61.0, 37.0, 36.0, 26.0, 22.0, 12.0, 12.0, 12.0, 7.0, 10.0, 0.0, 4.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0206451416015625, -0.020056962966918945, -0.01946878433227539, -0.018880605697631836, -0.01829242706298828, -0.017704248428344727, -0.017116069793701172, -0.016527891159057617, -0.015939712524414062, -0.015351533889770508, -0.014763355255126953, -0.014175176620483398, -0.013586997985839844, -0.012998819351196289, -0.012410640716552734, -0.01182246208190918, -0.011234283447265625, -0.01064610481262207, -0.010057926177978516, -0.009469747543334961, -0.008881568908691406, -0.008293390274047852, -0.007705211639404297, -0.007117033004760742, -0.0065288543701171875, -0.005940675735473633, -0.005352497100830078, -0.0047643184661865234, -0.004176139831542969, -0.003587961196899414, -0.0029997825622558594, -0.0024116039276123047, -0.00182342529296875, -0.0012352466583251953, -0.0006470680236816406, -5.888938903808594e-05, 0.0005292892456054688, 0.0011174678802490234, 0.0017056465148925781, 0.002293825149536133, 0.0028820037841796875, 0.003470182418823242, 0.004058361053466797, 0.0046465396881103516, 0.005234718322753906, 0.005822896957397461, 0.006411075592041016, 0.00699925422668457, 0.007587432861328125, 0.00817561149597168, 0.008763790130615234, 0.009351968765258789, 0.009940147399902344, 0.010528326034545898, 0.011116504669189453, 0.011704683303833008, 0.012292861938476562, 0.012881040573120117, 0.013469219207763672, 0.014057397842407227, 0.014645576477050781, 0.015233755111694336, 0.01582193374633789, 0.016410112380981445, 0.016998291015625]}, "gradients/encoder.encoder.layers.8.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 4.0, 8.0, 6.0, 34.0, 70.0, 113.0, 130.0, 176.0, 144.0, 124.0, 90.0, 53.0, 27.0, 10.0, 4.0, 3.0, 5.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.32812750339508057, -0.31812193989753723, -0.3081164062023163, -0.29811084270477295, -0.288105309009552, -0.27809974551200867, -0.26809418201446533, -0.2580886483192444, -0.24808308482170105, -0.2380775362253189, -0.22807198762893677, -0.21806642413139343, -0.2080608755350113, -0.19805532693862915, -0.18804976344108582, -0.17804421484470367, -0.16803866624832153, 
-0.1580331176519394, -0.14802756905555725, -0.13802200555801392, -0.12801645696163177, -0.11801090836524963, -0.1080053523182869, -0.09799979627132416, -0.08799424767494202, -0.07798869907855988, -0.06798314303159714, -0.0579775907099247, -0.04797203838825226, -0.03796648606657982, -0.02796093374490738, -0.01795537769794464, -0.0079498291015625, 0.0020557232201099396, 0.01206127554178238, 0.02206682786345482, 0.03207238018512726, 0.0420779325067997, 0.05208348482847214, 0.062089040875434875, 0.07209458947181702, 0.08210013806819916, 0.0921056941151619, 0.10211125016212463, 0.11211679875850677, 0.12212234735488892, 0.13212791085243225, 0.1421334594488144, 0.15213900804519653, 0.16214455664157867, 0.17215010523796082, 0.18215566873550415, 0.1921612173318863, 0.20216676592826843, 0.21217232942581177, 0.2221778780221939, 0.23218342661857605, 0.2421889752149582, 0.25219452381134033, 0.26220008730888367, 0.272205650806427, 0.28221118450164795, 0.2922167479991913, 0.3022223114967346, 0.31222784519195557]}, "gradients/encoder.encoder.layers.8.layer_norm.bias": {"_type": "histogram", "values": [3.0, 3.0, 0.0, 0.0, 2.0, 1.0, 5.0, 3.0, 5.0, 5.0, 10.0, 5.0, 18.0, 15.0, 11.0, 16.0, 11.0, 23.0, 27.0, 21.0, 28.0, 35.0, 43.0, 36.0, 45.0, 34.0, 48.0, 41.0, 50.0, 41.0, 42.0, 47.0, 39.0, 46.0, 32.0, 30.0, 29.0, 25.0, 17.0, 21.0, 16.0, 18.0, 17.0, 18.0, 7.0, 9.0, 6.0, 4.0, 1.0, 6.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.16635072231292725, -0.16050544381141663, -0.1546601504087448, -0.148814857006073, -0.14296957850456238, -0.13712430000305176, -0.13127900660037994, -0.12543371319770813, -0.11958843469619751, -0.11374314874410629, -0.10789786279201508, -0.10205257683992386, -0.09620729088783264, -0.09036200493574142, -0.08451671898365021, -0.07867143303155899, -0.07282614707946777, -0.06698086112737656, -0.06113557517528534, -0.05529028922319412, -0.049445003271102905, -0.04359971731901169, -0.03775443136692047, -0.031909145414829254, -0.026063859462738037, -0.02021857351064682, -0.014373287558555603, -0.008528001606464386, -0.002682715654373169, 0.003162570297718048, 0.009007856249809265, 0.014853142201900482, 0.0206984281539917, 0.026543714106082916, 0.03238900005817413, 0.03823428601026535, 0.04407957196235657, 0.049924857914447784, 0.055770143866539, 0.06161542981863022, 0.06746071577072144, 0.07330600172281265, 0.07915128767490387, 0.08499657362699509, 0.0908418595790863, 0.09668714553117752, 0.10253243148326874, 0.10837771743535995, 0.11422300338745117, 0.12006828933954239, 0.1259135752916336, 0.13175886869430542, 0.13760414719581604, 0.14344942569732666, 0.14929471909999847, 0.1551400125026703, 0.1609852910041809, 0.16683056950569153, 0.17267586290836334, 0.17852115631103516, 0.18436643481254578, 0.1902117133140564, 0.1960570067167282, 0.20190230011940002, 0.20774757862091064]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 10.0, 16.0, 22.0, 44.0, 65.0, 127.0, 218.0, 423.0, 835.0, 1849.0, 6709.0, 29624.0, 395881.0, 3366860.0, 354780.0, 27523.0, 5998.0, 1885.0, 661.0, 337.0, 155.0, 102.0, 62.0, 33.0, 21.0, 13.0, 2.0, 1.0, 6.0, 3.0, 3.0, 0.0, 2.0, 3.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0927734375, -0.08982658386230469, -0.08687973022460938, -0.08393287658691406, -0.08098602294921875, -0.07803916931152344, -0.07509231567382812, -0.07214546203613281, -0.0691986083984375, 
-0.06625175476074219, -0.06330490112304688, -0.06035804748535156, -0.05741119384765625, -0.05446434020996094, -0.051517486572265625, -0.04857063293457031, -0.045623779296875, -0.04267692565917969, -0.039730072021484375, -0.03678321838378906, -0.03383636474609375, -0.030889511108398438, -0.027942657470703125, -0.024995803833007812, -0.0220489501953125, -0.019102096557617188, -0.016155242919921875, -0.013208389282226562, -0.01026153564453125, -0.0073146820068359375, -0.004367828369140625, -0.0014209747314453125, 0.00152587890625, 0.0044727325439453125, 0.007419586181640625, 0.010366439819335938, 0.01331329345703125, 0.016260147094726562, 0.019207000732421875, 0.022153854370117188, 0.0251007080078125, 0.028047561645507812, 0.030994415283203125, 0.03394126892089844, 0.03688812255859375, 0.03983497619628906, 0.042781829833984375, 0.04572868347167969, 0.048675537109375, 0.05162239074707031, 0.054569244384765625, 0.05751609802246094, 0.06046295166015625, 0.06340980529785156, 0.06635665893554688, 0.06930351257324219, 0.0722503662109375, 0.07519721984863281, 0.07814407348632812, 0.08109092712402344, 0.08403778076171875, 0.08698463439941406, 0.08993148803710938, 0.09287834167480469, 0.0958251953125]}, "gradients/encoder.encoder.layers.7.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 1.0, 2.0, 3.0, 9.0, 13.0, 26.0, 51.0, 66.0, 101.0, 135.0, 148.0, 126.0, 106.0, 89.0, 62.0, 37.0, 21.0, 6.0, 5.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.09375, -0.0910491943359375, -0.088348388671875, -0.0856475830078125, -0.08294677734375, -0.0802459716796875, -0.077545166015625, -0.0748443603515625, -0.0721435546875, -0.0694427490234375, -0.066741943359375, -0.0640411376953125, -0.06134033203125, -0.0586395263671875, -0.055938720703125, -0.0532379150390625, -0.050537109375, -0.0478363037109375, -0.045135498046875, -0.0424346923828125, -0.03973388671875, -0.0370330810546875, -0.034332275390625, -0.0316314697265625, -0.0289306640625, -0.0262298583984375, -0.023529052734375, -0.0208282470703125, -0.01812744140625, -0.0154266357421875, -0.012725830078125, -0.0100250244140625, -0.00732421875, -0.0046234130859375, -0.001922607421875, 0.0007781982421875, 0.00347900390625, 0.0061798095703125, 0.008880615234375, 0.0115814208984375, 0.0142822265625, 0.0169830322265625, 0.019683837890625, 0.0223846435546875, 0.02508544921875, 0.0277862548828125, 0.030487060546875, 0.0331878662109375, 0.035888671875, 0.0385894775390625, 0.041290283203125, 0.0439910888671875, 0.04669189453125, 0.0493927001953125, 0.052093505859375, 0.0547943115234375, 0.0574951171875, 0.0601959228515625, 0.062896728515625, 0.0655975341796875, 0.06829833984375, 0.0709991455078125, 0.073699951171875, 0.0764007568359375, 0.0791015625]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 3.0, 3.0, 4.0, 8.0, 10.0, 12.0, 26.0, 52.0, 41.0, 82.0, 118.0, 211.0, 400.0, 730.0, 1450.0, 3400.0, 8174.0, 22435.0, 71331.0, 281144.0, 1801319.0, 1642489.0, 258565.0, 66599.0, 21483.0, 7900.0, 3215.0, 1473.0, 681.0, 391.0, 210.0, 125.0, 67.0, 57.0, 39.0, 24.0, 11.0, 3.0, 4.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.041961669921875, -0.040561676025390625, -0.03916168212890625, -0.037761688232421875, 
-0.0363616943359375, -0.034961700439453125, -0.03356170654296875, -0.032161712646484375, -0.03076171875, -0.029361724853515625, -0.02796173095703125, -0.026561737060546875, -0.0251617431640625, -0.023761749267578125, -0.02236175537109375, -0.020961761474609375, -0.019561767578125, -0.018161773681640625, -0.01676177978515625, -0.015361785888671875, -0.0139617919921875, -0.012561798095703125, -0.01116180419921875, -0.009761810302734375, -0.00836181640625, -0.006961822509765625, -0.00556182861328125, -0.004161834716796875, -0.0027618408203125, -0.001361846923828125, 3.814697265625e-05, 0.001438140869140625, 0.002838134765625, 0.004238128662109375, 0.00563812255859375, 0.007038116455078125, 0.0084381103515625, 0.009838104248046875, 0.01123809814453125, 0.012638092041015625, 0.0140380859375, 0.015438079833984375, 0.01683807373046875, 0.018238067626953125, 0.0196380615234375, 0.021038055419921875, 0.02243804931640625, 0.023838043212890625, 0.025238037109375, 0.026638031005859375, 0.02803802490234375, 0.029438018798828125, 0.0308380126953125, 0.032238006591796875, 0.03363800048828125, 0.035037994384765625, 0.03643798828125, 0.037837982177734375, 0.03923797607421875, 0.040637969970703125, 0.0420379638671875, 0.043437957763671875, 0.04483795166015625, 0.046237945556640625, 0.047637939453125]}, "gradients/encoder.encoder.layers.7.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 5.0, 8.0, 5.0, 8.0, 10.0, 10.0, 17.0, 24.0, 35.0, 38.0, 47.0, 84.0, 123.0, 171.0, 284.0, 483.0, 610.0, 618.0, 490.0, 315.0, 202.0, 142.0, 83.0, 69.0, 53.0, 36.0, 22.0, 27.0, 18.0, 9.0, 8.0, 7.0, 6.0, 5.0, 2.0, 3.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.046234130859375, -0.044701576232910156, -0.04316902160644531, -0.04163646697998047, -0.040103912353515625, -0.03857135772705078, -0.03703880310058594, -0.035506248474121094, -0.03397369384765625, -0.032441139221191406, -0.030908584594726562, -0.02937602996826172, -0.027843475341796875, -0.02631092071533203, -0.024778366088867188, -0.023245811462402344, -0.0217132568359375, -0.020180702209472656, -0.018648147583007812, -0.01711559295654297, -0.015583038330078125, -0.014050483703613281, -0.012517929077148438, -0.010985374450683594, -0.00945281982421875, -0.007920265197753906, -0.0063877105712890625, -0.004855155944824219, -0.003322601318359375, -0.0017900466918945312, -0.0002574920654296875, 0.0012750625610351562, 0.0028076171875, 0.004340171813964844, 0.0058727264404296875, 0.007405281066894531, 0.008937835693359375, 0.010470390319824219, 0.012002944946289062, 0.013535499572753906, 0.01506805419921875, 0.016600608825683594, 0.018133163452148438, 0.01966571807861328, 0.021198272705078125, 0.02273082733154297, 0.024263381958007812, 0.025795936584472656, 0.0273284912109375, 0.028861045837402344, 0.030393600463867188, 0.03192615509033203, 0.033458709716796875, 0.03499126434326172, 0.03652381896972656, 0.038056373596191406, 0.03958892822265625, 0.041121482849121094, 0.04265403747558594, 0.04418659210205078, 0.045719146728515625, 0.04725170135498047, 0.04878425598144531, 0.050316810607910156, 0.051849365234375]}, "gradients/encoder.encoder.layers.7.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 17.0, 41.0, 
127.0, 240.0, 249.0, 191.0, 88.0, 31.0, 10.0, 6.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.9638176560401917, -0.9446650147438049, -0.9255123138427734, -0.9063596725463867, -0.8872069716453552, -0.8680543303489685, -0.848901629447937, -0.8297489881515503, -0.8105963468551636, -0.7914437055587769, -0.7722910046577454, -0.7531383633613586, -0.7339856624603271, -0.7148330211639404, -0.6956803202629089, -0.6765276789665222, -0.6573749780654907, -0.638222336769104, -0.6190696358680725, -0.5999169945716858, -0.5807642936706543, -0.5616116523742676, -0.5424589514732361, -0.5233063101768494, -0.5041536092758179, -0.48500093817710876, -0.46584826707839966, -0.44669559597969055, -0.42754292488098145, -0.4083902835845947, -0.3892376124858856, -0.3700849413871765, -0.3509323000907898, -0.3317796289920807, -0.3126269578933716, -0.2934742867946625, -0.27432161569595337, -0.25516897439956665, -0.23601630330085754, -0.21686363220214844, -0.19771096110343933, -0.17855829000473022, -0.15940561890602112, -0.1402529627084732, -0.1211002916097641, -0.10194762051105499, -0.08279495686292648, -0.06364229321479797, -0.04448962211608887, -0.02533695474267006, -0.006184287369251251, 0.012968380004167557, 0.032121047377586365, 0.05127371847629547, 0.07042638212442398, 0.08957904577255249, 0.1087317168712616, 0.1278843879699707, 0.1470370590686798, 0.16618971526622772, 0.18534238636493683, 0.20449505746364594, 0.22364771366119385, 0.24280038475990295, 0.26195305585861206]}, "gradients/encoder.encoder.layers.7.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 3.0, 4.0, 2.0, 3.0, 5.0, 5.0, 5.0, 11.0, 12.0, 14.0, 19.0, 18.0, 17.0, 16.0, 22.0, 20.0, 29.0, 33.0, 43.0, 28.0, 43.0, 43.0, 38.0, 36.0, 38.0, 46.0, 34.0, 38.0, 28.0, 33.0, 42.0, 29.0, 27.0, 30.0, 33.0, 24.0, 16.0, 21.0, 20.0, 18.0, 13.0, 8.0, 11.0, 10.0, 6.0, 4.0, 3.0, 4.0, 2.0, 6.0, 1.0, 0.0, 1.0, 0.0, 3.0], "bins": [-0.1492345929145813, -0.14481954276561737, -0.14040449261665344, -0.13598942756652832, -0.1315743774175644, -0.12715932726860046, -0.12274427711963654, -0.11832922697067261, -0.11391416937112808, -0.10949911922216415, -0.10508406162261963, -0.1006690114736557, -0.09625396132469177, -0.09183890372514725, -0.08742385357618332, -0.0830087959766388, -0.07859374582767487, -0.07417869567871094, -0.06976363807916641, -0.06534858793020248, -0.06093353405594826, -0.05651848018169403, -0.0521034300327301, -0.047688376158475876, -0.04327332228422165, -0.03885826840996742, -0.034443214535713196, -0.030028164386749268, -0.02561311051249504, -0.021198056638240814, -0.016783004626631737, -0.01236795261502266, -0.007952898740768433, -0.0035378457978367805, 0.0008772071450948715, 0.005292260088026524, 0.009707313030958176, 0.014122366905212402, 0.01853741891682148, 0.022952470928430557, 0.027367524802684784, 0.03178257867693901, 0.03619763255119324, 0.040612682700157166, 0.04502773657441139, 0.04944279044866562, 0.05385784059762955, 0.058272894471883774, 0.062687948346138, 0.06710299849510193, 0.07151805609464645, 0.07593310624361038, 0.08034816384315491, 0.08476321399211884, 0.08917826414108276, 0.09359331429004669, 0.09800837188959122, 0.10242342203855515, 0.10683847963809967, 0.1112535297870636, 0.11566857993602753, 0.12008363753557205, 0.12449868768453598, 0.1289137452840805, 0.13332879543304443]}, "gradients/encoder.encoder.layers.7.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 10.0, 13.0, 9.0, 13.0, 25.0, 38.0, 41.0, 
71.0, 106.0, 185.0, 254.0, 401.0, 756.0, 1344.0, 2852.0, 7129.0, 20451.0, 62449.0, 229293.0, 474466.0, 171788.0, 49630.0, 16077.0, 5767.0, 2468.0, 1198.0, 649.0, 394.0, 232.0, 128.0, 95.0, 64.0, 47.0, 26.0, 29.0, 24.0, 10.0, 8.0, 8.0, 5.0, 4.0, 2.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07452392578125, -0.07215023040771484, -0.06977653503417969, -0.06740283966064453, -0.06502914428710938, -0.06265544891357422, -0.06028175354003906, -0.057908058166503906, -0.05553436279296875, -0.053160667419433594, -0.05078697204589844, -0.04841327667236328, -0.046039581298828125, -0.04366588592529297, -0.04129219055175781, -0.038918495178222656, -0.0365447998046875, -0.034171104431152344, -0.03179740905761719, -0.02942371368408203, -0.027050018310546875, -0.02467632293701172, -0.022302627563476562, -0.019928932189941406, -0.01755523681640625, -0.015181541442871094, -0.012807846069335938, -0.010434150695800781, -0.008060455322265625, -0.005686759948730469, -0.0033130645751953125, -0.0009393692016601562, 0.001434326171875, 0.0038080215454101562, 0.0061817169189453125, 0.008555412292480469, 0.010929107666015625, 0.013302803039550781, 0.015676498413085938, 0.018050193786621094, 0.02042388916015625, 0.022797584533691406, 0.025171279907226562, 0.02754497528076172, 0.029918670654296875, 0.03229236602783203, 0.03466606140136719, 0.037039756774902344, 0.0394134521484375, 0.041787147521972656, 0.04416084289550781, 0.04653453826904297, 0.048908233642578125, 0.05128192901611328, 0.05365562438964844, 0.056029319763183594, 0.05840301513671875, 0.060776710510253906, 0.06315040588378906, 0.06552410125732422, 0.06789779663085938, 0.07027149200439453, 0.07264518737792969, 0.07501888275146484, 0.077392578125]}, "gradients/encoder.encoder.layers.7.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 7.0, 19.0, 39.0, 51.0, 72.0, 90.0, 139.0, 130.0, 115.0, 133.0, 87.0, 52.0, 35.0, 20.0, 6.0, 6.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0904541015625, -0.08783817291259766, -0.08522224426269531, -0.08260631561279297, -0.07999038696289062, -0.07737445831298828, -0.07475852966308594, -0.0721426010131836, -0.06952667236328125, -0.0669107437133789, -0.06429481506347656, -0.06167888641357422, -0.059062957763671875, -0.05644702911376953, -0.05383110046386719, -0.051215171813964844, -0.0485992431640625, -0.045983314514160156, -0.04336738586425781, -0.04075145721435547, -0.038135528564453125, -0.03551959991455078, -0.03290367126464844, -0.030287742614746094, -0.02767181396484375, -0.025055885314941406, -0.022439956665039062, -0.01982402801513672, -0.017208099365234375, -0.014592170715332031, -0.011976242065429688, -0.009360313415527344, -0.006744384765625, -0.004128456115722656, -0.0015125274658203125, 0.0011034011840820312, 0.003719329833984375, 0.006335258483886719, 0.008951187133789062, 0.011567115783691406, 0.01418304443359375, 0.016798973083496094, 0.019414901733398438, 0.02203083038330078, 0.024646759033203125, 0.02726268768310547, 0.029878616333007812, 0.032494544982910156, 0.0351104736328125, 0.037726402282714844, 0.04034233093261719, 0.04295825958251953, 0.045574188232421875, 0.04819011688232422, 0.05080604553222656, 0.053421974182128906, 0.05603790283203125, 0.058653831481933594, 0.06126976013183594, 0.06388568878173828, 0.06650161743164062, 0.06911754608154297, 0.07173347473144531, 
0.07434940338134766, 0.07696533203125]}, "gradients/encoder.encoder.layers.7.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 3.0, 0.0, 4.0, 1.0, 0.0, 0.0, 3.0, 5.0, 5.0, 4.0, 9.0, 7.0, 11.0, 21.0, 23.0, 34.0, 43.0, 67.0, 81.0, 147.0, 258.0, 427.0, 714.0, 1350.0, 2870.0, 7210.0, 20681.0, 66444.0, 253292.0, 475809.0, 152720.0, 42490.0, 13865.0, 5160.0, 2214.0, 1058.0, 508.0, 343.0, 213.0, 155.0, 89.0, 73.0, 39.0, 38.0, 19.0, 15.0, 6.0, 5.0, 7.0, 5.0, 5.0, 8.0, 0.0, 2.0, 1.0, 4.0, 1.0, 3.0], "bins": [-0.07489013671875, -0.0727696418762207, -0.0706491470336914, -0.06852865219116211, -0.06640815734863281, -0.06428766250610352, -0.06216716766357422, -0.06004667282104492, -0.057926177978515625, -0.05580568313598633, -0.05368518829345703, -0.051564693450927734, -0.04944419860839844, -0.04732370376586914, -0.045203208923339844, -0.04308271408081055, -0.04096221923828125, -0.03884172439575195, -0.036721229553222656, -0.03460073471069336, -0.03248023986816406, -0.030359745025634766, -0.02823925018310547, -0.026118755340576172, -0.023998260498046875, -0.021877765655517578, -0.01975727081298828, -0.017636775970458984, -0.015516281127929688, -0.01339578628540039, -0.011275291442871094, -0.009154796600341797, -0.0070343017578125, -0.004913806915283203, -0.0027933120727539062, -0.0006728172302246094, 0.0014476776123046875, 0.0035681724548339844, 0.005688667297363281, 0.007809162139892578, 0.009929656982421875, 0.012050151824951172, 0.014170646667480469, 0.016291141510009766, 0.018411636352539062, 0.02053213119506836, 0.022652626037597656, 0.024773120880126953, 0.02689361572265625, 0.029014110565185547, 0.031134605407714844, 0.03325510025024414, 0.03537559509277344, 0.037496089935302734, 0.03961658477783203, 0.04173707962036133, 0.043857574462890625, 0.04597806930541992, 0.04809856414794922, 0.050219058990478516, 0.05233955383300781, 0.05446004867553711, 0.056580543518066406, 0.0587010383605957, 0.060821533203125]}, "gradients/encoder.encoder.layers.7.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 2.0, 1.0, 6.0, 4.0, 6.0, 4.0, 9.0, 4.0, 9.0, 5.0, 16.0, 20.0, 16.0, 29.0, 36.0, 34.0, 39.0, 45.0, 48.0, 52.0, 60.0, 55.0, 59.0, 61.0, 55.0, 53.0, 41.0, 48.0, 31.0, 37.0, 32.0, 25.0, 14.0, 11.0, 10.0, 12.0, 8.0, 6.0, 6.0, 4.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06732177734375, -0.06452083587646484, -0.06171989440917969, -0.05891895294189453, -0.056118011474609375, -0.05331707000732422, -0.05051612854003906, -0.047715187072753906, -0.04491424560546875, -0.042113304138183594, -0.03931236267089844, -0.03651142120361328, -0.033710479736328125, -0.03090953826904297, -0.028108596801757812, -0.025307655334472656, -0.0225067138671875, -0.019705772399902344, -0.016904830932617188, -0.014103889465332031, -0.011302947998046875, -0.008502006530761719, -0.0057010650634765625, -0.0029001235961914062, -9.918212890625e-05, 0.0027017593383789062, 0.0055027008056640625, 0.008303642272949219, 0.011104583740234375, 0.013905525207519531, 0.016706466674804688, 0.019507408142089844, 0.022308349609375, 0.025109291076660156, 0.027910232543945312, 0.03071117401123047, 0.033512115478515625, 0.03631305694580078, 0.03911399841308594, 0.041914939880371094, 0.04471588134765625, 0.047516822814941406, 0.05031776428222656, 0.05311870574951172, 0.055919647216796875, 0.05872058868408203, 0.06152153015136719, 0.06432247161865234, 0.0671234130859375, 0.06992435455322266, 0.07272529602050781, 
0.07552623748779297, 0.07832717895507812, 0.08112812042236328, 0.08392906188964844, 0.0867300033569336, 0.08953094482421875, 0.0923318862915039, 0.09513282775878906, 0.09793376922607422, 0.10073471069335938, 0.10353565216064453, 0.10633659362792969, 0.10913753509521484, 0.1119384765625]}, "gradients/encoder.encoder.layers.7.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 7.0, 5.0, 4.0, 5.0, 28.0, 25.0, 44.0, 73.0, 110.0, 253.0, 379.0, 792.0, 1444.0, 2918.0, 6187.0, 15997.0, 52147.0, 199717.0, 507631.0, 184960.0, 48659.0, 15191.0, 5973.0, 2944.0, 1445.0, 723.0, 379.0, 204.0, 108.0, 80.0, 44.0, 27.0, 17.0, 10.0, 8.0, 7.0, 3.0, 3.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0255889892578125, -0.024775981903076172, -0.023962974548339844, -0.023149967193603516, -0.022336959838867188, -0.02152395248413086, -0.02071094512939453, -0.019897937774658203, -0.019084930419921875, -0.018271923065185547, -0.01745891571044922, -0.01664590835571289, -0.015832901000976562, -0.015019893646240234, -0.014206886291503906, -0.013393878936767578, -0.01258087158203125, -0.011767864227294922, -0.010954856872558594, -0.010141849517822266, -0.009328842163085938, -0.00851583480834961, -0.007702827453613281, -0.006889820098876953, -0.006076812744140625, -0.005263805389404297, -0.004450798034667969, -0.0036377906799316406, -0.0028247833251953125, -0.0020117759704589844, -0.0011987686157226562, -0.0003857612609863281, 0.00042724609375, 0.0012402534484863281, 0.0020532608032226562, 0.0028662681579589844, 0.0036792755126953125, 0.004492282867431641, 0.005305290222167969, 0.006118297576904297, 0.006931304931640625, 0.007744312286376953, 0.008557319641113281, 0.00937032699584961, 0.010183334350585938, 0.010996341705322266, 0.011809349060058594, 0.012622356414794922, 0.01343536376953125, 0.014248371124267578, 0.015061378479003906, 0.015874385833740234, 0.016687393188476562, 0.01750040054321289, 0.01831340789794922, 0.019126415252685547, 0.019939422607421875, 0.020752429962158203, 0.02156543731689453, 0.02237844467163086, 0.023191452026367188, 0.024004459381103516, 0.024817466735839844, 0.025630474090576172, 0.0264434814453125]}, "gradients/encoder.encoder.layers.7.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0, 0.0, 4.0, 4.0, 5.0, 4.0, 1.0, 6.0, 5.0, 9.0, 5.0, 18.0, 15.0, 18.0, 27.0, 45.0, 41.0, 56.0, 64.0, 92.0, 81.0, 89.0, 80.0, 75.0, 60.0, 30.0, 38.0, 28.0, 25.0, 13.0, 14.0, 10.0, 11.0, 7.0, 7.0, 4.0, 7.0, 2.0, 4.0, 4.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-9.894371032714844e-06, -9.564682841300964e-06, -9.234994649887085e-06, -8.905306458473206e-06, -8.575618267059326e-06, -8.245930075645447e-06, -7.916241884231567e-06, -7.586553692817688e-06, -7.256865501403809e-06, -6.927177309989929e-06, -6.59748911857605e-06, -6.26780092716217e-06, -5.938112735748291e-06, -5.608424544334412e-06, -5.278736352920532e-06, -4.949048161506653e-06, -4.6193599700927734e-06, -4.289671778678894e-06, -3.959983587265015e-06, -3.6302953958511353e-06, -3.300607204437256e-06, -2.9709190130233765e-06, -2.641230821609497e-06, -2.3115426301956177e-06, -1.9818544387817383e-06, -1.6521662473678589e-06, -1.3224780559539795e-06, -9.927898645401e-07, -6.631016731262207e-07, -3.334134817123413e-07, -3.725290298461914e-09, 3.259629011154175e-07, 6.556510925292969e-07, 9.853392839431763e-07, 1.3150274753570557e-06, 1.644715666770935e-06, 
1.9744038581848145e-06, 2.304092049598694e-06, 2.6337802410125732e-06, 2.9634684324264526e-06, 3.293156623840332e-06, 3.6228448152542114e-06, 3.952533006668091e-06, 4.28222119808197e-06, 4.61190938949585e-06, 4.941597580909729e-06, 5.271285772323608e-06, 5.600973963737488e-06, 5.930662155151367e-06, 6.260350346565247e-06, 6.590038537979126e-06, 6.919726729393005e-06, 7.249414920806885e-06, 7.579103112220764e-06, 7.908791303634644e-06, 8.238479495048523e-06, 8.568167686462402e-06, 8.897855877876282e-06, 9.227544069290161e-06, 9.55723226070404e-06, 9.88692045211792e-06, 1.02166086435318e-05, 1.0546296834945679e-05, 1.0875985026359558e-05, 1.1205673217773438e-05]}, "gradients/encoder.encoder.layers.7.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 3.0, 2.0, 4.0, 4.0, 10.0, 10.0, 13.0, 18.0, 22.0, 43.0, 46.0, 64.0, 92.0, 130.0, 180.0, 285.0, 378.0, 597.0, 856.0, 1406.0, 2342.0, 4078.0, 8109.0, 18207.0, 53932.0, 215682.0, 493092.0, 171848.0, 44248.0, 15822.0, 7183.0, 3758.0, 2232.0, 1242.0, 852.0, 537.0, 355.0, 264.0, 173.0, 146.0, 91.0, 52.0, 48.0, 30.0, 20.0, 23.0, 10.0, 11.0, 4.0, 3.0, 3.0, 6.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.02691650390625, -0.0260317325592041, -0.025146961212158203, -0.024262189865112305, -0.023377418518066406, -0.022492647171020508, -0.02160787582397461, -0.02072310447692871, -0.019838333129882812, -0.018953561782836914, -0.018068790435791016, -0.017184019088745117, -0.01629924774169922, -0.01541447639465332, -0.014529705047607422, -0.013644933700561523, -0.012760162353515625, -0.011875391006469727, -0.010990619659423828, -0.01010584831237793, -0.009221076965332031, -0.008336305618286133, -0.007451534271240234, -0.006566762924194336, -0.0056819915771484375, -0.004797220230102539, -0.003912448883056641, -0.003027677536010742, -0.0021429061889648438, -0.0012581348419189453, -0.0003733634948730469, 0.0005114078521728516, 0.00139617919921875, 0.0022809505462646484, 0.003165721893310547, 0.004050493240356445, 0.004935264587402344, 0.005820035934448242, 0.006704807281494141, 0.007589578628540039, 0.008474349975585938, 0.009359121322631836, 0.010243892669677734, 0.011128664016723633, 0.012013435363769531, 0.01289820671081543, 0.013782978057861328, 0.014667749404907227, 0.015552520751953125, 0.016437292098999023, 0.017322063446044922, 0.01820683479309082, 0.01909160614013672, 0.019976377487182617, 0.020861148834228516, 0.021745920181274414, 0.022630691528320312, 0.02351546287536621, 0.02440023422241211, 0.025285005569458008, 0.026169776916503906, 0.027054548263549805, 0.027939319610595703, 0.0288240909576416, 0.0297088623046875]}, "gradients/encoder.encoder.layers.7.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 4.0, 7.0, 2.0, 6.0, 7.0, 6.0, 12.0, 9.0, 17.0, 23.0, 30.0, 43.0, 31.0, 49.0, 39.0, 50.0, 67.0, 75.0, 67.0, 77.0, 48.0, 56.0, 55.0, 42.0, 40.0, 28.0, 18.0, 20.0, 19.0, 11.0, 8.0, 3.0, 6.0, 5.0, 6.0, 7.0, 5.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0175628662109375, -0.017005443572998047, -0.016448020935058594, -0.01589059829711914, -0.015333175659179688, -0.014775753021240234, -0.014218330383300781, -0.013660907745361328, -0.013103485107421875, -0.012546062469482422, -0.011988639831542969, -0.011431217193603516, -0.010873794555664062, -0.01031637191772461, -0.009758949279785156, -0.009201526641845703, -0.00864410400390625, -0.008086681365966797, -0.007529258728027344, -0.006971836090087891, 
-0.0064144134521484375, -0.005856990814208984, -0.005299568176269531, -0.004742145538330078, -0.004184722900390625, -0.003627300262451172, -0.0030698776245117188, -0.0025124549865722656, -0.0019550323486328125, -0.0013976097106933594, -0.0008401870727539062, -0.0002827644348144531, 0.000274658203125, 0.0008320808410644531, 0.0013895034790039062, 0.0019469261169433594, 0.0025043487548828125, 0.0030617713928222656, 0.0036191940307617188, 0.004176616668701172, 0.004734039306640625, 0.005291461944580078, 0.005848884582519531, 0.006406307220458984, 0.0069637298583984375, 0.007521152496337891, 0.008078575134277344, 0.008635997772216797, 0.00919342041015625, 0.009750843048095703, 0.010308265686035156, 0.01086568832397461, 0.011423110961914062, 0.011980533599853516, 0.012537956237792969, 0.013095378875732422, 0.013652801513671875, 0.014210224151611328, 0.014767646789550781, 0.015325069427490234, 0.015882492065429688, 0.01643991470336914, 0.016997337341308594, 0.017554759979248047, 0.0181121826171875]}, "gradients/encoder.encoder.layers.7.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 4.0, 8.0, 34.0, 77.0, 154.0, 273.0, 247.0, 118.0, 52.0, 26.0, 6.0, 0.0, 1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.7593518495559692, -0.7416942715644836, -0.724036693572998, -0.7063791751861572, -0.6887215971946716, -0.671064019203186, -0.6534065008163452, -0.6357489228248596, -0.618091344833374, -0.6004337668418884, -0.5827761888504028, -0.565118670463562, -0.5474610924720764, -0.5298035144805908, -0.51214599609375, -0.4944884181022644, -0.4768308401107788, -0.4591732621192932, -0.44151571393013, -0.4238581657409668, -0.4062005877494812, -0.3885430097579956, -0.3708854615688324, -0.3532279133796692, -0.3355703353881836, -0.317912757396698, -0.3002552092075348, -0.2825976610183716, -0.264940083026886, -0.24728251993656158, -0.22962495684623718, -0.21196739375591278, -0.19430989027023315, -0.17665232717990875, -0.15899476408958435, -0.14133720099925995, -0.12367963790893555, -0.10602207481861115, -0.08836451172828674, -0.07070694863796234, -0.05304938554763794, -0.03539182245731354, -0.017734259366989136, -7.669627666473389e-05, 0.017580866813659668, 0.03523842990398407, 0.05289599299430847, 0.07055355608463287, 0.08821111917495728, 0.10586868226528168, 0.12352624535560608, 0.14118380844593048, 0.15884137153625488, 0.17649893462657928, 0.1941564977169037, 0.2118140608072281, 0.2294716238975525, 0.2471291869878769, 0.2647867500782013, 0.2824442982673645, 0.3001018762588501, 0.3177594542503357, 0.3354170024394989, 0.3530745506286621, 0.3707321286201477]}, "gradients/encoder.encoder.layers.7.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 4.0, 1.0, 0.0, 3.0, 5.0, 2.0, 6.0, 8.0, 10.0, 6.0, 4.0, 5.0, 9.0, 11.0, 16.0, 14.0, 24.0, 20.0, 22.0, 19.0, 18.0, 38.0, 29.0, 29.0, 25.0, 33.0, 29.0, 39.0, 35.0, 27.0, 42.0, 37.0, 36.0, 35.0, 33.0, 36.0, 24.0, 27.0, 28.0, 28.0, 30.0, 18.0, 19.0, 16.0, 14.0, 13.0, 16.0, 10.0, 10.0, 8.0, 9.0, 9.0, 8.0, 3.0, 5.0, 4.0, 1.0, 1.0, 3.0, 5.0, 1.0, 2.0], "bins": [-0.1395571231842041, -0.13523133099079132, -0.13090552389621735, -0.12657973170280457, -0.12225393950939178, -0.11792813986539841, -0.11360234022140503, -0.10927654802799225, -0.10495074838399887, -0.1006249487400055, -0.09629915654659271, -0.09197335690259933, -0.08764755725860596, 
-0.08332176506519318, -0.0789959654211998, -0.07467016577720642, -0.07034437358379364, -0.06601857393980026, -0.06169278174638748, -0.057366982102394104, -0.053041186183691025, -0.048715390264987946, -0.04438959062099457, -0.04006379470229149, -0.03573799878358841, -0.03141220286488533, -0.027086405083537102, -0.022760607302188873, -0.018434811383485794, -0.014109015464782715, -0.009783217683434486, -0.005457419902086258, -0.0011316239833831787, 0.003194172866642475, 0.007519969716668129, 0.011845766566693783, 0.016171563416719437, 0.020497359335422516, 0.024823157116770744, 0.029148954898118973, 0.03347475081682205, 0.03780054673552513, 0.04212634265422821, 0.04645214229822159, 0.05077793821692467, 0.05510373413562775, 0.059429533779621124, 0.0637553334236145, 0.06808112561702728, 0.07240692526102066, 0.07673271745443344, 0.08105851709842682, 0.0853843092918396, 0.08971010893583298, 0.09403590857982635, 0.09836170077323914, 0.10268750041723251, 0.10701330006122589, 0.11133909225463867, 0.11566489189863205, 0.11999069154262543, 0.12431648373603821, 0.128642275929451, 0.13296808302402496, 0.13729387521743774]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 2.0, 5.0, 4.0, 6.0, 7.0, 7.0, 7.0, 7.0, 13.0, 19.0, 22.0, 28.0, 62.0, 65.0, 84.0, 141.0, 252.0, 351.0, 586.0, 1156.0, 2346.0, 5459.0, 12252.0, 37312.0, 176533.0, 1261719.0, 2152111.0, 437212.0, 73465.0, 19568.0, 7033.0, 3179.0, 1478.0, 667.0, 368.0, 252.0, 138.0, 90.0, 91.0, 43.0, 41.0, 24.0, 24.0, 14.0, 14.0, 5.0, 5.0, 7.0, 4.0, 2.0, 2.0, 3.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.058837890625, -0.057066917419433594, -0.05529594421386719, -0.05352497100830078, -0.051753997802734375, -0.04998302459716797, -0.04821205139160156, -0.046441078186035156, -0.04467010498046875, -0.042899131774902344, -0.04112815856933594, -0.03935718536376953, -0.037586212158203125, -0.03581523895263672, -0.03404426574707031, -0.032273292541503906, -0.0305023193359375, -0.028731346130371094, -0.026960372924804688, -0.02518939971923828, -0.023418426513671875, -0.02164745330810547, -0.019876480102539062, -0.018105506896972656, -0.01633453369140625, -0.014563560485839844, -0.012792587280273438, -0.011021614074707031, -0.009250640869140625, -0.007479667663574219, -0.0057086944580078125, -0.003937721252441406, -0.002166748046875, -0.00039577484130859375, 0.0013751983642578125, 0.0031461715698242188, 0.004917144775390625, 0.006688117980957031, 0.008459091186523438, 0.010230064392089844, 0.01200103759765625, 0.013772010803222656, 0.015542984008789062, 0.01731395721435547, 0.019084930419921875, 0.02085590362548828, 0.022626876831054688, 0.024397850036621094, 0.0261688232421875, 0.027939796447753906, 0.029710769653320312, 0.03148174285888672, 0.033252716064453125, 0.03502368927001953, 0.03679466247558594, 0.038565635681152344, 0.04033660888671875, 0.042107582092285156, 0.04387855529785156, 0.04564952850341797, 0.047420501708984375, 0.04919147491455078, 0.05096244812011719, 0.052733421325683594, 0.05450439453125]}, "gradients/encoder.encoder.layers.6.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 3.0, 1.0, 2.0, 5.0, 8.0, 19.0, 41.0, 58.0, 66.0, 113.0, 122.0, 150.0, 130.0, 100.0, 88.0, 44.0, 35.0, 18.0, 4.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.09039306640625, 
-0.0877676010131836, -0.08514213562011719, -0.08251667022705078, -0.07989120483398438, -0.07726573944091797, -0.07464027404785156, -0.07201480865478516, -0.06938934326171875, -0.06676387786865234, -0.06413841247558594, -0.06151294708251953, -0.058887481689453125, -0.05626201629638672, -0.05363655090332031, -0.051011085510253906, -0.0483856201171875, -0.045760154724121094, -0.04313468933105469, -0.04050922393798828, -0.037883758544921875, -0.03525829315185547, -0.03263282775878906, -0.030007362365722656, -0.02738189697265625, -0.024756431579589844, -0.022130966186523438, -0.01950550079345703, -0.016880035400390625, -0.014254570007324219, -0.011629104614257812, -0.009003639221191406, -0.006378173828125, -0.0037527084350585938, -0.0011272430419921875, 0.0014982223510742188, 0.004123687744140625, 0.006749153137207031, 0.009374618530273438, 0.012000083923339844, 0.01462554931640625, 0.017251014709472656, 0.019876480102539062, 0.02250194549560547, 0.025127410888671875, 0.02775287628173828, 0.030378341674804688, 0.033003807067871094, 0.0356292724609375, 0.038254737854003906, 0.04088020324707031, 0.04350566864013672, 0.046131134033203125, 0.04875659942626953, 0.05138206481933594, 0.054007530212402344, 0.05663299560546875, 0.059258460998535156, 0.06188392639160156, 0.06450939178466797, 0.06713485717773438, 0.06976032257080078, 0.07238578796386719, 0.0750112533569336, 0.07763671875]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 2.0, 2.0, 1.0, 4.0, 5.0, 5.0, 4.0, 8.0, 12.0, 15.0, 29.0, 45.0, 66.0, 107.0, 150.0, 245.0, 454.0, 950.0, 1877.0, 4846.0, 13819.0, 46873.0, 187578.0, 1104870.0, 2285974.0, 418308.0, 90202.0, 24477.0, 7752.0, 2879.0, 1226.0, 642.0, 341.0, 204.0, 125.0, 64.0, 54.0, 27.0, 14.0, 11.0, 12.0, 6.0, 7.0, 4.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0418701171875, -0.04021167755126953, -0.03855323791503906, -0.036894798278808594, -0.035236358642578125, -0.033577919006347656, -0.03191947937011719, -0.03026103973388672, -0.02860260009765625, -0.02694416046142578, -0.025285720825195312, -0.023627281188964844, -0.021968841552734375, -0.020310401916503906, -0.018651962280273438, -0.01699352264404297, -0.0153350830078125, -0.013676643371582031, -0.012018203735351562, -0.010359764099121094, -0.008701324462890625, -0.007042884826660156, -0.0053844451904296875, -0.0037260055541992188, -0.00206756591796875, -0.00040912628173828125, 0.0012493133544921875, 0.0029077529907226562, 0.004566192626953125, 0.006224632263183594, 0.007883071899414062, 0.009541511535644531, 0.011199951171875, 0.012858390808105469, 0.014516830444335938, 0.016175270080566406, 0.017833709716796875, 0.019492149353027344, 0.021150588989257812, 0.02280902862548828, 0.02446746826171875, 0.02612590789794922, 0.027784347534179688, 0.029442787170410156, 0.031101226806640625, 0.032759666442871094, 0.03441810607910156, 0.03607654571533203, 0.0377349853515625, 0.03939342498779297, 0.04105186462402344, 0.042710304260253906, 0.044368743896484375, 0.046027183532714844, 0.04768562316894531, 0.04934406280517578, 0.05100250244140625, 0.05266094207763672, 0.05431938171386719, 0.055977821350097656, 0.057636260986328125, 0.059294700622558594, 0.06095314025878906, 0.06261157989501953, 0.06427001953125]}, "gradients/encoder.encoder.layers.6.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 
1.0, 3.0, 2.0, 3.0, 12.0, 15.0, 23.0, 22.0, 30.0, 31.0, 64.0, 70.0, 96.0, 134.0, 159.0, 293.0, 360.0, 526.0, 552.0, 491.0, 369.0, 226.0, 171.0, 93.0, 85.0, 79.0, 46.0, 42.0, 23.0, 13.0, 11.0, 6.0, 8.0, 6.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.06256103515625, -0.060884952545166016, -0.05920886993408203, -0.05753278732299805, -0.05585670471191406, -0.05418062210083008, -0.052504539489746094, -0.05082845687866211, -0.049152374267578125, -0.04747629165649414, -0.045800209045410156, -0.04412412643432617, -0.04244804382324219, -0.0407719612121582, -0.03909587860107422, -0.037419795989990234, -0.03574371337890625, -0.034067630767822266, -0.03239154815673828, -0.030715465545654297, -0.029039382934570312, -0.027363300323486328, -0.025687217712402344, -0.02401113510131836, -0.022335052490234375, -0.02065896987915039, -0.018982887268066406, -0.017306804656982422, -0.015630722045898438, -0.013954639434814453, -0.012278556823730469, -0.010602474212646484, -0.0089263916015625, -0.007250308990478516, -0.005574226379394531, -0.003898143768310547, -0.0022220611572265625, -0.0005459785461425781, 0.0011301040649414062, 0.0028061866760253906, 0.004482269287109375, 0.006158351898193359, 0.007834434509277344, 0.009510517120361328, 0.011186599731445312, 0.012862682342529297, 0.014538764953613281, 0.016214847564697266, 0.01789093017578125, 0.019567012786865234, 0.02124309539794922, 0.022919178009033203, 0.024595260620117188, 0.026271343231201172, 0.027947425842285156, 0.02962350845336914, 0.031299591064453125, 0.03297567367553711, 0.034651756286621094, 0.03632783889770508, 0.03800392150878906, 0.03968000411987305, 0.04135608673095703, 0.043032169342041016, 0.044708251953125]}, "gradients/encoder.encoder.layers.6.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 22.0, 82.0, 146.0, 297.0, 230.0, 132.0, 59.0, 17.0, 9.0, 5.0, 5.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.0928850173950195, -1.0705254077911377, -1.0481659173965454, -1.0258063077926636, -1.0034466981887817, -0.9810871481895447, -0.9587275981903076, -0.9363679885864258, -0.914008378982544, -0.8916488289833069, -0.869289219379425, -0.846929669380188, -0.8245700597763062, -0.8022105097770691, -0.779850959777832, -0.7574913501739502, -0.7351318001747131, -0.7127722501754761, -0.6904126405715942, -0.6680530905723572, -0.6456934809684753, -0.6233339309692383, -0.6009743213653564, -0.5786147713661194, -0.5562552213668823, -0.5338956713676453, -0.5115360617637634, -0.48917651176452637, -0.46681690216064453, -0.44445735216140747, -0.422097772359848, -0.3997381925582886, -0.3773786425590515, -0.35501906275749207, -0.3326594829559326, -0.31029993295669556, -0.2879403233528137, -0.26558077335357666, -0.2432211935520172, -0.22086161375045776, -0.19850203394889832, -0.17614245414733887, -0.15378287434577942, -0.13142330944538116, -0.10906372964382172, -0.08670414984226227, -0.06434458494186401, -0.041985005140304565, -0.019625425338745117, 0.0027341507375240326, 0.025093726813793182, 0.047453299164772034, 0.06981287896633148, 0.09217245876789093, 0.11453202366828918, 0.13689160346984863, 0.15925118327140808, 0.18161076307296753, 0.20397034287452698, 0.22632990777492523, 0.24868948757648468, 0.2710490822792053, 0.2934086322784424, 0.31576821208000183, 0.3381277918815613]}, 
"gradients/encoder.encoder.layers.6.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 7.0, 1.0, 1.0, 8.0, 7.0, 10.0, 12.0, 13.0, 23.0, 23.0, 29.0, 25.0, 27.0, 28.0, 40.0, 44.0, 46.0, 39.0, 37.0, 40.0, 44.0, 40.0, 55.0, 31.0, 49.0, 39.0, 44.0, 43.0, 28.0, 23.0, 30.0, 25.0, 22.0, 8.0, 8.0, 18.0, 12.0, 7.0, 4.0, 3.0, 5.0, 8.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.17290765047073364, -0.1673312932252884, -0.16175492107868195, -0.1561785638332367, -0.15060219168663025, -0.145025834441185, -0.13944947719573975, -0.1338731050491333, -0.12829673290252686, -0.12272036820650101, -0.11714400351047516, -0.11156764626502991, -0.10599127411842346, -0.10041491687297821, -0.09483855217695236, -0.08926218748092651, -0.08368583023548126, -0.07810946553945541, -0.07253310084342957, -0.06695674359798431, -0.06138037517666817, -0.05580401048064232, -0.05022764950990677, -0.04465128481388092, -0.03907492011785507, -0.033498555421829224, -0.027922192588448524, -0.022345829755067825, -0.016769465059041977, -0.011193100363016129, -0.005616739392280579, -4.0374696254730225e-05, 0.005535989999771118, 0.011112353764474392, 0.016688717529177666, 0.022265080362558365, 0.027841445058584213, 0.03341780975461006, 0.03899417072534561, 0.04457053542137146, 0.05014690011739731, 0.05572326481342316, 0.061299629509449005, 0.06687599420547485, 0.0724523514509201, 0.07802872359752655, 0.0836050808429718, 0.08918144553899765, 0.0947578102350235, 0.10033417493104935, 0.1059105396270752, 0.11148689687252045, 0.11706326901912689, 0.12263962626457214, 0.1282159984111786, 0.13379235565662384, 0.1393687129020691, 0.14494507014751434, 0.1505214422941208, 0.15609779953956604, 0.16167417168617249, 0.16725052893161774, 0.172826886177063, 0.17840325832366943, 0.18397963047027588]}, "gradients/encoder.encoder.layers.6.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 10.0, 11.0, 13.0, 15.0, 17.0, 25.0, 36.0, 46.0, 79.0, 105.0, 118.0, 252.0, 397.0, 724.0, 1309.0, 3072.0, 8928.0, 30639.0, 122355.0, 451251.0, 323098.0, 75481.0, 19787.0, 5853.0, 2283.0, 1049.0, 554.0, 365.0, 231.0, 143.0, 102.0, 70.0, 33.0, 25.0, 17.0, 19.0, 7.0, 8.0, 12.0, 6.0, 6.0, 4.0, 1.0, 3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09149169921875, -0.08870506286621094, -0.08591842651367188, -0.08313179016113281, -0.08034515380859375, -0.07755851745605469, -0.07477188110351562, -0.07198524475097656, -0.0691986083984375, -0.06641197204589844, -0.06362533569335938, -0.06083869934082031, -0.05805206298828125, -0.05526542663574219, -0.052478790283203125, -0.04969215393066406, -0.046905517578125, -0.04411888122558594, -0.041332244873046875, -0.03854560852050781, -0.03575897216796875, -0.03297233581542969, -0.030185699462890625, -0.027399063110351562, -0.0246124267578125, -0.021825790405273438, -0.019039154052734375, -0.016252517700195312, -0.01346588134765625, -0.010679244995117188, -0.007892608642578125, -0.0051059722900390625, -0.0023193359375, 0.0004673004150390625, 0.003253936767578125, 0.0060405731201171875, 0.00882720947265625, 0.011613845825195312, 0.014400482177734375, 0.017187118530273438, 0.0199737548828125, 0.022760391235351562, 0.025547027587890625, 0.028333663940429688, 0.03112030029296875, 0.03390693664550781, 0.036693572998046875, 0.03948020935058594, 0.042266845703125, 0.04505348205566406, 0.047840118408203125, 0.05062675476074219, 0.05341339111328125, 0.05620002746582031, 
0.058986663818359375, 0.06177330017089844, 0.0645599365234375, 0.06734657287597656, 0.07013320922851562, 0.07291984558105469, 0.07570648193359375, 0.07849311828613281, 0.08127975463867188, 0.08406639099121094, 0.08685302734375]}, "gradients/encoder.encoder.layers.6.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 2.0, 2.0, 4.0, 8.0, 18.0, 37.0, 59.0, 73.0, 101.0, 119.0, 126.0, 128.0, 112.0, 105.0, 55.0, 29.0, 18.0, 10.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0894775390625, -0.08688831329345703, -0.08429908752441406, -0.0817098617553711, -0.07912063598632812, -0.07653141021728516, -0.07394218444824219, -0.07135295867919922, -0.06876373291015625, -0.06617450714111328, -0.06358528137207031, -0.060996055603027344, -0.058406829833984375, -0.055817604064941406, -0.05322837829589844, -0.05063915252685547, -0.0480499267578125, -0.04546070098876953, -0.04287147521972656, -0.040282249450683594, -0.037693023681640625, -0.035103797912597656, -0.03251457214355469, -0.02992534637451172, -0.02733612060546875, -0.02474689483642578, -0.022157669067382812, -0.019568443298339844, -0.016979217529296875, -0.014389991760253906, -0.011800765991210938, -0.009211540222167969, -0.006622314453125, -0.004033088684082031, -0.0014438629150390625, 0.0011453628540039062, 0.003734588623046875, 0.006323814392089844, 0.008913040161132812, 0.011502265930175781, 0.01409149169921875, 0.01668071746826172, 0.019269943237304688, 0.021859169006347656, 0.024448394775390625, 0.027037620544433594, 0.029626846313476562, 0.03221607208251953, 0.0348052978515625, 0.03739452362060547, 0.03998374938964844, 0.042572975158691406, 0.045162200927734375, 0.047751426696777344, 0.05034065246582031, 0.05292987823486328, 0.05551910400390625, 0.05810832977294922, 0.06069755554199219, 0.06328678131103516, 0.06587600708007812, 0.0684652328491211, 0.07105445861816406, 0.07364368438720703, 0.07623291015625]}, "gradients/encoder.encoder.layers.6.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 4.0, 1.0, 3.0, 4.0, 7.0, 9.0, 17.0, 19.0, 26.0, 33.0, 48.0, 72.0, 130.0, 174.0, 341.0, 615.0, 1425.0, 3865.0, 12553.0, 41900.0, 151336.0, 466166.0, 268477.0, 70342.0, 20693.0, 6214.0, 2127.0, 833.0, 448.0, 218.0, 141.0, 102.0, 75.0, 47.0, 31.0, 14.0, 22.0, 11.0, 11.0, 8.0, 2.0, 2.0, 2.0, 0.0, 3.0, 1.0, 0.0, 1.0], "bins": [-0.08819580078125, -0.08585691452026367, -0.08351802825927734, -0.08117914199829102, -0.07884025573730469, -0.07650136947631836, -0.07416248321533203, -0.0718235969543457, -0.06948471069335938, -0.06714582443237305, -0.06480693817138672, -0.06246805191040039, -0.06012916564941406, -0.057790279388427734, -0.055451393127441406, -0.05311250686645508, -0.05077362060546875, -0.04843473434448242, -0.046095848083496094, -0.043756961822509766, -0.04141807556152344, -0.03907918930053711, -0.03674030303955078, -0.03440141677856445, -0.032062530517578125, -0.029723644256591797, -0.02738475799560547, -0.02504587173461914, -0.022706985473632812, -0.020368099212646484, -0.018029212951660156, -0.015690326690673828, -0.0133514404296875, -0.011012554168701172, -0.008673667907714844, -0.006334781646728516, -0.0039958953857421875, -0.0016570091247558594, 0.0006818771362304688, 0.003020763397216797, 0.005359649658203125, 0.007698535919189453, 0.010037422180175781, 
0.01237630844116211, 0.014715194702148438, 0.017054080963134766, 0.019392967224121094, 0.021731853485107422, 0.02407073974609375, 0.026409626007080078, 0.028748512268066406, 0.031087398529052734, 0.03342628479003906, 0.03576517105102539, 0.03810405731201172, 0.04044294357299805, 0.042781829833984375, 0.0451207160949707, 0.04745960235595703, 0.04979848861694336, 0.05213737487792969, 0.054476261138916016, 0.056815147399902344, 0.05915403366088867, 0.061492919921875]}, "gradients/encoder.encoder.layers.6.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 6.0, 3.0, 4.0, 8.0, 5.0, 11.0, 8.0, 13.0, 13.0, 10.0, 20.0, 18.0, 30.0, 30.0, 42.0, 41.0, 28.0, 56.0, 58.0, 48.0, 52.0, 59.0, 46.0, 43.0, 39.0, 50.0, 35.0, 40.0, 24.0, 18.0, 28.0, 33.0, 16.0, 15.0, 8.0, 3.0, 12.0, 9.0, 7.0, 8.0, 3.0, 4.0, 3.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07757568359375, -0.0751953125, -0.07281494140625, -0.0704345703125, -0.06805419921875, -0.065673828125, -0.06329345703125, -0.0609130859375, -0.05853271484375, -0.05615234375, -0.05377197265625, -0.0513916015625, -0.04901123046875, -0.046630859375, -0.04425048828125, -0.0418701171875, -0.03948974609375, -0.037109375, -0.03472900390625, -0.0323486328125, -0.02996826171875, -0.027587890625, -0.02520751953125, -0.0228271484375, -0.02044677734375, -0.01806640625, -0.01568603515625, -0.0133056640625, -0.01092529296875, -0.008544921875, -0.00616455078125, -0.0037841796875, -0.00140380859375, 0.0009765625, 0.00335693359375, 0.0057373046875, 0.00811767578125, 0.010498046875, 0.01287841796875, 0.0152587890625, 0.01763916015625, 0.02001953125, 0.02239990234375, 0.0247802734375, 0.02716064453125, 0.029541015625, 0.03192138671875, 0.0343017578125, 0.03668212890625, 0.0390625, 0.04144287109375, 0.0438232421875, 0.04620361328125, 0.048583984375, 0.05096435546875, 0.0533447265625, 0.05572509765625, 0.05810546875, 0.06048583984375, 0.0628662109375, 0.06524658203125, 0.067626953125, 0.07000732421875, 0.0723876953125, 0.07476806640625]}, "gradients/encoder.encoder.layers.6.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0, 1.0, 5.0, 3.0, 6.0, 8.0, 10.0, 33.0, 25.0, 40.0, 57.0, 94.0, 138.0, 205.0, 319.0, 512.0, 828.0, 1438.0, 2635.0, 5046.0, 10754.0, 23581.0, 54375.0, 130019.0, 294140.0, 294031.0, 129873.0, 54511.0, 23750.0, 10564.0, 5152.0, 2620.0, 1387.0, 881.0, 511.0, 367.0, 210.0, 126.0, 104.0, 76.0, 48.0, 34.0, 14.0, 12.0, 7.0, 3.0, 6.0, 3.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01654052734375, -0.016023635864257812, -0.015506744384765625, -0.014989852905273438, -0.01447296142578125, -0.013956069946289062, -0.013439178466796875, -0.012922286987304688, -0.0124053955078125, -0.011888504028320312, -0.011371612548828125, -0.010854721069335938, -0.01033782958984375, -0.009820938110351562, -0.009304046630859375, -0.008787155151367188, -0.008270263671875, -0.0077533721923828125, -0.007236480712890625, -0.0067195892333984375, -0.00620269775390625, -0.0056858062744140625, -0.005168914794921875, -0.0046520233154296875, -0.0041351318359375, -0.0036182403564453125, -0.003101348876953125, -0.0025844573974609375, -0.00206756591796875, -0.0015506744384765625, -0.001033782958984375, -0.0005168914794921875, 0.0, 0.0005168914794921875, 0.001033782958984375, 0.0015506744384765625, 0.00206756591796875, 0.0025844573974609375, 0.003101348876953125, 0.0036182403564453125, 0.0041351318359375, 0.0046520233154296875, 0.005168914794921875, 
0.0056858062744140625, 0.00620269775390625, 0.0067195892333984375, 0.007236480712890625, 0.0077533721923828125, 0.008270263671875, 0.008787155151367188, 0.009304046630859375, 0.009820938110351562, 0.01033782958984375, 0.010854721069335938, 0.011371612548828125, 0.011888504028320312, 0.0124053955078125, 0.012922286987304688, 0.013439178466796875, 0.013956069946289062, 0.01447296142578125, 0.014989852905273438, 0.015506744384765625, 0.016023635864257812, 0.01654052734375]}, "gradients/encoder.encoder.layers.6.attention.k_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 4.0, 3.0, 4.0, 4.0, 3.0, 15.0, 12.0, 36.0, 36.0, 48.0, 83.0, 96.0, 104.0, 119.0, 76.0, 115.0, 82.0, 46.0, 31.0, 28.0, 16.0, 20.0, 6.0, 8.0, 5.0, 7.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-7.569789886474609e-06, -7.164664566516876e-06, -6.759539246559143e-06, -6.35441392660141e-06, -5.949288606643677e-06, -5.544163286685944e-06, -5.1390379667282104e-06, -4.733912646770477e-06, -4.328787326812744e-06, -3.923662006855011e-06, -3.518536686897278e-06, -3.1134113669395447e-06, -2.7082860469818115e-06, -2.3031607270240784e-06, -1.8980354070663452e-06, -1.492910087108612e-06, -1.087784767150879e-06, -6.826594471931458e-07, -2.775341272354126e-07, 1.2759119272232056e-07, 5.327165126800537e-07, 9.378418326377869e-07, 1.34296715259552e-06, 1.7480924725532532e-06, 2.1532177925109863e-06, 2.5583431124687195e-06, 2.9634684324264526e-06, 3.368593752384186e-06, 3.773719072341919e-06, 4.178844392299652e-06, 4.583969712257385e-06, 4.989095032215118e-06, 5.3942203521728516e-06, 5.799345672130585e-06, 6.204470992088318e-06, 6.609596312046051e-06, 7.014721632003784e-06, 7.419846951961517e-06, 7.82497227191925e-06, 8.230097591876984e-06, 8.635222911834717e-06, 9.04034823179245e-06, 9.445473551750183e-06, 9.850598871707916e-06, 1.025572419166565e-05, 1.0660849511623383e-05, 1.1065974831581116e-05, 1.1471100151538849e-05, 1.1876225471496582e-05, 1.2281350791454315e-05, 1.2686476111412048e-05, 1.3091601431369781e-05, 1.3496726751327515e-05, 1.3901852071285248e-05, 1.4306977391242981e-05, 1.4712102711200714e-05, 1.5117228031158447e-05, 1.552235335111618e-05, 1.5927478671073914e-05, 1.6332603991031647e-05, 1.673772931098938e-05, 1.7142854630947113e-05, 1.7547979950904846e-05, 1.795310527086258e-05, 1.8358230590820312e-05]}, "gradients/encoder.encoder.layers.6.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 5.0, 1.0, 3.0, 2.0, 6.0, 6.0, 4.0, 13.0, 20.0, 15.0, 36.0, 38.0, 68.0, 88.0, 179.0, 283.0, 541.0, 947.0, 1712.0, 3369.0, 6683.0, 14961.0, 37889.0, 100306.0, 255981.0, 349795.0, 167383.0, 63411.0, 24328.0, 10370.0, 4780.0, 2361.0, 1252.0, 676.0, 392.0, 232.0, 145.0, 98.0, 56.0, 36.0, 23.0, 19.0, 11.0, 9.0, 5.0, 11.0, 3.0, 4.0, 4.0, 6.0, 1.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.020660400390625, -0.02003931999206543, -0.01941823959350586, -0.01879715919494629, -0.01817607879638672, -0.01755499839782715, -0.016933917999267578, -0.016312837600708008, -0.015691757202148438, -0.015070676803588867, -0.014449596405029297, -0.013828516006469727, -0.013207435607910156, -0.012586355209350586, -0.011965274810791016, -0.011344194412231445, -0.010723114013671875, -0.010102033615112305, -0.009480953216552734, -0.008859872817993164, -0.008238792419433594, -0.0076177120208740234, -0.006996631622314453, -0.006375551223754883, -0.0057544708251953125, 
-0.005133390426635742, -0.004512310028076172, -0.0038912296295166016, -0.0032701492309570312, -0.002649068832397461, -0.0020279884338378906, -0.0014069080352783203, -0.00078582763671875, -0.0001647472381591797, 0.0004563331604003906, 0.001077413558959961, 0.0016984939575195312, 0.0023195743560791016, 0.002940654754638672, 0.003561735153198242, 0.0041828155517578125, 0.004803895950317383, 0.005424976348876953, 0.0060460567474365234, 0.006667137145996094, 0.007288217544555664, 0.007909297943115234, 0.008530378341674805, 0.009151458740234375, 0.009772539138793945, 0.010393619537353516, 0.011014699935913086, 0.011635780334472656, 0.012256860733032227, 0.012877941131591797, 0.013499021530151367, 0.014120101928710938, 0.014741182327270508, 0.015362262725830078, 0.01598334312438965, 0.01660442352294922, 0.01722550392150879, 0.01784658432006836, 0.01846766471862793, 0.0190887451171875]}, "gradients/encoder.encoder.layers.6.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 8.0, 9.0, 7.0, 9.0, 10.0, 19.0, 20.0, 20.0, 26.0, 27.0, 23.0, 37.0, 50.0, 53.0, 57.0, 57.0, 64.0, 53.0, 47.0, 50.0, 58.0, 45.0, 43.0, 31.0, 34.0, 22.0, 15.0, 21.0, 17.0, 16.0, 7.0, 7.0, 12.0, 9.0, 5.0, 4.0, 4.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.0158538818359375, -0.015375614166259766, -0.014897346496582031, -0.014419078826904297, -0.013940811157226562, -0.013462543487548828, -0.012984275817871094, -0.01250600814819336, -0.012027740478515625, -0.01154947280883789, -0.011071205139160156, -0.010592937469482422, -0.010114669799804688, -0.009636402130126953, -0.009158134460449219, -0.008679866790771484, -0.00820159912109375, -0.007723331451416016, -0.007245063781738281, -0.006766796112060547, -0.0062885284423828125, -0.005810260772705078, -0.005331993103027344, -0.004853725433349609, -0.004375457763671875, -0.0038971900939941406, -0.0034189224243164062, -0.002940654754638672, -0.0024623870849609375, -0.001984119415283203, -0.0015058517456054688, -0.0010275840759277344, -0.00054931640625, -7.104873657226562e-05, 0.00040721893310546875, 0.0008854866027832031, 0.0013637542724609375, 0.0018420219421386719, 0.0023202896118164062, 0.0027985572814941406, 0.003276824951171875, 0.0037550926208496094, 0.004233360290527344, 0.004711627960205078, 0.0051898956298828125, 0.005668163299560547, 0.006146430969238281, 0.006624698638916016, 0.00710296630859375, 0.007581233978271484, 0.008059501647949219, 0.008537769317626953, 0.009016036987304688, 0.009494304656982422, 0.009972572326660156, 0.01045083999633789, 0.010929107666015625, 0.01140737533569336, 0.011885643005371094, 0.012363910675048828, 0.012842178344726562, 0.013320446014404297, 0.013798713684082031, 0.014276981353759766, 0.0147552490234375]}, "gradients/encoder.encoder.layers.6.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 4.0, 0.0, 4.0, 3.0, 19.0, 19.0, 35.0, 50.0, 69.0, 108.0, 103.0, 134.0, 133.0, 97.0, 76.0, 64.0, 37.0, 8.0, 8.0, 9.0, 9.0, 7.0, 2.0, 4.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.26612961292266846, -0.25767847895622253, -0.2492273449897766, -0.2407762110233307, -0.23232507705688477, -0.22387394309043884, -0.21542279422283173, -0.2069716602563858, -0.19852052628993988, -0.19006939232349396, -0.18161825835704803, -0.1731671243906021, -0.164715975522995, -0.15626484155654907, 
-0.14781370759010315, -0.13936257362365723, -0.1309114396572113, -0.12246030569076538, -0.11400917172431946, -0.10555803030729294, -0.09710689634084702, -0.08865576237440109, -0.08020462095737457, -0.07175348699092865, -0.06330235302448273, -0.054851219058036804, -0.04640008136630058, -0.03794894367456436, -0.02949780970811844, -0.021046675741672516, -0.012595538049936295, -0.004144400358200073, 0.004306763410568237, 0.01275789923965931, 0.02120903506875038, 0.029660170897841454, 0.038111306726932526, 0.04656244069337845, 0.05501357838511467, 0.06346471607685089, 0.07191585004329681, 0.08036698400974274, 0.08881811797618866, 0.09726925939321518, 0.1057203933596611, 0.11417152732610703, 0.12262266874313354, 0.13107380270957947, 0.1395249366760254, 0.1479760706424713, 0.15642720460891724, 0.16487833857536316, 0.17332947254180908, 0.181780606508255, 0.19023175537586212, 0.19868288934230804, 0.20713402330875397, 0.2155851572751999, 0.2240362912416458, 0.23248742520809174, 0.24093857407569885, 0.24938970804214478, 0.2578408420085907, 0.2662919759750366, 0.27474310994148254]}, "gradients/encoder.encoder.layers.6.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 2.0, 0.0, 1.0, 6.0, 1.0, 5.0, 10.0, 5.0, 17.0, 9.0, 18.0, 14.0, 22.0, 24.0, 27.0, 37.0, 25.0, 29.0, 36.0, 41.0, 48.0, 36.0, 46.0, 33.0, 46.0, 33.0, 45.0, 34.0, 36.0, 34.0, 31.0, 46.0, 33.0, 28.0, 26.0, 23.0, 18.0, 18.0, 19.0, 15.0, 6.0, 5.0, 7.0, 6.0, 5.0, 2.0, 3.0, 1.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.16888469457626343, -0.16344721615314484, -0.15800973773002625, -0.15257225930690765, -0.14713478088378906, -0.14169730246067047, -0.13625982403755188, -0.1308223307132721, -0.1253848671913147, -0.1199473887681961, -0.11450991034507751, -0.10907243192195892, -0.10363495349884033, -0.09819747507572174, -0.09275998920202255, -0.08732251077890396, -0.08188502490520477, -0.07644754648208618, -0.07101006805896759, -0.065572589635849, -0.06013510748744011, -0.05469762906432152, -0.04926014691591263, -0.04382266849279404, -0.038385190069675446, -0.032947711646556854, -0.027510231360793114, -0.022072751075029373, -0.016635272651910782, -0.01119779422879219, -0.005760312080383301, -0.00032283365726470947, 0.005114644765853882, 0.010552124120295048, 0.015989603474736214, 0.021427083760499954, 0.026864562183618546, 0.03230204060673714, 0.03773952275514603, 0.04317700117826462, 0.04861447960138321, 0.0540519580245018, 0.05948943644762039, 0.06492692232131958, 0.07036440074443817, 0.07580187916755676, 0.08123935759067535, 0.08667683601379395, 0.09211431443691254, 0.09755179286003113, 0.10298927128314972, 0.10842674970626831, 0.1138642281293869, 0.1193017065525055, 0.12473919242620468, 0.13017666339874268, 0.13561415672302246, 0.14105163514614105, 0.14648911356925964, 0.15192659199237823, 0.15736407041549683, 0.16280154883861542, 0.168239027261734, 0.1736765205860138, 0.1791139841079712]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 2.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 4.0, 1.0, 11.0, 6.0, 7.0, 20.0, 15.0, 16.0, 25.0, 64.0, 80.0, 125.0, 265.0, 403.0, 851.0, 2023.0, 5443.0, 16532.0, 82865.0, 989495.0, 2668665.0, 366349.0, 44640.0, 10172.0, 3657.0, 1355.0, 528.0, 266.0, 142.0, 87.0, 59.0, 27.0, 31.0, 12.0, 12.0, 12.0, 6.0, 6.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06524658203125, -0.0629415512084961, -0.06063652038574219, -0.05833148956298828, -0.056026458740234375, 
-0.05372142791748047, -0.05141639709472656, -0.049111366271972656, -0.04680633544921875, -0.044501304626464844, -0.04219627380371094, -0.03989124298095703, -0.037586212158203125, -0.03528118133544922, -0.03297615051269531, -0.030671119689941406, -0.0283660888671875, -0.026061058044433594, -0.023756027221679688, -0.02145099639892578, -0.019145965576171875, -0.01684093475341797, -0.014535903930664062, -0.012230873107910156, -0.00992584228515625, -0.007620811462402344, -0.0053157806396484375, -0.0030107498168945312, -0.000705718994140625, 0.0015993118286132812, 0.0039043426513671875, 0.006209373474121094, 0.008514404296875, 0.010819435119628906, 0.013124465942382812, 0.015429496765136719, 0.017734527587890625, 0.02003955841064453, 0.022344589233398438, 0.024649620056152344, 0.02695465087890625, 0.029259681701660156, 0.03156471252441406, 0.03386974334716797, 0.036174774169921875, 0.03847980499267578, 0.04078483581542969, 0.043089866638183594, 0.0453948974609375, 0.047699928283691406, 0.05000495910644531, 0.05230998992919922, 0.054615020751953125, 0.05692005157470703, 0.05922508239746094, 0.061530113220214844, 0.06383514404296875, 0.06614017486572266, 0.06844520568847656, 0.07075023651123047, 0.07305526733398438, 0.07536029815673828, 0.07766532897949219, 0.0799703598022461, 0.082275390625]}, "gradients/encoder.encoder.layers.5.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 3.0, 3.0, 6.0, 4.0, 17.0, 30.0, 51.0, 54.0, 111.0, 130.0, 132.0, 148.0, 126.0, 88.0, 44.0, 36.0, 17.0, 8.0, 5.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0882568359375, -0.08563041687011719, -0.08300399780273438, -0.08037757873535156, -0.07775115966796875, -0.07512474060058594, -0.07249832153320312, -0.06987190246582031, -0.0672454833984375, -0.06461906433105469, -0.061992645263671875, -0.05936622619628906, -0.05673980712890625, -0.05411338806152344, -0.051486968994140625, -0.04886054992675781, -0.046234130859375, -0.04360771179199219, -0.040981292724609375, -0.03835487365722656, -0.03572845458984375, -0.03310203552246094, -0.030475616455078125, -0.027849197387695312, -0.0252227783203125, -0.022596359252929688, -0.019969940185546875, -0.017343521118164062, -0.01471710205078125, -0.012090682983398438, -0.009464263916015625, -0.0068378448486328125, -0.00421142578125, -0.0015850067138671875, 0.001041412353515625, 0.0036678314208984375, 0.00629425048828125, 0.008920669555664062, 0.011547088623046875, 0.014173507690429688, 0.0167999267578125, 0.019426345825195312, 0.022052764892578125, 0.024679183959960938, 0.02730560302734375, 0.029932022094726562, 0.032558441162109375, 0.03518486022949219, 0.037811279296875, 0.04043769836425781, 0.043064117431640625, 0.04569053649902344, 0.04831695556640625, 0.05094337463378906, 0.053569793701171875, 0.05619621276855469, 0.0588226318359375, 0.06144905090332031, 0.06407546997070312, 0.06670188903808594, 0.06932830810546875, 0.07195472717285156, 0.07458114624023438, 0.07720756530761719, 0.079833984375]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 2.0, 8.0, 6.0, 4.0, 18.0, 19.0, 21.0, 39.0, 53.0, 75.0, 111.0, 195.0, 369.0, 720.0, 1532.0, 4525.0, 17304.0, 91952.0, 755627.0, 2860610.0, 388592.0, 55216.0, 11435.0, 3335.0, 1224.0, 552.0, 277.0, 170.0, 85.0, 68.0, 43.0, 33.0, 17.0, 10.0, 
16.0, 11.0, 5.0, 6.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.060699462890625, -0.05839681625366211, -0.05609416961669922, -0.05379152297973633, -0.05148887634277344, -0.04918622970581055, -0.046883583068847656, -0.044580936431884766, -0.042278289794921875, -0.039975643157958984, -0.037672996520996094, -0.0353703498840332, -0.03306770324707031, -0.030765056610107422, -0.02846240997314453, -0.02615976333618164, -0.02385711669921875, -0.02155447006225586, -0.01925182342529297, -0.016949176788330078, -0.014646530151367188, -0.012343883514404297, -0.010041236877441406, -0.007738590240478516, -0.005435943603515625, -0.0031332969665527344, -0.0008306503295898438, 0.0014719963073730469, 0.0037746429443359375, 0.006077289581298828, 0.008379936218261719, 0.01068258285522461, 0.0129852294921875, 0.01528787612915039, 0.01759052276611328, 0.019893169403076172, 0.022195816040039062, 0.024498462677001953, 0.026801109313964844, 0.029103755950927734, 0.031406402587890625, 0.033709049224853516, 0.036011695861816406, 0.0383143424987793, 0.04061698913574219, 0.04291963577270508, 0.04522228240966797, 0.04752492904663086, 0.04982757568359375, 0.05213022232055664, 0.05443286895751953, 0.05673551559448242, 0.05903816223144531, 0.0613408088684082, 0.0636434555053711, 0.06594610214233398, 0.06824874877929688, 0.07055139541625977, 0.07285404205322266, 0.07515668869018555, 0.07745933532714844, 0.07976198196411133, 0.08206462860107422, 0.08436727523803711, 0.086669921875]}, "gradients/encoder.encoder.layers.5.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 6.0, 3.0, 5.0, 7.0, 11.0, 16.0, 10.0, 24.0, 20.0, 32.0, 41.0, 60.0, 76.0, 89.0, 124.0, 160.0, 227.0, 334.0, 419.0, 518.0, 460.0, 364.0, 279.0, 219.0, 151.0, 104.0, 73.0, 52.0, 47.0, 28.0, 35.0, 19.0, 11.0, 14.0, 7.0, 12.0, 8.0, 6.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.05029296875, -0.04875755310058594, -0.047222137451171875, -0.04568672180175781, -0.04415130615234375, -0.04261589050292969, -0.041080474853515625, -0.03954505920410156, -0.0380096435546875, -0.03647422790527344, -0.034938812255859375, -0.03340339660644531, -0.03186798095703125, -0.030332565307617188, -0.028797149658203125, -0.027261734008789062, -0.025726318359375, -0.024190902709960938, -0.022655487060546875, -0.021120071411132812, -0.01958465576171875, -0.018049240112304688, -0.016513824462890625, -0.014978408813476562, -0.0134429931640625, -0.011907577514648438, -0.010372161865234375, -0.008836746215820312, -0.00730133056640625, -0.0057659149169921875, -0.004230499267578125, -0.0026950836181640625, -0.00115966796875, 0.0003757476806640625, 0.001911163330078125, 0.0034465789794921875, 0.00498199462890625, 0.0065174102783203125, 0.008052825927734375, 0.009588241577148438, 0.0111236572265625, 0.012659072875976562, 0.014194488525390625, 0.015729904174804688, 0.01726531982421875, 0.018800735473632812, 0.020336151123046875, 0.021871566772460938, 0.023406982421875, 0.024942398071289062, 0.026477813720703125, 0.028013229370117188, 0.02954864501953125, 0.031084060668945312, 0.032619476318359375, 0.03415489196777344, 0.0356903076171875, 0.03722572326660156, 0.038761138916015625, 0.04029655456542969, 0.04183197021484375, 0.04336738586425781, 0.044902801513671875, 0.04643821716308594, 0.0479736328125]}, "gradients/encoder.encoder.layers.5.final_layer_norm.weight": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 
1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 2.0, 4.0, 6.0, 11.0, 43.0, 79.0, 174.0, 228.0, 218.0, 147.0, 68.0, 17.0, 9.0, 3.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.39122310280799866, -0.36937573552131653, -0.3475283682346344, -0.32568100094795227, -0.30383363366127014, -0.281986266374588, -0.2601388692855835, -0.23829151690006256, -0.21644414961338043, -0.1945967823266983, -0.17274941504001617, -0.15090203285217285, -0.12905466556549072, -0.10720730572938919, -0.08535993099212646, -0.06351256370544434, -0.04166519641876221, -0.01981782726943493, 0.0020295418798923492, 0.023876912891864777, 0.045724280178546906, 0.06757164746522903, 0.08941902220249176, 0.11126638948917389, 0.13311375677585602, 0.15496112406253815, 0.17680849134922028, 0.1986558735370636, 0.22050324082374573, 0.24235060811042786, 0.26419797539711, 0.2860453426837921, 0.30789273977279663, 0.32974010705947876, 0.3515874743461609, 0.373434841632843, 0.39528220891952515, 0.4171295762062073, 0.4389769434928894, 0.46082431077957153, 0.48267167806625366, 0.5045190453529358, 0.5263664126396179, 0.5482137799263, 0.5700611472129822, 0.5919085144996643, 0.6137558817863464, 0.6356032490730286, 0.6574506759643555, 0.6792980432510376, 0.7011454105377197, 0.7229927778244019, 0.744840145111084, 0.7666875123977661, 0.7885348796844482, 0.8103822469711304, 0.8322296142578125, 0.8540769815444946, 0.8759243488311768, 0.8977717161178589, 0.919619083404541, 0.9414664506912231, 0.9633138179779053, 0.9851611852645874, 1.0070085525512695]}, "gradients/encoder.encoder.layers.5.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 3.0, 4.0, 4.0, 5.0, 10.0, 6.0, 5.0, 11.0, 13.0, 4.0, 19.0, 14.0, 23.0, 19.0, 22.0, 18.0, 27.0, 23.0, 35.0, 39.0, 28.0, 33.0, 35.0, 27.0, 43.0, 29.0, 38.0, 44.0, 41.0, 36.0, 34.0, 36.0, 37.0, 34.0, 37.0, 21.0, 22.0, 18.0, 14.0, 19.0, 12.0, 17.0, 11.0, 10.0, 8.0, 6.0, 5.0, 5.0, 3.0, 3.0, 3.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.1346113085746765, -0.1298602968454361, -0.12510928511619568, -0.12035828828811646, -0.11560727655887604, -0.11085626482963562, -0.1061052605509758, -0.10135425627231598, -0.09660324454307556, -0.09185223281383514, -0.08710122853517532, -0.0823502242565155, -0.07759921252727509, -0.07284820079803467, -0.06809719651937485, -0.06334619224071503, -0.05859518051147461, -0.05384417250752449, -0.04909316450357437, -0.04434215649962425, -0.03959114849567413, -0.034840140491724014, -0.030089132487773895, -0.025338124483823776, -0.020587116479873657, -0.015836108475923538, -0.01108510047197342, -0.0063340924680233, -0.0015830844640731812, 0.003167923539876938, 0.007918931543827057, 0.012669939547777176, 0.017420947551727295, 0.022171955555677414, 0.026922963559627533, 0.03167397156357765, 0.03642497956752777, 0.04117598757147789, 0.04592699557542801, 0.05067800357937813, 0.05542901158332825, 0.060180019587278366, 0.06493102759122849, 0.0696820318698883, 0.07443304359912872, 0.07918405532836914, 0.08393505960702896, 0.08868606388568878, 0.0934370756149292, 0.09818808734416962, 0.10293909162282944, 0.10769009590148926, 0.11244110763072968, 0.11719211935997009, 0.12194312363862991, 0.12669412791728973, 0.13144513964653015, 0.13619615137577057, 0.140947163105011, 0.1456981599330902, 0.15044917166233063, 0.15520018339157104, 0.15995118021965027, 0.16470219194889069, 0.1694532036781311]}, 
"gradients/encoder.encoder.layers.5.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 9.0, 9.0, 3.0, 6.0, 16.0, 4.0, 24.0, 32.0, 31.0, 45.0, 53.0, 58.0, 92.0, 127.0, 175.0, 296.0, 501.0, 873.0, 1879.0, 4333.0, 12012.0, 39153.0, 135355.0, 348186.0, 330049.0, 120945.0, 35224.0, 11093.0, 3884.0, 1753.0, 841.0, 513.0, 321.0, 189.0, 101.0, 75.0, 58.0, 63.0, 43.0, 25.0, 23.0, 22.0, 14.0, 17.0, 8.0, 5.0, 10.0, 2.0, 4.0, 3.0, 5.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.06842041015625, -0.0663442611694336, -0.06426811218261719, -0.06219196319580078, -0.060115814208984375, -0.05803966522216797, -0.05596351623535156, -0.053887367248535156, -0.05181121826171875, -0.049735069274902344, -0.04765892028808594, -0.04558277130126953, -0.043506622314453125, -0.04143047332763672, -0.03935432434082031, -0.037278175354003906, -0.0352020263671875, -0.033125877380371094, -0.031049728393554688, -0.02897357940673828, -0.026897430419921875, -0.02482128143310547, -0.022745132446289062, -0.020668983459472656, -0.01859283447265625, -0.016516685485839844, -0.014440536499023438, -0.012364387512207031, -0.010288238525390625, -0.008212089538574219, -0.0061359405517578125, -0.004059791564941406, -0.001983642578125, 9.250640869140625e-05, 0.0021686553955078125, 0.004244804382324219, 0.006320953369140625, 0.008397102355957031, 0.010473251342773438, 0.012549400329589844, 0.01462554931640625, 0.016701698303222656, 0.018777847290039062, 0.02085399627685547, 0.022930145263671875, 0.02500629425048828, 0.027082443237304688, 0.029158592224121094, 0.0312347412109375, 0.033310890197753906, 0.03538703918457031, 0.03746318817138672, 0.039539337158203125, 0.04161548614501953, 0.04369163513183594, 0.045767784118652344, 0.04784393310546875, 0.049920082092285156, 0.05199623107910156, 0.05407238006591797, 0.056148529052734375, 0.05822467803955078, 0.06030082702636719, 0.062376976013183594, 0.064453125]}, "gradients/encoder.encoder.layers.5.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 2.0, 2.0, 6.0, 17.0, 24.0, 49.0, 77.0, 99.0, 113.0, 149.0, 138.0, 133.0, 77.0, 61.0, 31.0, 22.0, 5.0, 7.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.096435546875, -0.0937337875366211, -0.09103202819824219, -0.08833026885986328, -0.08562850952148438, -0.08292675018310547, -0.08022499084472656, -0.07752323150634766, -0.07482147216796875, -0.07211971282958984, -0.06941795349121094, -0.06671619415283203, -0.06401443481445312, -0.06131267547607422, -0.05861091613769531, -0.055909156799316406, -0.0532073974609375, -0.050505638122558594, -0.04780387878417969, -0.04510211944580078, -0.042400360107421875, -0.03969860076904297, -0.03699684143066406, -0.034295082092285156, -0.03159332275390625, -0.028891563415527344, -0.026189804077148438, -0.02348804473876953, -0.020786285400390625, -0.01808452606201172, -0.015382766723632812, -0.012681007385253906, -0.009979248046875, -0.007277488708496094, -0.0045757293701171875, -0.0018739700317382812, 0.000827789306640625, 0.0035295486450195312, 0.0062313079833984375, 0.008933067321777344, 0.01163482666015625, 0.014336585998535156, 0.017038345336914062, 0.01974010467529297, 0.022441864013671875, 0.02514362335205078, 0.027845382690429688, 0.030547142028808594, 0.0332489013671875, 0.035950660705566406, 0.03865242004394531, 0.04135417938232422, 0.044055938720703125, 
0.04675769805908203, 0.04945945739746094, 0.052161216735839844, 0.05486297607421875, 0.057564735412597656, 0.06026649475097656, 0.06296825408935547, 0.06567001342773438, 0.06837177276611328, 0.07107353210449219, 0.0737752914428711, 0.07647705078125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 5.0, 1.0, 4.0, 4.0, 9.0, 7.0, 17.0, 17.0, 21.0, 21.0, 41.0, 42.0, 65.0, 90.0, 161.0, 225.0, 403.0, 745.0, 1488.0, 3011.0, 6515.0, 14350.0, 32621.0, 74614.0, 157575.0, 261607.0, 244527.0, 136976.0, 62532.0, 27344.0, 12274.0, 5517.0, 2656.0, 1338.0, 687.0, 362.0, 220.0, 145.0, 82.0, 56.0, 45.0, 34.0, 21.0, 15.0, 15.0, 18.0, 13.0, 11.0, 4.0, 5.0, 1.0, 4.0, 4.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0380859375, -0.03685331344604492, -0.035620689392089844, -0.034388065338134766, -0.03315544128417969, -0.03192281723022461, -0.03069019317626953, -0.029457569122314453, -0.028224945068359375, -0.026992321014404297, -0.02575969696044922, -0.02452707290649414, -0.023294448852539062, -0.022061824798583984, -0.020829200744628906, -0.019596576690673828, -0.01836395263671875, -0.017131328582763672, -0.015898704528808594, -0.014666080474853516, -0.013433456420898438, -0.01220083236694336, -0.010968208312988281, -0.009735584259033203, -0.008502960205078125, -0.007270336151123047, -0.006037712097167969, -0.004805088043212891, -0.0035724639892578125, -0.0023398399353027344, -0.0011072158813476562, 0.00012540817260742188, 0.0013580322265625, 0.002590656280517578, 0.0038232803344726562, 0.005055904388427734, 0.0062885284423828125, 0.007521152496337891, 0.008753776550292969, 0.009986400604248047, 0.011219024658203125, 0.012451648712158203, 0.013684272766113281, 0.01491689682006836, 0.016149520874023438, 0.017382144927978516, 0.018614768981933594, 0.019847393035888672, 0.02108001708984375, 0.022312641143798828, 0.023545265197753906, 0.024777889251708984, 0.026010513305664062, 0.02724313735961914, 0.02847576141357422, 0.029708385467529297, 0.030941009521484375, 0.03217363357543945, 0.03340625762939453, 0.03463888168334961, 0.03587150573730469, 0.037104129791259766, 0.038336753845214844, 0.03956937789916992, 0.040802001953125]}, "gradients/encoder.encoder.layers.5.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 2.0, 4.0, 0.0, 0.0, 2.0, 5.0, 4.0, 5.0, 8.0, 11.0, 5.0, 16.0, 12.0, 14.0, 19.0, 19.0, 14.0, 20.0, 24.0, 25.0, 30.0, 39.0, 36.0, 32.0, 30.0, 56.0, 40.0, 32.0, 41.0, 29.0, 34.0, 37.0, 35.0, 42.0, 30.0, 29.0, 27.0, 26.0, 31.0, 20.0, 19.0, 15.0, 10.0, 11.0, 15.0, 12.0, 10.0, 8.0, 6.0, 2.0, 5.0, 7.0, 6.0, 2.0, 4.0, 1.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.06195068359375, -0.06004047393798828, -0.05813026428222656, -0.056220054626464844, -0.054309844970703125, -0.052399635314941406, -0.05048942565917969, -0.04857921600341797, -0.04666900634765625, -0.04475879669189453, -0.04284858703613281, -0.040938377380371094, -0.039028167724609375, -0.037117958068847656, -0.03520774841308594, -0.03329753875732422, -0.0313873291015625, -0.02947711944580078, -0.027566909790039062, -0.025656700134277344, -0.023746490478515625, -0.021836280822753906, -0.019926071166992188, -0.01801586151123047, -0.01610565185546875, -0.014195442199707031, -0.012285232543945312, -0.010375022888183594, -0.008464813232421875, -0.006554603576660156, -0.0046443939208984375, -0.0027341842651367188, -0.000823974609375, 0.0010862350463867188, 0.0029964447021484375, 0.004906654357910156, 0.006816864013671875, 0.008727073669433594, 0.010637283325195312, 
0.012547492980957031, 0.01445770263671875, 0.01636791229248047, 0.018278121948242188, 0.020188331604003906, 0.022098541259765625, 0.024008750915527344, 0.025918960571289062, 0.02782917022705078, 0.0297393798828125, 0.03164958953857422, 0.03355979919433594, 0.035470008850097656, 0.037380218505859375, 0.039290428161621094, 0.04120063781738281, 0.04311084747314453, 0.04502105712890625, 0.04693126678466797, 0.04884147644042969, 0.050751686096191406, 0.052661895751953125, 0.054572105407714844, 0.05648231506347656, 0.05839252471923828, 0.060302734375]}, "gradients/encoder.encoder.layers.5.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 2.0, 2.0, 6.0, 7.0, 5.0, 6.0, 13.0, 18.0, 21.0, 30.0, 55.0, 109.0, 183.0, 318.0, 647.0, 1600.0, 4437.0, 13207.0, 43534.0, 132599.0, 290820.0, 316926.0, 162261.0, 55858.0, 16913.0, 5372.0, 1922.0, 825.0, 389.0, 205.0, 89.0, 46.0, 50.0, 25.0, 19.0, 16.0, 10.0, 1.0, 6.0, 2.0, 1.0, 4.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0], "bins": [-0.0200042724609375, -0.01945018768310547, -0.018896102905273438, -0.018342018127441406, -0.017787933349609375, -0.017233848571777344, -0.016679763793945312, -0.01612567901611328, -0.01557159423828125, -0.015017509460449219, -0.014463424682617188, -0.013909339904785156, -0.013355255126953125, -0.012801170349121094, -0.012247085571289062, -0.011693000793457031, -0.011138916015625, -0.010584831237792969, -0.010030746459960938, -0.009476661682128906, -0.008922576904296875, -0.008368492126464844, -0.007814407348632812, -0.007260322570800781, -0.00670623779296875, -0.006152153015136719, -0.0055980682373046875, -0.005043983459472656, -0.004489898681640625, -0.003935813903808594, -0.0033817291259765625, -0.0028276443481445312, -0.0022735595703125, -0.0017194747924804688, -0.0011653900146484375, -0.0006113052368164062, -5.7220458984375e-05, 0.0004968643188476562, 0.0010509490966796875, 0.0016050338745117188, 0.00215911865234375, 0.0027132034301757812, 0.0032672882080078125, 0.0038213729858398438, 0.004375457763671875, 0.004929542541503906, 0.0054836273193359375, 0.006037712097167969, 0.006591796875, 0.007145881652832031, 0.0076999664306640625, 0.008254051208496094, 0.008808135986328125, 0.009362220764160156, 0.009916305541992188, 0.010470390319824219, 0.01102447509765625, 0.011578559875488281, 0.012132644653320312, 0.012686729431152344, 0.013240814208984375, 0.013794898986816406, 0.014348983764648438, 0.014903068542480469, 0.0154571533203125]}, "gradients/encoder.encoder.layers.5.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 4.0, 0.0, 2.0, 2.0, 4.0, 4.0, 1.0, 7.0, 7.0, 3.0, 10.0, 5.0, 17.0, 13.0, 15.0, 27.0, 12.0, 21.0, 16.0, 24.0, 35.0, 15.0, 49.0, 26.0, 31.0, 49.0, 39.0, 55.0, 26.0, 52.0, 33.0, 43.0, 47.0, 34.0, 40.0, 26.0, 25.0, 30.0, 12.0, 33.0, 18.0, 11.0, 16.0, 10.0, 15.0, 8.0, 8.0, 8.0, 5.0, 7.0, 2.0, 3.0, 5.0, 3.0, 2.0, 2.0, 2.0], "bins": [-5.0067901611328125e-06, -4.863366484642029e-06, -4.719942808151245e-06, -4.5765191316604614e-06, -4.433095455169678e-06, -4.289671778678894e-06, -4.14624810218811e-06, -4.002824425697327e-06, -3.859400749206543e-06, -3.7159770727157593e-06, -3.5725533962249756e-06, -3.429129719734192e-06, -3.285706043243408e-06, -3.1422823667526245e-06, -2.998858690261841e-06, -2.855435013771057e-06, -2.7120113372802734e-06, -2.5685876607894897e-06, -2.425163984298706e-06, -2.2817403078079224e-06, -2.1383166313171387e-06, -1.994892954826355e-06, -1.8514692783355713e-06, 
-1.7080456018447876e-06, -1.564621925354004e-06, -1.4211982488632202e-06, -1.2777745723724365e-06, -1.1343508958816528e-06, -9.909272193908691e-07, -8.475035429000854e-07, -7.040798664093018e-07, -5.606561899185181e-07, -4.172325134277344e-07, -2.738088369369507e-07, -1.30385160446167e-07, 1.30385160446167e-08, 1.564621925354004e-07, 2.998858690261841e-07, 4.4330954551696777e-07, 5.867332220077515e-07, 7.301568984985352e-07, 8.735805749893188e-07, 1.0170042514801025e-06, 1.1604279279708862e-06, 1.30385160446167e-06, 1.4472752809524536e-06, 1.5906989574432373e-06, 1.734122633934021e-06, 1.8775463104248047e-06, 2.0209699869155884e-06, 2.164393663406372e-06, 2.3078173398971558e-06, 2.4512410163879395e-06, 2.594664692878723e-06, 2.738088369369507e-06, 2.8815120458602905e-06, 3.0249357223510742e-06, 3.168359398841858e-06, 3.3117830753326416e-06, 3.4552067518234253e-06, 3.598630428314209e-06, 3.7420541048049927e-06, 3.885477781295776e-06, 4.02890145778656e-06, 4.172325134277344e-06]}, "gradients/encoder.encoder.layers.5.attention.q_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 6.0, 7.0, 9.0, 8.0, 19.0, 29.0, 76.0, 102.0, 174.0, 325.0, 582.0, 1058.0, 2376.0, 6016.0, 16890.0, 50487.0, 136452.0, 276800.0, 298063.0, 162730.0, 62417.0, 21020.0, 7336.0, 2899.0, 1205.0, 637.0, 352.0, 227.0, 100.0, 74.0, 33.0, 27.0, 12.0, 2.0, 8.0, 3.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.015899658203125, -0.015290498733520508, -0.014681339263916016, -0.014072179794311523, -0.013463020324707031, -0.012853860855102539, -0.012244701385498047, -0.011635541915893555, -0.011026382446289062, -0.01041722297668457, -0.009808063507080078, -0.009198904037475586, -0.008589744567871094, -0.007980585098266602, -0.007371425628662109, -0.006762266159057617, -0.006153106689453125, -0.005543947219848633, -0.004934787750244141, -0.0043256282806396484, -0.0037164688110351562, -0.003107309341430664, -0.002498149871826172, -0.0018889904022216797, -0.0012798309326171875, -0.0006706714630126953, -6.151199340820312e-05, 0.0005476474761962891, 0.0011568069458007812, 0.0017659664154052734, 0.0023751258850097656, 0.002984285354614258, 0.00359344482421875, 0.004202604293823242, 0.004811763763427734, 0.0054209232330322266, 0.006030082702636719, 0.006639242172241211, 0.007248401641845703, 0.007857561111450195, 0.008466720581054688, 0.00907588005065918, 0.009685039520263672, 0.010294198989868164, 0.010903358459472656, 0.011512517929077148, 0.01212167739868164, 0.012730836868286133, 0.013339996337890625, 0.013949155807495117, 0.01455831527709961, 0.015167474746704102, 0.015776634216308594, 0.016385793685913086, 0.016994953155517578, 0.01760411262512207, 0.018213272094726562, 0.018822431564331055, 0.019431591033935547, 0.02004075050354004, 0.02064990997314453, 0.021259069442749023, 0.021868228912353516, 0.022477388381958008, 0.0230865478515625]}, "gradients/encoder.encoder.layers.5.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 3.0, 0.0, 3.0, 1.0, 2.0, 2.0, 4.0, 3.0, 4.0, 5.0, 9.0, 15.0, 9.0, 18.0, 13.0, 16.0, 15.0, 25.0, 22.0, 26.0, 28.0, 25.0, 32.0, 44.0, 37.0, 48.0, 44.0, 45.0, 54.0, 47.0, 52.0, 45.0, 36.0, 41.0, 28.0, 38.0, 23.0, 29.0, 25.0, 14.0, 17.0, 16.0, 9.0, 7.0, 3.0, 7.0, 6.0, 5.0, 4.0, 4.0, 3.0, 3.0, 1.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0108489990234375, -0.010486841201782227, -0.010124683380126953, -0.00976252555847168, -0.009400367736816406, -0.009038209915161133, 
-0.00867605209350586, -0.008313894271850586, -0.007951736450195312, -0.007589578628540039, -0.007227420806884766, -0.006865262985229492, -0.006503105163574219, -0.006140947341918945, -0.005778789520263672, -0.0054166316986083984, -0.005054473876953125, -0.0046923160552978516, -0.004330158233642578, -0.003968000411987305, -0.0036058425903320312, -0.003243684768676758, -0.0028815269470214844, -0.002519369125366211, -0.0021572113037109375, -0.001795053482055664, -0.0014328956604003906, -0.0010707378387451172, -0.0007085800170898438, -0.0003464221954345703, 1.5735626220703125e-05, 0.00037789344787597656, 0.00074005126953125, 0.0011022090911865234, 0.0014643669128417969, 0.0018265247344970703, 0.0021886825561523438, 0.002550840377807617, 0.0029129981994628906, 0.003275156021118164, 0.0036373138427734375, 0.003999471664428711, 0.004361629486083984, 0.004723787307739258, 0.005085945129394531, 0.005448102951049805, 0.005810260772705078, 0.0061724185943603516, 0.006534576416015625, 0.0068967342376708984, 0.007258892059326172, 0.007621049880981445, 0.007983207702636719, 0.008345365524291992, 0.008707523345947266, 0.009069681167602539, 0.009431838989257812, 0.009793996810913086, 0.01015615463256836, 0.010518312454223633, 0.010880470275878906, 0.01124262809753418, 0.011604785919189453, 0.011966943740844727, 0.0123291015625]}, "gradients/encoder.encoder.layers.5.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 6.0, 39.0, 219.0, 441.0, 239.0, 45.0, 11.0, 4.0, 4.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1452641487121582, -1.1181871891021729, -1.091110348701477, -1.0640333890914917, -1.036956548690796, -1.0098795890808105, -0.98280268907547, -0.9557257890701294, -0.9286488890647888, -0.9015719890594482, -0.8744950890541077, -0.8474181890487671, -0.8203412294387817, -0.7932643294334412, -0.7661874294281006, -0.73911052942276, -0.7120336294174194, -0.6849567294120789, -0.6578798294067383, -0.6308028697967529, -0.6037259697914124, -0.5766490697860718, -0.5495721697807312, -0.5224952697753906, -0.4954183101654053, -0.4683414101600647, -0.44126448035240173, -0.41418758034706116, -0.3871106803417206, -0.3600337505340576, -0.33295685052871704, -0.30587995052337646, -0.2788030505180359, -0.2517261505126953, -0.22464923560619354, -0.19757232069969177, -0.1704954206943512, -0.14341850578784943, -0.11634159088134766, -0.08926469087600708, -0.06218777596950531, -0.03511086851358414, -0.008033957332372665, 0.019042953848838806, 0.04611986130475998, 0.07319676876068115, 0.10027368366718292, 0.1273505836725235, 0.15442749857902527, 0.18150441348552704, 0.20858131349086761, 0.23565822839736938, 0.26273512840270996, 0.28981202840805054, 0.3168889582157135, 0.3439658582210541, 0.37104278802871704, 0.3981196880340576, 0.4251966178417206, 0.45227351784706116, 0.47935041785240173, 0.5064273476600647, 0.5335042476654053, 0.5605811476707458, 0.5876580476760864]}, "gradients/encoder.encoder.layers.5.layer_norm.bias": {"_type": "histogram", "values": [1.0, 3.0, 0.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0, 3.0, 5.0, 10.0, 8.0, 8.0, 9.0, 15.0, 13.0, 19.0, 17.0, 22.0, 21.0, 40.0, 41.0, 35.0, 44.0, 48.0, 33.0, 48.0, 44.0, 40.0, 41.0, 34.0, 46.0, 44.0, 41.0, 30.0, 36.0, 27.0, 31.0, 20.0, 19.0, 16.0, 16.0, 14.0, 21.0, 9.0, 4.0, 7.0, 9.0, 4.0, 2.0, 6.0, 2.0, 2.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 
1.0, 0.0, 1.0], "bins": [-0.1647854447364807, -0.15922793745994568, -0.15367043018341064, -0.1481129229068756, -0.14255543053150177, -0.13699792325496674, -0.1314404159784317, -0.12588290870189667, -0.12032540887594223, -0.1147679015994072, -0.10921040177345276, -0.10365289449691772, -0.09809538722038269, -0.09253788739442825, -0.08698038011789322, -0.08142288029193878, -0.07586537301540375, -0.07030786573886871, -0.06475036591291428, -0.05919285863637924, -0.053635355085134506, -0.04807785153388977, -0.042520344257354736, -0.03696284070611, -0.031405337154865265, -0.02584783360362053, -0.020290328189730644, -0.01473282277584076, -0.009175319224596024, -0.003617815673351288, 0.0019396916031837463, 0.007497195154428482, 0.013054698705673218, 0.018612202256917953, 0.02416970767080784, 0.029727213084697723, 0.03528471663594246, 0.040842220187187195, 0.04639972746372223, 0.051957231014966965, 0.0575147345662117, 0.06307224184274673, 0.06862974166870117, 0.0741872489452362, 0.07974475622177124, 0.08530225604772568, 0.09085976332426071, 0.09641726315021515, 0.10197477042675018, 0.10753227770328522, 0.11308977752923965, 0.11864728480577469, 0.12420478463172913, 0.12976229190826416, 0.1353197991847992, 0.14087730646133423, 0.14643481373786926, 0.1519923210144043, 0.15754982829093933, 0.16310733556747437, 0.1686648279428482, 0.17422233521938324, 0.17977984249591827, 0.1853373497724533, 0.19089484214782715]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 2.0, 6.0, 3.0, 3.0, 8.0, 13.0, 21.0, 33.0, 44.0, 70.0, 113.0, 189.0, 366.0, 846.0, 2369.0, 7410.0, 27299.0, 202924.0, 2216305.0, 1576315.0, 129590.0, 21196.0, 5783.0, 2034.0, 735.0, 293.0, 140.0, 76.0, 37.0, 27.0, 15.0, 14.0, 8.0, 0.0, 3.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.0760498046875, -0.0738067626953125, -0.071563720703125, -0.0693206787109375, -0.06707763671875, -0.0648345947265625, -0.062591552734375, -0.0603485107421875, -0.05810546875, -0.0558624267578125, -0.053619384765625, -0.0513763427734375, -0.04913330078125, -0.0468902587890625, -0.044647216796875, -0.0424041748046875, -0.0401611328125, -0.0379180908203125, -0.035675048828125, -0.0334320068359375, -0.03118896484375, -0.0289459228515625, -0.026702880859375, -0.0244598388671875, -0.022216796875, -0.0199737548828125, -0.017730712890625, -0.0154876708984375, -0.01324462890625, -0.0110015869140625, -0.008758544921875, -0.0065155029296875, -0.0042724609375, -0.0020294189453125, 0.000213623046875, 0.0024566650390625, 0.00469970703125, 0.0069427490234375, 0.009185791015625, 0.0114288330078125, 0.013671875, 0.0159149169921875, 0.018157958984375, 0.0204010009765625, 0.02264404296875, 0.0248870849609375, 0.027130126953125, 0.0293731689453125, 0.0316162109375, 0.0338592529296875, 0.036102294921875, 0.0383453369140625, 0.04058837890625, 0.0428314208984375, 0.045074462890625, 0.0473175048828125, 0.049560546875, 0.0518035888671875, 0.054046630859375, 0.0562896728515625, 0.05853271484375, 0.0607757568359375, 0.063018798828125, 0.0652618408203125, 0.0675048828125]}, "gradients/encoder.encoder.layers.4.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 4.0, 4.0, 12.0, 25.0, 40.0, 70.0, 80.0, 117.0, 133.0, 132.0, 115.0, 96.0, 66.0, 58.0, 31.0, 15.0, 7.0, 5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0860595703125, -0.0835428237915039, -0.08102607727050781, -0.07850933074951172, -0.07599258422851562, -0.07347583770751953, -0.07095909118652344, -0.06844234466552734, -0.06592559814453125, -0.06340885162353516, -0.06089210510253906, -0.05837535858154297, -0.055858612060546875, -0.05334186553955078, -0.05082511901855469, -0.048308372497558594, -0.0457916259765625, -0.043274879455566406, -0.04075813293457031, -0.03824138641357422, -0.035724639892578125, -0.03320789337158203, -0.030691146850585938, -0.028174400329589844, -0.02565765380859375, -0.023140907287597656, -0.020624160766601562, -0.01810741424560547, -0.015590667724609375, -0.013073921203613281, -0.010557174682617188, -0.008040428161621094, -0.005523681640625, -0.0030069351196289062, -0.0004901885986328125, 0.0020265579223632812, 0.004543304443359375, 0.007060050964355469, 0.009576797485351562, 0.012093544006347656, 0.01461029052734375, 0.017127037048339844, 0.019643783569335938, 0.02216053009033203, 0.024677276611328125, 0.02719402313232422, 0.029710769653320312, 0.032227516174316406, 0.0347442626953125, 0.037261009216308594, 0.03977775573730469, 0.04229450225830078, 0.044811248779296875, 0.04732799530029297, 0.04984474182128906, 0.052361488342285156, 0.05487823486328125, 0.057394981384277344, 0.05991172790527344, 0.06242847442626953, 0.06494522094726562, 0.06746196746826172, 0.06997871398925781, 0.0724954605102539, 0.07501220703125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 5.0, 4.0, 3.0, 6.0, 8.0, 10.0, 23.0, 29.0, 44.0, 67.0, 119.0, 180.0, 281.0, 553.0, 1490.0, 6921.0, 55016.0, 1015391.0, 2951844.0, 143816.0, 14305.0, 2544.0, 754.0, 340.0, 192.0, 122.0, 68.0, 49.0, 37.0, 25.0, 15.0, 11.0, 5.0, 3.0, 5.0, 1.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08160400390625, -0.07860279083251953, -0.07560157775878906, -0.0726003646850586, -0.06959915161132812, -0.06659793853759766, -0.06359672546386719, -0.06059551239013672, -0.05759429931640625, -0.05459308624267578, -0.05159187316894531, -0.048590660095214844, -0.045589447021484375, -0.042588233947753906, -0.03958702087402344, -0.03658580780029297, -0.0335845947265625, -0.03058338165283203, -0.027582168579101562, -0.024580955505371094, -0.021579742431640625, -0.018578529357910156, -0.015577316284179688, -0.012576103210449219, -0.00957489013671875, -0.006573677062988281, -0.0035724639892578125, -0.0005712509155273438, 0.002429962158203125, 0.005431175231933594, 0.008432388305664062, 0.011433601379394531, 0.014434814453125, 0.01743602752685547, 0.020437240600585938, 0.023438453674316406, 0.026439666748046875, 0.029440879821777344, 0.03244209289550781, 0.03544330596923828, 0.03844451904296875, 0.04144573211669922, 0.04444694519042969, 0.047448158264160156, 0.050449371337890625, 0.053450584411621094, 0.05645179748535156, 0.05945301055908203, 0.0624542236328125, 0.06545543670654297, 0.06845664978027344, 0.0714578628540039, 0.07445907592773438, 0.07746028900146484, 0.08046150207519531, 0.08346271514892578, 0.08646392822265625, 0.08946514129638672, 0.09246635437011719, 0.09546756744384766, 0.09846878051757812, 0.1014699935913086, 0.10447120666503906, 0.10747241973876953, 0.1104736328125]}, "gradients/encoder.encoder.layers.4.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 
1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 6.0, 6.0, 6.0, 9.0, 11.0, 7.0, 10.0, 14.0, 20.0, 30.0, 40.0, 47.0, 51.0, 75.0, 86.0, 133.0, 186.0, 230.0, 315.0, 514.0, 477.0, 460.0, 349.0, 280.0, 185.0, 131.0, 83.0, 77.0, 54.0, 51.0, 34.0, 22.0, 23.0, 18.0, 10.0, 11.0, 7.0, 7.0, 2.0, 2.0, 3.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.044830322265625, -0.04349374771118164, -0.04215717315673828, -0.04082059860229492, -0.03948402404785156, -0.0381474494934082, -0.036810874938964844, -0.035474300384521484, -0.034137725830078125, -0.032801151275634766, -0.031464576721191406, -0.030128002166748047, -0.028791427612304688, -0.027454853057861328, -0.02611827850341797, -0.02478170394897461, -0.02344512939453125, -0.02210855484008789, -0.02077198028564453, -0.019435405731201172, -0.018098831176757812, -0.016762256622314453, -0.015425682067871094, -0.014089107513427734, -0.012752532958984375, -0.011415958404541016, -0.010079383850097656, -0.008742809295654297, -0.0074062347412109375, -0.006069660186767578, -0.004733085632324219, -0.0033965110778808594, -0.0020599365234375, -0.0007233619689941406, 0.0006132125854492188, 0.0019497871398925781, 0.0032863616943359375, 0.004622936248779297, 0.005959510803222656, 0.007296085357666016, 0.008632659912109375, 0.009969234466552734, 0.011305809020996094, 0.012642383575439453, 0.013978958129882812, 0.015315532684326172, 0.01665210723876953, 0.01798868179321289, 0.01932525634765625, 0.02066183090209961, 0.02199840545654297, 0.023334980010986328, 0.024671554565429688, 0.026008129119873047, 0.027344703674316406, 0.028681278228759766, 0.030017852783203125, 0.031354427337646484, 0.032691001892089844, 0.0340275764465332, 0.03536415100097656, 0.03670072555541992, 0.03803730010986328, 0.03937387466430664, 0.04071044921875]}, "gradients/encoder.encoder.layers.4.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 10.0, 7.0, 17.0, 40.0, 52.0, 94.0, 104.0, 143.0, 142.0, 136.0, 101.0, 65.0, 51.0, 18.0, 14.0, 4.0, 3.0, 4.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.3541611433029175, -0.3428930640220642, -0.33162498474121094, -0.32035690546035767, -0.3090888261795044, -0.2978207468986511, -0.28655266761779785, -0.2752845883369446, -0.2640165090560913, -0.25274842977523804, -0.24148035049438477, -0.2302122712135315, -0.21894419193267822, -0.20767611265182495, -0.19640803337097168, -0.1851399540901184, -0.17387187480926514, -0.16260379552841187, -0.1513357162475586, -0.14006763696670532, -0.12879955768585205, -0.11753147840499878, -0.10626339912414551, -0.09499531984329224, -0.08372724056243896, -0.0724591612815857, -0.06119108200073242, -0.04992300271987915, -0.03865492343902588, -0.027386844158172607, -0.016118764877319336, -0.0048506855964660645, 0.006417393684387207, 0.01768547296524048, 0.02895355224609375, 0.04022163152694702, 0.05148971080780029, 0.06275779008865356, 0.07402586936950684, 0.08529394865036011, 0.09656202793121338, 0.10783010721206665, 0.11909818649291992, 0.1303662657737732, 0.14163434505462646, 0.15290242433547974, 0.164170503616333, 0.17543858289718628, 0.18670666217803955, 0.19797474145889282, 0.2092428207397461, 0.22051090002059937, 0.23177897930145264, 0.2430470585823059, 0.2543151378631592, 0.26558321714401245, 0.2768512964248657, 0.288119375705719, 0.29938745498657227, 0.31065553426742554, 0.3219236135482788, 0.3331916928291321, 
0.34445977210998535, 0.3557278513908386, 0.3669959306716919]}, "gradients/encoder.encoder.layers.4.final_layer_norm.bias": {"_type": "histogram", "values": [3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 6.0, 3.0, 10.0, 7.0, 15.0, 10.0, 13.0, 12.0, 13.0, 14.0, 19.0, 16.0, 26.0, 30.0, 40.0, 33.0, 33.0, 37.0, 36.0, 32.0, 40.0, 45.0, 45.0, 32.0, 37.0, 37.0, 34.0, 42.0, 24.0, 43.0, 30.0, 27.0, 22.0, 20.0, 15.0, 17.0, 15.0, 14.0, 9.0, 11.0, 8.0, 5.0, 4.0, 7.0, 2.0, 5.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.1225472092628479, -0.11832526326179504, -0.11410331726074219, -0.10988137125968933, -0.10565942525863647, -0.10143747925758362, -0.09721552580595016, -0.09299357980489731, -0.08877163380384445, -0.0845496878027916, -0.08032774180173874, -0.07610579580068588, -0.07188384234905243, -0.06766189634799957, -0.06343995034694672, -0.05921800434589386, -0.054996058344841, -0.05077411234378815, -0.04655216634273529, -0.042330216616392136, -0.03810827061533928, -0.03388632461428642, -0.029664376750588417, -0.02544242888689041, -0.021220482885837555, -0.0169985368847847, -0.012776589021086693, -0.008554642088711262, -0.004332695156335831, -0.00011074915528297424, 0.004111198708415031, 0.008333146572113037, 0.012555092573165894, 0.01677703857421875, 0.020998986437916756, 0.02522093430161476, 0.029442880302667618, 0.033664826303720474, 0.03788677603006363, 0.042108722031116486, 0.04633066803216934, 0.0505526140332222, 0.054774560034275055, 0.05899650976061821, 0.06321845948696136, 0.06744040548801422, 0.07166235148906708, 0.07588429749011993, 0.08010624349117279, 0.08432818949222565, 0.0885501354932785, 0.09277208149433136, 0.09699402749538422, 0.10121597349643707, 0.10543792694807053, 0.10965987294912338, 0.11388181895017624, 0.1181037649512291, 0.12232571095228195, 0.1265476644039154, 0.13076961040496826, 0.13499155640602112, 0.13921350240707397, 0.14343544840812683, 0.1476573944091797]}, "gradients/encoder.encoder.layers.4.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 4.0, 2.0, 2.0, 4.0, 7.0, 7.0, 12.0, 16.0, 22.0, 23.0, 41.0, 25.0, 67.0, 81.0, 121.0, 132.0, 202.0, 313.0, 577.0, 1064.0, 2244.0, 5456.0, 16230.0, 51940.0, 166646.0, 378577.0, 281477.0, 96842.0, 29726.0, 9567.0, 3510.0, 1523.0, 773.0, 420.0, 296.0, 162.0, 115.0, 82.0, 54.0, 60.0, 32.0, 22.0, 20.0, 16.0, 16.0, 9.0, 7.0, 8.0, 4.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0, 0.0, 3.0], "bins": [-0.0721435546875, -0.06994152069091797, -0.06773948669433594, -0.0655374526977539, -0.06333541870117188, -0.061133384704589844, -0.05893135070800781, -0.05672931671142578, -0.05452728271484375, -0.05232524871826172, -0.05012321472167969, -0.047921180725097656, -0.045719146728515625, -0.043517112731933594, -0.04131507873535156, -0.03911304473876953, -0.0369110107421875, -0.03470897674560547, -0.03250694274902344, -0.030304908752441406, -0.028102874755859375, -0.025900840759277344, -0.023698806762695312, -0.02149677276611328, -0.01929473876953125, -0.01709270477294922, -0.014890670776367188, -0.012688636779785156, -0.010486602783203125, -0.008284568786621094, -0.0060825347900390625, -0.0038805007934570312, -0.001678466796875, 0.0005235671997070312, 0.0027256011962890625, 0.004927635192871094, 0.007129669189453125, 0.009331703186035156, 0.011533737182617188, 0.013735771179199219, 0.01593780517578125, 0.01813983917236328, 0.020341873168945312, 0.022543907165527344, 0.024745941162109375, 0.026947975158691406, 0.029150009155273438, 0.03135204315185547, 0.0335540771484375, 0.03575611114501953, 
0.03795814514160156, 0.040160179138183594, 0.042362213134765625, 0.044564247131347656, 0.04676628112792969, 0.04896831512451172, 0.05117034912109375, 0.05337238311767578, 0.05557441711425781, 0.057776451110839844, 0.059978485107421875, 0.062180519104003906, 0.06438255310058594, 0.06658458709716797, 0.06878662109375]}, "gradients/encoder.encoder.layers.4.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 10.0, 21.0, 22.0, 38.0, 80.0, 108.0, 131.0, 143.0, 132.0, 102.0, 88.0, 63.0, 32.0, 21.0, 9.0, 4.0, 4.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08795166015625, -0.08533668518066406, -0.08272171020507812, -0.08010673522949219, -0.07749176025390625, -0.07487678527832031, -0.07226181030273438, -0.06964683532714844, -0.0670318603515625, -0.06441688537597656, -0.061801910400390625, -0.05918693542480469, -0.05657196044921875, -0.05395698547363281, -0.051342010498046875, -0.04872703552246094, -0.046112060546875, -0.04349708557128906, -0.040882110595703125, -0.03826713562011719, -0.03565216064453125, -0.03303718566894531, -0.030422210693359375, -0.027807235717773438, -0.0251922607421875, -0.022577285766601562, -0.019962310791015625, -0.017347335815429688, -0.01473236083984375, -0.012117385864257812, -0.009502410888671875, -0.0068874359130859375, -0.0042724609375, -0.0016574859619140625, 0.000957489013671875, 0.0035724639892578125, 0.00618743896484375, 0.008802413940429688, 0.011417388916015625, 0.014032363891601562, 0.0166473388671875, 0.019262313842773438, 0.021877288818359375, 0.024492263793945312, 0.02710723876953125, 0.029722213745117188, 0.032337188720703125, 0.03495216369628906, 0.037567138671875, 0.04018211364746094, 0.042797088623046875, 0.04541206359863281, 0.04802703857421875, 0.05064201354980469, 0.053256988525390625, 0.05587196350097656, 0.0584869384765625, 0.06110191345214844, 0.06371688842773438, 0.06633186340332031, 0.06894683837890625, 0.07156181335449219, 0.07417678833007812, 0.07679176330566406, 0.07940673828125]}, "gradients/encoder.encoder.layers.4.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 4.0, 2.0, 3.0, 4.0, 6.0, 13.0, 9.0, 20.0, 39.0, 49.0, 97.0, 133.0, 243.0, 377.0, 697.0, 1332.0, 2756.0, 6374.0, 16960.0, 49544.0, 154855.0, 385020.0, 285471.0, 94482.0, 30598.0, 10772.0, 4322.0, 2039.0, 974.0, 564.0, 310.0, 183.0, 124.0, 71.0, 40.0, 27.0, 17.0, 8.0, 12.0, 6.0, 3.0, 5.0, 1.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0], "bins": [-0.06884765625, -0.06686830520629883, -0.06488895416259766, -0.06290960311889648, -0.06093025207519531, -0.05895090103149414, -0.05697154998779297, -0.0549921989440918, -0.053012847900390625, -0.05103349685668945, -0.04905414581298828, -0.04707479476928711, -0.04509544372558594, -0.043116092681884766, -0.041136741638183594, -0.03915739059448242, -0.03717803955078125, -0.03519868850708008, -0.033219337463378906, -0.031239986419677734, -0.029260635375976562, -0.02728128433227539, -0.02530193328857422, -0.023322582244873047, -0.021343231201171875, -0.019363880157470703, -0.01738452911376953, -0.01540517807006836, -0.013425827026367188, -0.011446475982666016, -0.009467124938964844, -0.007487773895263672, -0.0055084228515625, -0.003529071807861328, -0.0015497207641601562, 0.0004296302795410156, 0.0024089813232421875, 0.004388332366943359, 0.006367683410644531, 
0.008347034454345703, 0.010326385498046875, 0.012305736541748047, 0.014285087585449219, 0.01626443862915039, 0.018243789672851562, 0.020223140716552734, 0.022202491760253906, 0.024181842803955078, 0.02616119384765625, 0.028140544891357422, 0.030119895935058594, 0.032099246978759766, 0.03407859802246094, 0.03605794906616211, 0.03803730010986328, 0.04001665115356445, 0.041996002197265625, 0.0439753532409668, 0.04595470428466797, 0.04793405532836914, 0.04991340637207031, 0.051892757415771484, 0.053872108459472656, 0.05585145950317383, 0.057830810546875]}, "gradients/encoder.encoder.layers.4.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 2.0, 9.0, 6.0, 7.0, 12.0, 9.0, 10.0, 23.0, 18.0, 17.0, 28.0, 34.0, 35.0, 32.0, 53.0, 38.0, 49.0, 45.0, 39.0, 41.0, 40.0, 46.0, 39.0, 32.0, 30.0, 23.0, 42.0, 31.0, 29.0, 26.0, 25.0, 30.0, 14.0, 14.0, 11.0, 13.0, 7.0, 8.0, 10.0, 5.0, 3.0, 5.0, 5.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.0628662109375, -0.060699462890625, -0.05853271484375, -0.056365966796875, -0.05419921875, -0.052032470703125, -0.04986572265625, -0.047698974609375, -0.0455322265625, -0.043365478515625, -0.04119873046875, -0.039031982421875, -0.036865234375, -0.034698486328125, -0.03253173828125, -0.030364990234375, -0.0281982421875, -0.026031494140625, -0.02386474609375, -0.021697998046875, -0.01953125, -0.017364501953125, -0.01519775390625, -0.013031005859375, -0.0108642578125, -0.008697509765625, -0.00653076171875, -0.004364013671875, -0.002197265625, -3.0517578125e-05, 0.00213623046875, 0.004302978515625, 0.0064697265625, 0.008636474609375, 0.01080322265625, 0.012969970703125, 0.01513671875, 0.017303466796875, 0.01947021484375, 0.021636962890625, 0.0238037109375, 0.025970458984375, 0.02813720703125, 0.030303955078125, 0.032470703125, 0.034637451171875, 0.03680419921875, 0.038970947265625, 0.0411376953125, 0.043304443359375, 0.04547119140625, 0.047637939453125, 0.0498046875, 0.051971435546875, 0.05413818359375, 0.056304931640625, 0.0584716796875, 0.060638427734375, 0.06280517578125, 0.064971923828125, 0.067138671875, 0.069305419921875, 0.07147216796875, 0.073638916015625, 0.0758056640625]}, "gradients/encoder.encoder.layers.4.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 5.0, 3.0, 10.0, 9.0, 16.0, 22.0, 22.0, 25.0, 47.0, 61.0, 110.0, 165.0, 328.0, 653.0, 1485.0, 3553.0, 9136.0, 22612.0, 55400.0, 125343.0, 249099.0, 287417.0, 165295.0, 74944.0, 31467.0, 12518.0, 4871.0, 2067.0, 880.0, 411.0, 207.0, 119.0, 70.0, 50.0, 33.0, 28.0, 22.0, 15.0, 12.0, 11.0, 3.0, 6.0, 3.0, 3.0, 3.0, 1.0, 1.0, 2.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.01496124267578125, -0.014481425285339355, -0.014001607894897461, -0.013521790504455566, -0.013041973114013672, -0.012562155723571777, -0.012082338333129883, -0.011602520942687988, -0.011122703552246094, -0.0106428861618042, -0.010163068771362305, -0.00968325138092041, -0.009203433990478516, -0.008723616600036621, -0.008243799209594727, -0.007763981819152832, -0.0072841644287109375, -0.006804347038269043, -0.0063245296478271484, -0.005844712257385254, -0.005364894866943359, -0.004885077476501465, -0.00440526008605957, -0.003925442695617676, -0.0034456253051757812, -0.0029658079147338867, -0.002485990524291992, -0.0020061731338500977, -0.0015263557434082031, -0.0010465383529663086, -0.0005667209625244141, -8.690357208251953e-05, 0.000392913818359375, 0.0008727312088012695, 0.001352548599243164, 
0.0018323659896850586, 0.002312183380126953, 0.0027920007705688477, 0.003271818161010742, 0.0037516355514526367, 0.004231452941894531, 0.004711270332336426, 0.00519108772277832, 0.005670905113220215, 0.006150722503662109, 0.006630539894104004, 0.0071103572845458984, 0.007590174674987793, 0.008069992065429688, 0.008549809455871582, 0.009029626846313477, 0.009509444236755371, 0.009989261627197266, 0.01046907901763916, 0.010948896408081055, 0.01142871379852295, 0.011908531188964844, 0.012388348579406738, 0.012868165969848633, 0.013347983360290527, 0.013827800750732422, 0.014307618141174316, 0.014787435531616211, 0.015267252922058105, 0.0157470703125]}, "gradients/encoder.encoder.layers.4.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 2.0, 0.0, 0.0, 6.0, 2.0, 7.0, 9.0, 8.0, 11.0, 17.0, 25.0, 21.0, 31.0, 45.0, 44.0, 72.0, 74.0, 64.0, 85.0, 87.0, 52.0, 63.0, 59.0, 59.0, 31.0, 26.0, 29.0, 28.0, 13.0, 9.0, 8.0, 10.0, 3.0, 3.0, 3.0, 6.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-8.285045623779297e-06, -8.005648851394653e-06, -7.72625207901001e-06, -7.446855306625366e-06, -7.167458534240723e-06, -6.888061761856079e-06, -6.6086649894714355e-06, -6.329268217086792e-06, -6.0498714447021484e-06, -5.770474672317505e-06, -5.491077899932861e-06, -5.211681127548218e-06, -4.932284355163574e-06, -4.652887582778931e-06, -4.373490810394287e-06, -4.0940940380096436e-06, -3.814697265625e-06, -3.5353004932403564e-06, -3.255903720855713e-06, -2.9765069484710693e-06, -2.6971101760864258e-06, -2.4177134037017822e-06, -2.1383166313171387e-06, -1.8589198589324951e-06, -1.5795230865478516e-06, -1.300126314163208e-06, -1.0207295417785645e-06, -7.413327693939209e-07, -4.6193599700927734e-07, -1.825392246246338e-07, 9.685754776000977e-08, 3.762543201446533e-07, 6.556510925292969e-07, 9.350478649139404e-07, 1.214444637298584e-06, 1.4938414096832275e-06, 1.773238182067871e-06, 2.0526349544525146e-06, 2.332031726837158e-06, 2.6114284992218018e-06, 2.8908252716064453e-06, 3.170222043991089e-06, 3.4496188163757324e-06, 3.729015588760376e-06, 4.0084123611450195e-06, 4.287809133529663e-06, 4.567205905914307e-06, 4.84660267829895e-06, 5.125999450683594e-06, 5.405396223068237e-06, 5.684792995452881e-06, 5.964189767837524e-06, 6.243586540222168e-06, 6.5229833126068115e-06, 6.802380084991455e-06, 7.081776857376099e-06, 7.361173629760742e-06, 7.640570402145386e-06, 7.91996717453003e-06, 8.199363946914673e-06, 8.478760719299316e-06, 8.75815749168396e-06, 9.037554264068604e-06, 9.316951036453247e-06, 9.59634780883789e-06]}, "gradients/encoder.encoder.layers.4.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 3.0, 1.0, 10.0, 9.0, 9.0, 24.0, 21.0, 40.0, 62.0, 90.0, 162.0, 282.0, 476.0, 966.0, 1993.0, 4292.0, 10168.0, 24494.0, 58792.0, 132145.0, 241738.0, 267816.0, 168636.0, 78584.0, 33078.0, 13793.0, 5779.0, 2482.0, 1209.0, 577.0, 318.0, 178.0, 126.0, 68.0, 37.0, 34.0, 21.0, 17.0, 8.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0157623291015625, -0.015256643295288086, -0.014750957489013672, -0.014245271682739258, -0.013739585876464844, -0.01323390007019043, -0.012728214263916016, -0.012222528457641602, -0.011716842651367188, -0.011211156845092773, -0.01070547103881836, -0.010199785232543945, -0.009694099426269531, -0.009188413619995117, -0.008682727813720703, -0.008177042007446289, -0.007671356201171875, 
-0.007165670394897461, -0.006659984588623047, -0.006154298782348633, -0.005648612976074219, -0.005142927169799805, -0.004637241363525391, -0.0041315555572509766, -0.0036258697509765625, -0.0031201839447021484, -0.0026144981384277344, -0.0021088123321533203, -0.0016031265258789062, -0.0010974407196044922, -0.0005917549133300781, -8.606910705566406e-05, 0.00041961669921875, 0.0009253025054931641, 0.0014309883117675781, 0.0019366741180419922, 0.0024423599243164062, 0.0029480457305908203, 0.0034537315368652344, 0.0039594173431396484, 0.0044651031494140625, 0.0049707889556884766, 0.005476474761962891, 0.005982160568237305, 0.006487846374511719, 0.006993532180786133, 0.007499217987060547, 0.008004903793334961, 0.008510589599609375, 0.009016275405883789, 0.009521961212158203, 0.010027647018432617, 0.010533332824707031, 0.011039018630981445, 0.01154470443725586, 0.012050390243530273, 0.012556076049804688, 0.013061761856079102, 0.013567447662353516, 0.01407313346862793, 0.014578819274902344, 0.015084505081176758, 0.015590190887451172, 0.016095876693725586, 0.0166015625]}, "gradients/encoder.encoder.layers.4.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 3.0, 4.0, 2.0, 9.0, 9.0, 8.0, 9.0, 9.0, 14.0, 14.0, 19.0, 25.0, 32.0, 23.0, 39.0, 46.0, 51.0, 44.0, 49.0, 38.0, 59.0, 53.0, 62.0, 55.0, 53.0, 45.0, 38.0, 44.0, 32.0, 26.0, 15.0, 16.0, 14.0, 10.0, 9.0, 11.0, 5.0, 3.0, 2.0, 1.0, 4.0, 2.0, 1.0, 1.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.012176513671875, -0.011721372604370117, -0.011266231536865234, -0.010811090469360352, -0.010355949401855469, -0.009900808334350586, -0.009445667266845703, -0.00899052619934082, -0.008535385131835938, -0.008080244064331055, -0.007625102996826172, -0.007169961929321289, -0.006714820861816406, -0.0062596797943115234, -0.005804538726806641, -0.005349397659301758, -0.004894256591796875, -0.004439115524291992, -0.003983974456787109, -0.0035288333892822266, -0.0030736923217773438, -0.002618551254272461, -0.002163410186767578, -0.0017082691192626953, -0.0012531280517578125, -0.0007979869842529297, -0.0003428459167480469, 0.00011229515075683594, 0.0005674362182617188, 0.0010225772857666016, 0.0014777183532714844, 0.0019328594207763672, 0.00238800048828125, 0.002843141555786133, 0.0032982826232910156, 0.0037534236907958984, 0.004208564758300781, 0.004663705825805664, 0.005118846893310547, 0.00557398796081543, 0.0060291290283203125, 0.006484270095825195, 0.006939411163330078, 0.007394552230834961, 0.007849693298339844, 0.008304834365844727, 0.00875997543334961, 0.009215116500854492, 0.009670257568359375, 0.010125398635864258, 0.01058053970336914, 0.011035680770874023, 0.011490821838378906, 0.011945962905883789, 0.012401103973388672, 0.012856245040893555, 0.013311386108398438, 0.01376652717590332, 0.014221668243408203, 0.014676809310913086, 0.015131950378417969, 0.015587091445922852, 0.016042232513427734, 0.016497373580932617, 0.0169525146484375]}, "gradients/encoder.encoder.layers.4.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 1.0, 2.0, 5.0, 8.0, 11.0, 22.0, 54.0, 111.0, 170.0, 195.0, 179.0, 117.0, 73.0, 30.0, 8.0, 5.0, 6.0, 2.0, 2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5217850208282471, -0.509054958820343, -0.4963248670101166, -0.4835948050022125, -0.4708647131919861, 
-0.45813465118408203, -0.445404589176178, -0.43267449736595154, -0.4199444353580475, -0.40721437335014343, -0.394484281539917, -0.38175421953201294, -0.3690241277217865, -0.35629406571388245, -0.343563973903656, -0.33083391189575195, -0.3181038498878479, -0.30537378787994385, -0.2926436960697174, -0.27991363406181335, -0.2671835422515869, -0.25445348024368286, -0.24172340333461761, -0.22899332642555237, -0.21626323461532593, -0.20353315770626068, -0.19080308079719543, -0.17807301878929138, -0.16534294188022614, -0.1526128649711609, -0.13988278806209564, -0.1271527111530304, -0.11442264914512634, -0.1016925722360611, -0.08896250277757645, -0.0762324258685112, -0.06350235641002655, -0.050772279500961304, -0.03804220259189606, -0.025312133133411407, -0.012582056224346161, 0.00014801789075136185, 0.012878092005848885, 0.025608167052268982, 0.03833824023604393, 0.05106831341981888, 0.06379839032888412, 0.07652845978736877, 0.08925853669643402, 0.10198861360549927, 0.11471868306398392, 0.12744876742362976, 0.1401788294315338, 0.15290890634059906, 0.1656389832496643, 0.17836904525756836, 0.1910991370677948, 0.20382921397686005, 0.2165592908859253, 0.22928935289382935, 0.2420194298028946, 0.25474950671195984, 0.2674795985221863, 0.28020966053009033, 0.2929397225379944]}, "gradients/encoder.encoder.layers.4.layer_norm.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 10.0, 8.0, 9.0, 12.0, 11.0, 11.0, 13.0, 18.0, 21.0, 27.0, 20.0, 27.0, 35.0, 34.0, 31.0, 36.0, 26.0, 37.0, 38.0, 34.0, 43.0, 37.0, 37.0, 32.0, 40.0, 40.0, 35.0, 38.0, 30.0, 31.0, 25.0, 29.0, 22.0, 10.0, 24.0, 15.0, 8.0, 13.0, 6.0, 6.0, 3.0, 3.0, 8.0, 4.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0], "bins": [-0.16210085153579712, -0.1570567488670349, -0.1520126461982727, -0.1469685435295105, -0.1419244408607483, -0.13688033819198608, -0.13183623552322388, -0.12679213285446167, -0.12174802273511887, -0.11670392006635666, -0.11165981739759445, -0.10661571472883224, -0.10157160460948944, -0.09652750194072723, -0.09148339927196503, -0.08643929660320282, -0.08139519393444061, -0.0763510912656784, -0.0713069885969162, -0.06626288592815399, -0.061218779534101486, -0.05617467686533928, -0.051130570471286774, -0.04608646780252457, -0.04104236513376236, -0.03599826246500015, -0.030954157933592796, -0.02591005340218544, -0.020865950733423233, -0.015821848064661026, -0.01077774353325367, -0.0057336390018463135, -0.0006895363330841064, 0.004354567267000675, 0.009398670867085457, 0.014442774467170238, 0.01948687806725502, 0.024530980736017227, 0.029575085267424583, 0.03461918979883194, 0.03966329246759415, 0.044707395136356354, 0.04975149780511856, 0.054795604199171066, 0.05983970686793327, 0.06488381326198578, 0.06992791593074799, 0.07497201859951019, 0.0800161212682724, 0.0850602239370346, 0.09010432660579681, 0.09514842927455902, 0.10019253194332123, 0.10523663461208344, 0.11028074473142624, 0.11532484740018845, 0.12036895006895065, 0.12541306018829346, 0.13045716285705566, 0.13550126552581787, 0.14054536819458008, 0.14558947086334229, 0.1506335735321045, 0.1556776762008667, 0.1607217788696289]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 2.0, 1.0, 2.0, 4.0, 2.0, 5.0, 9.0, 9.0, 12.0, 17.0, 14.0, 34.0, 50.0, 81.0, 153.0, 253.0, 598.0, 1599.0, 5862.0, 24405.0, 204218.0, 2425098.0, 1411697.0, 99275.0, 14193.0, 4186.0, 1537.0, 510.0, 200.0, 103.0, 49.0, 34.0, 29.0, 13.0, 11.0, 12.0, 2.0, 
4.0, 7.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08258056640625, -0.08006668090820312, -0.07755279541015625, -0.07503890991210938, -0.0725250244140625, -0.07001113891601562, -0.06749725341796875, -0.06498336791992188, -0.062469482421875, -0.059955596923828125, -0.05744171142578125, -0.054927825927734375, -0.0524139404296875, -0.049900054931640625, -0.04738616943359375, -0.044872283935546875, -0.0423583984375, -0.039844512939453125, -0.03733062744140625, -0.034816741943359375, -0.0323028564453125, -0.029788970947265625, -0.02727508544921875, -0.024761199951171875, -0.022247314453125, -0.019733428955078125, -0.01721954345703125, -0.014705657958984375, -0.0121917724609375, -0.009677886962890625, -0.00716400146484375, -0.004650115966796875, -0.00213623046875, 0.000377655029296875, 0.00289154052734375, 0.005405426025390625, 0.0079193115234375, 0.010433197021484375, 0.01294708251953125, 0.015460968017578125, 0.017974853515625, 0.020488739013671875, 0.02300262451171875, 0.025516510009765625, 0.0280303955078125, 0.030544281005859375, 0.03305816650390625, 0.035572052001953125, 0.0380859375, 0.040599822998046875, 0.04311370849609375, 0.045627593994140625, 0.0481414794921875, 0.050655364990234375, 0.05316925048828125, 0.055683135986328125, 0.058197021484375, 0.060710906982421875, 0.06322479248046875, 0.06573867797851562, 0.0682525634765625, 0.07076644897460938, 0.07328033447265625, 0.07579421997070312, 0.07830810546875]}, "gradients/encoder.encoder.layers.3.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 8.0, 15.0, 36.0, 56.0, 89.0, 104.0, 117.0, 137.0, 143.0, 106.0, 73.0, 65.0, 36.0, 12.0, 9.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0975341796875, -0.09486007690429688, -0.09218597412109375, -0.08951187133789062, -0.0868377685546875, -0.08416366577148438, -0.08148956298828125, -0.07881546020507812, -0.076141357421875, -0.07346725463867188, -0.07079315185546875, -0.06811904907226562, -0.0654449462890625, -0.06277084350585938, -0.06009674072265625, -0.057422637939453125, -0.05474853515625, -0.052074432373046875, -0.04940032958984375, -0.046726226806640625, -0.0440521240234375, -0.041378021240234375, -0.03870391845703125, -0.036029815673828125, -0.033355712890625, -0.030681610107421875, -0.02800750732421875, -0.025333404541015625, -0.0226593017578125, -0.019985198974609375, -0.01731109619140625, -0.014636993408203125, -0.011962890625, -0.009288787841796875, -0.00661468505859375, -0.003940582275390625, -0.0012664794921875, 0.001407623291015625, 0.00408172607421875, 0.006755828857421875, 0.009429931640625, 0.012104034423828125, 0.01477813720703125, 0.017452239990234375, 0.0201263427734375, 0.022800445556640625, 0.02547454833984375, 0.028148651123046875, 0.03082275390625, 0.033496856689453125, 0.03617095947265625, 0.038845062255859375, 0.0415191650390625, 0.044193267822265625, 0.04686737060546875, 0.049541473388671875, 0.052215576171875, 0.054889678955078125, 0.05756378173828125, 0.060237884521484375, 0.0629119873046875, 0.06558609008789062, 0.06826019287109375, 0.07093429565429688, 0.0736083984375]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 4.0, 5.0, 1.0, 2.0, 2.0, 4.0, 8.0, 11.0, 13.0, 16.0, 25.0, 35.0, 52.0, 
74.0, 89.0, 124.0, 201.0, 449.0, 1074.0, 3799.0, 23412.0, 249159.0, 3226751.0, 634470.0, 45203.0, 6462.0, 1528.0, 573.0, 270.0, 151.0, 89.0, 68.0, 35.0, 45.0, 22.0, 15.0, 16.0, 10.0, 4.0, 7.0, 2.0, 4.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.09747314453125, -0.09448814392089844, -0.09150314331054688, -0.08851814270019531, -0.08553314208984375, -0.08254814147949219, -0.07956314086914062, -0.07657814025878906, -0.0735931396484375, -0.07060813903808594, -0.06762313842773438, -0.06463813781738281, -0.06165313720703125, -0.05866813659667969, -0.055683135986328125, -0.05269813537597656, -0.049713134765625, -0.04672813415527344, -0.043743133544921875, -0.04075813293457031, -0.03777313232421875, -0.03478813171386719, -0.031803131103515625, -0.028818130493164062, -0.0258331298828125, -0.022848129272460938, -0.019863128662109375, -0.016878128051757812, -0.01389312744140625, -0.010908126831054688, -0.007923126220703125, -0.0049381256103515625, -0.001953125, 0.0010318756103515625, 0.004016876220703125, 0.0070018768310546875, 0.00998687744140625, 0.012971878051757812, 0.015956878662109375, 0.018941879272460938, 0.0219268798828125, 0.024911880493164062, 0.027896881103515625, 0.030881881713867188, 0.03386688232421875, 0.03685188293457031, 0.039836883544921875, 0.04282188415527344, 0.045806884765625, 0.04879188537597656, 0.051776885986328125, 0.05476188659667969, 0.05774688720703125, 0.06073188781738281, 0.06371688842773438, 0.06670188903808594, 0.0696868896484375, 0.07267189025878906, 0.07565689086914062, 0.07864189147949219, 0.08162689208984375, 0.08461189270019531, 0.08759689331054688, 0.09058189392089844, 0.09356689453125]}, "gradients/encoder.encoder.layers.3.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 8.0, 4.0, 6.0, 13.0, 14.0, 14.0, 31.0, 44.0, 71.0, 82.0, 129.0, 166.0, 271.0, 421.0, 550.0, 591.0, 490.0, 340.0, 275.0, 182.0, 111.0, 62.0, 64.0, 48.0, 34.0, 13.0, 16.0, 7.0, 10.0, 2.0, 3.0, 5.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.072509765625, -0.07066917419433594, -0.06882858276367188, -0.06698799133300781, -0.06514739990234375, -0.06330680847167969, -0.061466217041015625, -0.05962562561035156, -0.0577850341796875, -0.05594444274902344, -0.054103851318359375, -0.05226325988769531, -0.05042266845703125, -0.04858207702636719, -0.046741485595703125, -0.04490089416503906, -0.043060302734375, -0.04121971130371094, -0.039379119873046875, -0.03753852844238281, -0.03569793701171875, -0.03385734558105469, -0.032016754150390625, -0.030176162719726562, -0.0283355712890625, -0.026494979858398438, -0.024654388427734375, -0.022813796997070312, -0.02097320556640625, -0.019132614135742188, -0.017292022705078125, -0.015451431274414062, -0.01361083984375, -0.011770248413085938, -0.009929656982421875, -0.008089065551757812, -0.00624847412109375, -0.0044078826904296875, -0.002567291259765625, -0.0007266998291015625, 0.0011138916015625, 0.0029544830322265625, 0.004795074462890625, 0.0066356658935546875, 0.00847625732421875, 0.010316848754882812, 0.012157440185546875, 0.013998031616210938, 0.015838623046875, 0.017679214477539062, 0.019519805908203125, 0.021360397338867188, 0.02320098876953125, 0.025041580200195312, 0.026882171630859375, 0.028722763061523438, 0.0305633544921875, 0.03240394592285156, 0.034244537353515625, 0.03608512878417969, 0.03792572021484375, 0.03976631164550781, 0.041606903076171875, 0.04344749450683594, 
0.0452880859375]}, "gradients/encoder.encoder.layers.3.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 9.0, 12.0, 24.0, 46.0, 108.0, 143.0, 187.0, 173.0, 127.0, 79.0, 52.0, 22.0, 9.0, 2.0, 7.0, 1.0, 0.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.43141767382621765, -0.41530224680900574, -0.39918678998947144, -0.3830713629722595, -0.3669559359550476, -0.3508405089378357, -0.3347250819206238, -0.3186096251010895, -0.30249419808387756, -0.28637877106666565, -0.27026331424713135, -0.25414788722991943, -0.23803246021270752, -0.2219170331954956, -0.2058015912771225, -0.1896861493587494, -0.17357072234153748, -0.15745529532432556, -0.14133985340595245, -0.12522441148757935, -0.10910898447036743, -0.09299355000257492, -0.07687811553478241, -0.0607626810669899, -0.04464724659919739, -0.028531812131404877, -0.012416377663612366, 0.0036990568041801453, 0.019814491271972656, 0.03592992573976517, 0.05204536020755768, 0.06816079467535019, 0.08427619934082031, 0.10039163380861282, 0.11650706827640533, 0.13262251019477844, 0.14873793721199036, 0.16485336422920227, 0.18096880614757538, 0.1970842480659485, 0.2131996750831604, 0.22931510210037231, 0.24543054401874542, 0.26154598593711853, 0.27766141295433044, 0.29377683997154236, 0.30989229679107666, 0.3260077238082886, 0.3421231508255005, 0.3582385778427124, 0.3743540048599243, 0.3904694616794586, 0.40658488869667053, 0.42270031571388245, 0.43881577253341675, 0.45493119955062866, 0.4710466265678406, 0.4871620535850525, 0.5032774806022644, 0.5193929076194763, 0.535508394241333, 0.5516238212585449, 0.5677392482757568, 0.5838546752929688, 0.5999701023101807]}, "gradients/encoder.encoder.layers.3.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 3.0, 0.0, 1.0, 0.0, 2.0, 3.0, 8.0, 3.0, 5.0, 9.0, 12.0, 6.0, 13.0, 18.0, 20.0, 19.0, 28.0, 26.0, 31.0, 24.0, 37.0, 22.0, 38.0, 45.0, 36.0, 39.0, 28.0, 36.0, 42.0, 39.0, 39.0, 31.0, 34.0, 36.0, 26.0, 30.0, 31.0, 19.0, 27.0, 28.0, 18.0, 18.0, 17.0, 11.0, 10.0, 10.0, 7.0, 7.0, 4.0, 6.0, 4.0, 2.0, 5.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.12657690048217773, -0.12231141328811646, -0.11804593354463577, -0.11378045380115509, -0.10951496660709381, -0.10524947941303253, -0.10098399966955185, -0.09671851992607117, -0.09245303273200989, -0.08818754553794861, -0.08392206579446793, -0.07965658605098724, -0.07539109885692596, -0.07112561166286469, -0.066860131919384, -0.06259465217590332, -0.05832916498184204, -0.05406368151307106, -0.04979819804430008, -0.0455327145755291, -0.04126723110675812, -0.03700174763798714, -0.032736264169216156, -0.028470780700445175, -0.024205297231674194, -0.019939813762903214, -0.015674330294132233, -0.011408846825361252, -0.007143363356590271, -0.00287787988781929, 0.0013876035809516907, 0.0056530870497226715, 0.009918570518493652, 0.014184053987264633, 0.018449537456035614, 0.022715020924806595, 0.026980504393577576, 0.031245987862348557, 0.03551147133111954, 0.03977695479989052, 0.0440424382686615, 0.04830792173743248, 0.05257340520620346, 0.05683888867497444, 0.06110437214374542, 0.0653698593378067, 0.06963533908128738, 0.07390081882476807, 0.07816630601882935, 0.08243179321289062, 0.08669727295637131, 0.09096275269985199, 0.09522823989391327, 0.09949372708797455, 0.10375920683145523, 0.10802468657493591, 0.11229017376899719, 0.11655566096305847, 
0.12082114070653915, 0.12508662045001984, 0.12935210764408112, 0.1336175948381424, 0.13788306713104248, 0.14214855432510376, 0.14641404151916504]}, "gradients/encoder.encoder.layers.3.attention.out_proj.weight": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 4.0, 2.0, 5.0, 10.0, 11.0, 15.0, 12.0, 22.0, 55.0, 63.0, 91.0, 110.0, 195.0, 269.0, 504.0, 941.0, 2139.0, 5384.0, 14347.0, 45575.0, 155854.0, 398491.0, 291895.0, 89623.0, 26838.0, 9202.0, 3586.0, 1500.0, 709.0, 354.0, 231.0, 160.0, 114.0, 93.0, 54.0, 26.0, 16.0, 22.0, 12.0, 11.0, 2.0, 2.0, 5.0, 1.0, 3.0, 0.0, 2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.07843017578125, -0.0758819580078125, -0.073333740234375, -0.0707855224609375, -0.0682373046875, -0.0656890869140625, -0.063140869140625, -0.0605926513671875, -0.05804443359375, -0.0554962158203125, -0.052947998046875, -0.0503997802734375, -0.0478515625, -0.0453033447265625, -0.042755126953125, -0.0402069091796875, -0.03765869140625, -0.0351104736328125, -0.032562255859375, -0.0300140380859375, -0.0274658203125, -0.0249176025390625, -0.022369384765625, -0.0198211669921875, -0.01727294921875, -0.0147247314453125, -0.012176513671875, -0.0096282958984375, -0.007080078125, -0.0045318603515625, -0.001983642578125, 0.0005645751953125, 0.00311279296875, 0.0056610107421875, 0.008209228515625, 0.0107574462890625, 0.0133056640625, 0.0158538818359375, 0.018402099609375, 0.0209503173828125, 0.02349853515625, 0.0260467529296875, 0.028594970703125, 0.0311431884765625, 0.03369140625, 0.0362396240234375, 0.038787841796875, 0.0413360595703125, 0.04388427734375, 0.0464324951171875, 0.048980712890625, 0.0515289306640625, 0.0540771484375, 0.0566253662109375, 0.059173583984375, 0.0617218017578125, 0.06427001953125, 0.0668182373046875, 0.069366455078125, 0.0719146728515625, 0.074462890625, 0.0770111083984375, 0.079559326171875, 0.0821075439453125, 0.08465576171875]}, "gradients/encoder.encoder.layers.3.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 4.0, 1.0, 7.0, 8.0, 8.0, 30.0, 41.0, 50.0, 73.0, 102.0, 116.0, 116.0, 136.0, 106.0, 67.0, 60.0, 35.0, 26.0, 21.0, 3.0, 6.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.090576171875, -0.08809566497802734, -0.08561515808105469, -0.08313465118408203, -0.08065414428710938, -0.07817363739013672, -0.07569313049316406, -0.0732126235961914, -0.07073211669921875, -0.0682516098022461, -0.06577110290527344, -0.06329059600830078, -0.060810089111328125, -0.05832958221435547, -0.05584907531738281, -0.053368568420410156, -0.0508880615234375, -0.048407554626464844, -0.04592704772949219, -0.04344654083251953, -0.040966033935546875, -0.03848552703857422, -0.03600502014160156, -0.033524513244628906, -0.03104400634765625, -0.028563499450683594, -0.026082992553710938, -0.02360248565673828, -0.021121978759765625, -0.01864147186279297, -0.016160964965820312, -0.013680458068847656, -0.011199951171875, -0.008719444274902344, -0.0062389373779296875, -0.0037584304809570312, -0.001277923583984375, 0.0012025833129882812, 0.0036830902099609375, 0.006163597106933594, 0.00864410400390625, 0.011124610900878906, 0.013605117797851562, 0.01608562469482422, 0.018566131591796875, 0.02104663848876953, 0.023527145385742188, 0.026007652282714844, 0.0284881591796875, 0.030968666076660156, 0.03344917297363281, 0.03592967987060547, 0.038410186767578125, 
0.04089069366455078, 0.04337120056152344, 0.045851707458496094, 0.04833221435546875, 0.050812721252441406, 0.05329322814941406, 0.05577373504638672, 0.058254241943359375, 0.06073474884033203, 0.06321525573730469, 0.06569576263427734, 0.06817626953125]}, "gradients/encoder.encoder.layers.3.attention.v_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 0.0, 4.0, 10.0, 9.0, 19.0, 25.0, 33.0, 64.0, 99.0, 178.0, 314.0, 624.0, 1570.0, 4350.0, 14334.0, 55264.0, 249911.0, 502400.0, 165724.0, 37812.0, 10175.0, 3244.0, 1234.0, 539.0, 249.0, 154.0, 89.0, 41.0, 40.0, 13.0, 16.0, 8.0, 4.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.078369140625, -0.07550525665283203, -0.07264137268066406, -0.0697774887084961, -0.06691360473632812, -0.06404972076416016, -0.06118583679199219, -0.05832195281982422, -0.05545806884765625, -0.05259418487548828, -0.04973030090332031, -0.046866416931152344, -0.044002532958984375, -0.041138648986816406, -0.03827476501464844, -0.03541088104248047, -0.0325469970703125, -0.02968311309814453, -0.026819229125976562, -0.023955345153808594, -0.021091461181640625, -0.018227577209472656, -0.015363693237304688, -0.012499809265136719, -0.00963592529296875, -0.006772041320800781, -0.0039081573486328125, -0.0010442733764648438, 0.001819610595703125, 0.004683494567871094, 0.0075473785400390625, 0.010411262512207031, 0.013275146484375, 0.01613903045654297, 0.019002914428710938, 0.021866798400878906, 0.024730682373046875, 0.027594566345214844, 0.030458450317382812, 0.03332233428955078, 0.03618621826171875, 0.03905010223388672, 0.04191398620605469, 0.044777870178222656, 0.047641754150390625, 0.050505638122558594, 0.05336952209472656, 0.05623340606689453, 0.0590972900390625, 0.06196117401123047, 0.06482505798339844, 0.0676889419555664, 0.07055282592773438, 0.07341670989990234, 0.07628059387207031, 0.07914447784423828, 0.08200836181640625, 0.08487224578857422, 0.08773612976074219, 0.09060001373291016, 0.09346389770507812, 0.0963277816772461, 0.09919166564941406, 0.10205554962158203, 0.10491943359375]}, "gradients/encoder.encoder.layers.3.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 1.0, 0.0, 2.0, 2.0, 4.0, 2.0, 8.0, 6.0, 10.0, 11.0, 9.0, 10.0, 20.0, 18.0, 14.0, 28.0, 23.0, 32.0, 31.0, 27.0, 42.0, 53.0, 41.0, 44.0, 42.0, 49.0, 47.0, 32.0, 41.0, 41.0, 40.0, 46.0, 31.0, 25.0, 22.0, 26.0, 23.0, 16.0, 20.0, 15.0, 7.0, 12.0, 14.0, 10.0, 7.0, 2.0, 4.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.07342529296875, -0.0708150863647461, -0.06820487976074219, -0.06559467315673828, -0.06298446655273438, -0.06037425994873047, -0.05776405334472656, -0.055153846740722656, -0.05254364013671875, -0.049933433532714844, -0.04732322692871094, -0.04471302032470703, -0.042102813720703125, -0.03949260711669922, -0.03688240051269531, -0.034272193908691406, -0.0316619873046875, -0.029051780700683594, -0.026441574096679688, -0.02383136749267578, -0.021221160888671875, -0.01861095428466797, -0.016000747680664062, -0.013390541076660156, -0.01078033447265625, -0.008170127868652344, -0.0055599212646484375, -0.0029497146606445312, -0.000339508056640625, 0.0022706985473632812, 0.0048809051513671875, 0.007491111755371094, 0.010101318359375, 0.012711524963378906, 0.015321731567382812, 0.01793193817138672, 0.020542144775390625, 0.02315235137939453, 0.025762557983398438, 0.028372764587402344, 0.03098297119140625, 0.033593177795410156, 
0.03620338439941406, 0.03881359100341797, 0.041423797607421875, 0.04403400421142578, 0.04664421081542969, 0.049254417419433594, 0.0518646240234375, 0.054474830627441406, 0.05708503723144531, 0.05969524383544922, 0.062305450439453125, 0.06491565704345703, 0.06752586364746094, 0.07013607025146484, 0.07274627685546875, 0.07535648345947266, 0.07796669006347656, 0.08057689666748047, 0.08318710327148438, 0.08579730987548828, 0.08840751647949219, 0.0910177230834961, 0.0936279296875]}, "gradients/encoder.encoder.layers.3.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 3.0, 2.0, 5.0, 2.0, 1.0, 9.0, 14.0, 17.0, 18.0, 42.0, 52.0, 51.0, 78.0, 116.0, 182.0, 287.0, 379.0, 652.0, 1096.0, 1820.0, 3169.0, 5355.0, 9492.0, 17592.0, 34141.0, 70181.0, 163784.0, 319102.0, 229501.0, 95356.0, 44460.0, 22831.0, 12244.0, 6850.0, 3842.0, 2257.0, 1299.0, 794.0, 486.0, 317.0, 206.0, 139.0, 103.0, 64.0, 53.0, 39.0, 20.0, 14.0, 13.0, 14.0, 4.0, 5.0, 6.0, 2.0, 1.0, 2.0, 1.0, 2.0], "bins": [-0.01806640625, -0.017530202865600586, -0.016993999481201172, -0.016457796096801758, -0.015921592712402344, -0.01538538932800293, -0.014849185943603516, -0.014312982559204102, -0.013776779174804688, -0.013240575790405273, -0.01270437240600586, -0.012168169021606445, -0.011631965637207031, -0.011095762252807617, -0.010559558868408203, -0.010023355484008789, -0.009487152099609375, -0.008950948715209961, -0.008414745330810547, -0.007878541946411133, -0.007342338562011719, -0.006806135177612305, -0.006269931793212891, -0.0057337284088134766, -0.0051975250244140625, -0.0046613216400146484, -0.004125118255615234, -0.0035889148712158203, -0.0030527114868164062, -0.002516508102416992, -0.001980304718017578, -0.001444101333618164, -0.00090789794921875, -0.00037169456481933594, 0.00016450881958007812, 0.0007007122039794922, 0.0012369155883789062, 0.0017731189727783203, 0.0023093223571777344, 0.0028455257415771484, 0.0033817291259765625, 0.0039179325103759766, 0.004454135894775391, 0.004990339279174805, 0.005526542663574219, 0.006062746047973633, 0.006598949432373047, 0.007135152816772461, 0.007671356201171875, 0.008207559585571289, 0.008743762969970703, 0.009279966354370117, 0.009816169738769531, 0.010352373123168945, 0.01088857650756836, 0.011424779891967773, 0.011960983276367188, 0.012497186660766602, 0.013033390045166016, 0.01356959342956543, 0.014105796813964844, 0.014642000198364258, 0.015178203582763672, 0.015714406967163086, 0.0162506103515625]}, "gradients/encoder.encoder.layers.3.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 3.0, 2.0, 1.0, 1.0, 3.0, 2.0, 7.0, 6.0, 5.0, 5.0, 3.0, 12.0, 12.0, 13.0, 42.0, 42.0, 49.0, 95.0, 117.0, 127.0, 91.0, 92.0, 91.0, 46.0, 41.0, 27.0, 11.0, 16.0, 12.0, 4.0, 3.0, 1.0, 2.0, 6.0, 7.0, 4.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.1801719665527344e-05, -1.1362135410308838e-05, -1.0922551155090332e-05, -1.0482966899871826e-05, -1.004338264465332e-05, -9.603798389434814e-06, -9.164214134216309e-06, -8.724629878997803e-06, -8.285045623779297e-06, -7.845461368560791e-06, -7.405877113342285e-06, -6.966292858123779e-06, -6.5267086029052734e-06, -6.087124347686768e-06, -5.647540092468262e-06, -5.207955837249756e-06, -4.76837158203125e-06, -4.328787326812744e-06, -3.889203071594238e-06, -3.4496188163757324e-06, -3.0100345611572266e-06, -2.5704503059387207e-06, -2.130866050720215e-06, -1.691281795501709e-06, -1.2516975402832031e-06, 
-8.121132850646973e-07, -3.725290298461914e-07, 6.705522537231445e-08, 5.066394805908203e-07, 9.462237358093262e-07, 1.385807991027832e-06, 1.8253922462463379e-06, 2.2649765014648438e-06, 2.7045607566833496e-06, 3.1441450119018555e-06, 3.5837292671203613e-06, 4.023313522338867e-06, 4.462897777557373e-06, 4.902482032775879e-06, 5.342066287994385e-06, 5.781650543212891e-06, 6.2212347984313965e-06, 6.660819053649902e-06, 7.100403308868408e-06, 7.539987564086914e-06, 7.97957181930542e-06, 8.419156074523926e-06, 8.858740329742432e-06, 9.298324584960938e-06, 9.737908840179443e-06, 1.017749309539795e-05, 1.0617077350616455e-05, 1.1056661605834961e-05, 1.1496245861053467e-05, 1.1935830116271973e-05, 1.2375414371490479e-05, 1.2814998626708984e-05, 1.325458288192749e-05, 1.3694167137145996e-05, 1.4133751392364502e-05, 1.4573335647583008e-05, 1.5012919902801514e-05, 1.545250415802002e-05, 1.5892088413238525e-05, 1.633167266845703e-05]}, "gradients/encoder.encoder.layers.3.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 5.0, 8.0, 7.0, 15.0, 18.0, 30.0, 44.0, 76.0, 98.0, 159.0, 256.0, 448.0, 806.0, 1534.0, 3428.0, 7766.0, 18789.0, 47001.0, 124971.0, 305617.0, 317410.0, 133834.0, 50543.0, 20026.0, 8238.0, 3724.0, 1649.0, 857.0, 450.0, 263.0, 162.0, 108.0, 74.0, 59.0, 29.0, 15.0, 10.0, 8.0, 11.0, 5.0, 4.0, 2.0, 1.0, 1.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01873779296875, -0.018044710159301758, -0.017351627349853516, -0.016658544540405273, -0.01596546173095703, -0.015272378921508789, -0.014579296112060547, -0.013886213302612305, -0.013193130493164062, -0.01250004768371582, -0.011806964874267578, -0.011113882064819336, -0.010420799255371094, -0.009727716445922852, -0.00903463363647461, -0.008341550827026367, -0.007648468017578125, -0.006955385208129883, -0.006262302398681641, -0.0055692195892333984, -0.004876136779785156, -0.004183053970336914, -0.003489971160888672, -0.0027968883514404297, -0.0021038055419921875, -0.0014107227325439453, -0.0007176399230957031, -2.4557113647460938e-05, 0.0006685256958007812, 0.0013616085052490234, 0.0020546913146972656, 0.002747774124145508, 0.00344085693359375, 0.004133939743041992, 0.004827022552490234, 0.0055201053619384766, 0.006213188171386719, 0.006906270980834961, 0.007599353790283203, 0.008292436599731445, 0.008985519409179688, 0.00967860221862793, 0.010371685028076172, 0.011064767837524414, 0.011757850646972656, 0.012450933456420898, 0.01314401626586914, 0.013837099075317383, 0.014530181884765625, 0.015223264694213867, 0.01591634750366211, 0.01660943031311035, 0.017302513122558594, 0.017995595932006836, 0.018688678741455078, 0.01938176155090332, 0.020074844360351562, 0.020767927169799805, 0.021461009979248047, 0.02215409278869629, 0.02284717559814453, 0.023540258407592773, 0.024233341217041016, 0.024926424026489258, 0.0256195068359375]}, "gradients/encoder.encoder.layers.3.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 5.0, 0.0, 3.0, 4.0, 3.0, 3.0, 5.0, 9.0, 7.0, 7.0, 15.0, 12.0, 13.0, 21.0, 19.0, 33.0, 36.0, 48.0, 56.0, 59.0, 76.0, 55.0, 70.0, 65.0, 64.0, 53.0, 50.0, 47.0, 26.0, 28.0, 23.0, 20.0, 14.0, 15.0, 10.0, 11.0, 11.0, 5.0, 5.0, 1.0, 3.0, 2.0, 2.0, 0.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.01904296875, -0.01841259002685547, -0.017782211303710938, -0.017151832580566406, -0.016521453857421875, -0.015891075134277344, -0.015260696411132812, -0.014630317687988281, -0.01399993896484375, 
-0.013369560241699219, -0.012739181518554688, -0.012108802795410156, -0.011478424072265625, -0.010848045349121094, -0.010217666625976562, -0.009587287902832031, -0.0089569091796875, -0.008326530456542969, -0.0076961517333984375, -0.007065773010253906, -0.006435394287109375, -0.005805015563964844, -0.0051746368408203125, -0.004544258117675781, -0.00391387939453125, -0.0032835006713867188, -0.0026531219482421875, -0.0020227432250976562, -0.001392364501953125, -0.0007619857788085938, -0.0001316070556640625, 0.0004987716674804688, 0.001129150390625, 0.0017595291137695312, 0.0023899078369140625, 0.0030202865600585938, 0.003650665283203125, 0.004281044006347656, 0.0049114227294921875, 0.005541801452636719, 0.00617218017578125, 0.006802558898925781, 0.0074329376220703125, 0.008063316345214844, 0.008693695068359375, 0.009324073791503906, 0.009954452514648438, 0.010584831237792969, 0.0112152099609375, 0.011845588684082031, 0.012475967407226562, 0.013106346130371094, 0.013736724853515625, 0.014367103576660156, 0.014997482299804688, 0.01562786102294922, 0.01625823974609375, 0.01688861846923828, 0.017518997192382812, 0.018149375915527344, 0.018779754638671875, 0.019410133361816406, 0.020040512084960938, 0.02067089080810547, 0.02130126953125]}, "gradients/encoder.encoder.layers.3.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 4.0, 3.0, 2.0, 8.0, 15.0, 21.0, 67.0, 113.0, 252.0, 213.0, 183.0, 79.0, 39.0, 9.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.6898530125617981, -0.6707860827445984, -0.6517190933227539, -0.6326521635055542, -0.6135851740837097, -0.59451824426651, -0.5754512548446655, -0.5563843250274658, -0.5373173952102661, -0.5182504653930664, -0.4991834759712219, -0.48011651635169983, -0.46104955673217773, -0.441982626914978, -0.42291566729545593, -0.40384870767593384, -0.38478171825408936, -0.36571475863456726, -0.34664779901504517, -0.32758083939552307, -0.308513879776001, -0.28944694995880127, -0.2703799903392792, -0.2513130307197571, -0.23224607110023499, -0.2131791114807129, -0.1941121518611908, -0.1750452071428299, -0.1559782475233078, -0.1369112879037857, -0.11784433573484421, -0.09877738356590271, -0.07971048355102539, -0.060643527656793594, -0.0415765717625618, -0.022509615868330002, -0.0034426599740982056, 0.01562429964542389, 0.03469125181436539, 0.053758203983306885, 0.07282516360282898, 0.09189212322235107, 0.11095907539129257, 0.13002602756023407, 0.14909298717975616, 0.16815994679927826, 0.18722689151763916, 0.20629385113716125, 0.22536081075668335, 0.24442777037620544, 0.26349472999572754, 0.28256168961524963, 0.30162864923477173, 0.32069557905197144, 0.33976253867149353, 0.3588294982910156, 0.3778964579105377, 0.3969634175300598, 0.4160303771495819, 0.435097336769104, 0.4541642665863037, 0.4732312560081482, 0.4922981858253479, 0.5113651752471924, 0.5304321050643921]}, "gradients/encoder.encoder.layers.3.layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 2.0, 2.0, 2.0, 3.0, 4.0, 3.0, 8.0, 8.0, 9.0, 5.0, 12.0, 15.0, 14.0, 19.0, 30.0, 26.0, 27.0, 24.0, 26.0, 30.0, 42.0, 34.0, 27.0, 37.0, 40.0, 43.0, 40.0, 44.0, 46.0, 42.0, 37.0, 36.0, 26.0, 31.0, 19.0, 33.0, 21.0, 14.0, 26.0, 14.0, 21.0, 15.0, 4.0, 8.0, 12.0, 6.0, 4.0, 7.0, 6.0, 2.0, 9.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.17511945962905884, -0.16959434747695923, 
-0.1640692502260208, -0.1585441380739212, -0.1530190259218216, -0.14749392867088318, -0.14196881651878357, -0.13644370436668396, -0.13091859221458435, -0.12539348006248474, -0.11986837536096573, -0.11434327065944672, -0.1088181585073471, -0.1032930538058281, -0.09776794910430908, -0.09224283695220947, -0.08671773970127106, -0.08119263499975204, -0.07566752284765244, -0.07014241814613342, -0.06461730599403381, -0.0590922012925148, -0.05356709659099579, -0.04804198816418648, -0.04251687973737717, -0.036991771310567856, -0.031466662883758545, -0.025941558182239532, -0.02041644975543022, -0.01489134132862091, -0.009366236627101898, -0.0038411282002925873, 0.0016839802265167236, 0.00720908772200346, 0.012734195217490196, 0.018259301781654358, 0.02378441020846367, 0.02930951863527298, 0.03483462333679199, 0.0403597317636013, 0.045884840190410614, 0.051409948617219925, 0.056935057044029236, 0.06246016174554825, 0.06798526644706726, 0.07351037859916687, 0.07903548330068588, 0.0845605880022049, 0.0900857001543045, 0.09561080485582352, 0.10113591700792313, 0.10666102170944214, 0.11218613386154175, 0.11771123856306076, 0.12323634326457977, 0.12876145541667938, 0.1342865526676178, 0.1398116648197174, 0.14533676207065582, 0.15086187422275543, 0.15638698637485504, 0.16191208362579346, 0.16743719577789307, 0.17296230792999268, 0.17848742008209229]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 3.0, 4.0, 3.0, 7.0, 7.0, 11.0, 15.0, 13.0, 26.0, 33.0, 66.0, 113.0, 201.0, 375.0, 932.0, 2980.0, 8666.0, 30005.0, 200298.0, 1525250.0, 2033348.0, 330343.0, 45204.0, 10122.0, 3619.0, 1627.0, 571.0, 186.0, 110.0, 48.0, 36.0, 19.0, 18.0, 8.0, 5.0, 5.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08160400390625, -0.07940530776977539, -0.07720661163330078, -0.07500791549682617, -0.07280921936035156, -0.07061052322387695, -0.06841182708740234, -0.06621313095092773, -0.06401443481445312, -0.061815738677978516, -0.059617042541503906, -0.0574183464050293, -0.05521965026855469, -0.05302095413208008, -0.05082225799560547, -0.04862356185913086, -0.04642486572265625, -0.04422616958618164, -0.04202747344970703, -0.03982877731323242, -0.03763008117675781, -0.0354313850402832, -0.033232688903808594, -0.031033992767333984, -0.028835296630859375, -0.026636600494384766, -0.024437904357910156, -0.022239208221435547, -0.020040512084960938, -0.017841815948486328, -0.01564311981201172, -0.01344442367553711, -0.0112457275390625, -0.00904703140258789, -0.006848335266113281, -0.004649639129638672, -0.0024509429931640625, -0.0002522468566894531, 0.0019464492797851562, 0.004145145416259766, 0.006343841552734375, 0.008542537689208984, 0.010741233825683594, 0.012939929962158203, 0.015138626098632812, 0.017337322235107422, 0.01953601837158203, 0.02173471450805664, 0.02393341064453125, 0.02613210678100586, 0.02833080291748047, 0.030529499053955078, 0.03272819519042969, 0.0349268913269043, 0.037125587463378906, 0.039324283599853516, 0.041522979736328125, 0.043721675872802734, 0.045920372009277344, 0.04811906814575195, 0.05031776428222656, 0.05251646041870117, 0.05471515655517578, 0.05691385269165039, 0.059112548828125]}, "gradients/encoder.encoder.layers.2.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 8.0, 11.0, 24.0, 34.0, 
67.0, 73.0, 106.0, 130.0, 116.0, 116.0, 110.0, 73.0, 53.0, 49.0, 20.0, 13.0, 6.0, 5.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.09759521484375, -0.09492111206054688, -0.09224700927734375, -0.08957290649414062, -0.0868988037109375, -0.08422470092773438, -0.08155059814453125, -0.07887649536132812, -0.076202392578125, -0.07352828979492188, -0.07085418701171875, -0.06818008422851562, -0.0655059814453125, -0.06283187866210938, -0.06015777587890625, -0.057483673095703125, -0.0548095703125, -0.052135467529296875, -0.04946136474609375, -0.046787261962890625, -0.0441131591796875, -0.041439056396484375, -0.03876495361328125, -0.036090850830078125, -0.033416748046875, -0.030742645263671875, -0.02806854248046875, -0.025394439697265625, -0.0227203369140625, -0.020046234130859375, -0.01737213134765625, -0.014698028564453125, -0.01202392578125, -0.009349822998046875, -0.00667572021484375, -0.004001617431640625, -0.0013275146484375, 0.001346588134765625, 0.00402069091796875, 0.006694793701171875, 0.009368896484375, 0.012042999267578125, 0.01471710205078125, 0.017391204833984375, 0.0200653076171875, 0.022739410400390625, 0.02541351318359375, 0.028087615966796875, 0.03076171875, 0.033435821533203125, 0.03610992431640625, 0.038784027099609375, 0.0414581298828125, 0.044132232666015625, 0.04680633544921875, 0.049480438232421875, 0.052154541015625, 0.054828643798828125, 0.05750274658203125, 0.060176849365234375, 0.0628509521484375, 0.06552505493164062, 0.06819915771484375, 0.07087326049804688, 0.07354736328125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 2.0, 6.0, 6.0, 6.0, 14.0, 18.0, 23.0, 26.0, 44.0, 35.0, 46.0, 76.0, 129.0, 351.0, 1500.0, 17378.0, 2487778.0, 1671169.0, 13639.0, 1289.0, 329.0, 139.0, 75.0, 52.0, 48.0, 26.0, 22.0, 15.0, 7.0, 12.0, 10.0, 8.0, 4.0, 1.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.19091796875, -0.184326171875, -0.177734375, -0.171142578125, -0.16455078125, -0.157958984375, -0.1513671875, -0.144775390625, -0.13818359375, -0.131591796875, -0.125, -0.118408203125, -0.11181640625, -0.105224609375, -0.0986328125, -0.092041015625, -0.08544921875, -0.078857421875, -0.072265625, -0.065673828125, -0.05908203125, -0.052490234375, -0.0458984375, -0.039306640625, -0.03271484375, -0.026123046875, -0.01953125, -0.012939453125, -0.00634765625, 0.000244140625, 0.0068359375, 0.013427734375, 0.02001953125, 0.026611328125, 0.033203125, 0.039794921875, 0.04638671875, 0.052978515625, 0.0595703125, 0.066162109375, 0.07275390625, 0.079345703125, 0.0859375, 0.092529296875, 0.09912109375, 0.105712890625, 0.1123046875, 0.118896484375, 0.12548828125, 0.132080078125, 0.138671875, 0.145263671875, 0.15185546875, 0.158447265625, 0.1650390625, 0.171630859375, 0.17822265625, 0.184814453125, 0.19140625, 0.197998046875, 0.20458984375, 0.211181640625, 0.2177734375, 0.224365234375, 0.23095703125]}, "gradients/encoder.encoder.layers.2.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0, 1.0, 3.0, 1.0, 2.0, 0.0, 8.0, 12.0, 12.0, 12.0, 20.0, 40.0, 68.0, 93.0, 141.0, 233.0, 401.0, 534.0, 727.0, 603.0, 449.0, 256.0, 183.0, 98.0, 72.0, 44.0, 24.0, 13.0, 11.0, 10.0, 8.0, 2.0, 0.0, 4.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0], "bins": 
[-0.108154296875, -0.10561656951904297, -0.10307884216308594, -0.1005411148071289, -0.09800338745117188, -0.09546566009521484, -0.09292793273925781, -0.09039020538330078, -0.08785247802734375, -0.08531475067138672, -0.08277702331542969, -0.08023929595947266, -0.07770156860351562, -0.0751638412475586, -0.07262611389160156, -0.07008838653564453, -0.0675506591796875, -0.06501293182373047, -0.06247520446777344, -0.059937477111816406, -0.057399749755859375, -0.054862022399902344, -0.05232429504394531, -0.04978656768798828, -0.04724884033203125, -0.04471111297607422, -0.04217338562011719, -0.039635658264160156, -0.037097930908203125, -0.034560203552246094, -0.03202247619628906, -0.02948474884033203, -0.026947021484375, -0.02440929412841797, -0.021871566772460938, -0.019333839416503906, -0.016796112060546875, -0.014258384704589844, -0.011720657348632812, -0.009182929992675781, -0.00664520263671875, -0.004107475280761719, -0.0015697479248046875, 0.0009679794311523438, 0.003505706787109375, 0.006043434143066406, 0.008581161499023438, 0.011118888854980469, 0.0136566162109375, 0.01619434356689453, 0.018732070922851562, 0.021269798278808594, 0.023807525634765625, 0.026345252990722656, 0.028882980346679688, 0.03142070770263672, 0.03395843505859375, 0.03649616241455078, 0.03903388977050781, 0.041571617126464844, 0.044109344482421875, 0.046647071838378906, 0.04918479919433594, 0.05172252655029297, 0.05426025390625]}, "gradients/encoder.encoder.layers.2.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 2.0, 6.0, 14.0, 39.0, 121.0, 250.0, 270.0, 193.0, 75.0, 22.0, 8.0, 3.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-1.1342246532440186, -1.103812575340271, -1.0734004974365234, -1.0429884195327759, -1.0125763416290283, -0.9821643233299255, -0.9517523050308228, -0.9213402271270752, -0.8909281492233276, -0.8605160713195801, -0.8301039934158325, -0.7996919751167297, -0.7692798972129822, -0.7388678193092346, -0.7084558010101318, -0.6780437231063843, -0.6476316452026367, -0.6172195672988892, -0.5868074893951416, -0.5563954710960388, -0.5259833931922913, -0.4955713152885437, -0.46515926718711853, -0.43474721908569336, -0.4043351411819458, -0.37392306327819824, -0.34351101517677307, -0.3130989670753479, -0.28268688917160034, -0.2522748112678528, -0.2218627631664276, -0.19145070016384125, -0.16103863716125488, -0.13062657415866852, -0.10021451115608215, -0.06980244815349579, -0.039390385150909424, -0.008978322148323059, 0.021433740854263306, 0.05184580385684967, 0.08225786685943604, 0.1126699298620224, 0.14308199286460876, 0.17349405586719513, 0.2039061188697815, 0.23431818187236786, 0.2647302448749542, 0.2951422929763794, 0.32555437088012695, 0.3559664487838745, 0.3863784968852997, 0.41679054498672485, 0.4472026228904724, 0.47761470079421997, 0.5080267190933228, 0.5384387969970703, 0.5688508749008179, 0.5992629528045654, 0.629675030708313, 0.6600870490074158, 0.6904991269111633, 0.7209112048149109, 0.7513232231140137, 0.7817353010177612, 0.8121473789215088]}, "gradients/encoder.encoder.layers.2.final_layer_norm.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 4.0, 0.0, 5.0, 7.0, 7.0, 8.0, 8.0, 11.0, 6.0, 11.0, 23.0, 27.0, 31.0, 21.0, 18.0, 28.0, 35.0, 46.0, 38.0, 38.0, 49.0, 32.0, 42.0, 40.0, 46.0, 44.0, 30.0, 40.0, 41.0, 46.0, 34.0, 38.0, 20.0, 
21.0, 16.0, 14.0, 15.0, 12.0, 14.0, 4.0, 6.0, 6.0, 2.0, 8.0, 6.0, 3.0, 1.0, 3.0, 1.0, 3.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0], "bins": [-0.17921161651611328, -0.17352086305618286, -0.16783012449741364, -0.1621393859386444, -0.156448632478714, -0.15075787901878357, -0.14506714046001434, -0.13937640190124512, -0.1336856484413147, -0.12799489498138428, -0.12230415642261505, -0.11661341041326523, -0.1109226644039154, -0.10523191839456558, -0.09954117238521576, -0.09385042637586594, -0.08815968036651611, -0.08246893435716629, -0.07677818834781647, -0.07108744233846664, -0.06539669632911682, -0.059705950319767, -0.054015204310417175, -0.04832445830106735, -0.04263371229171753, -0.036942966282367706, -0.03125222027301788, -0.02556147426366806, -0.019870728254318237, -0.014179982244968414, -0.008489236235618591, -0.0027984902262687683, 0.0028922557830810547, 0.008583001792430878, 0.0142737478017807, 0.019964493811130524, 0.025655239820480347, 0.03134598582983017, 0.03703673183917999, 0.042727477848529816, 0.04841822385787964, 0.05410896986722946, 0.059799715876579285, 0.06549046188592911, 0.07118120789527893, 0.07687195390462875, 0.08256269991397858, 0.0882534459233284, 0.09394419193267822, 0.09963493794202805, 0.10532568395137787, 0.11101642996072769, 0.11670717597007751, 0.12239792197942734, 0.12808866798877716, 0.1337794065475464, 0.1394701600074768, 0.14516091346740723, 0.15085165202617645, 0.15654239058494568, 0.1622331440448761, 0.16792389750480652, 0.17361463606357574, 0.17930537462234497, 0.1849961280822754]}, "gradients/encoder.encoder.layers.2.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 3.0, 3.0, 4.0, 7.0, 13.0, 16.0, 29.0, 41.0, 80.0, 140.0, 219.0, 419.0, 827.0, 1648.0, 4041.0, 11739.0, 43406.0, 201342.0, 488821.0, 226169.0, 48854.0, 12673.0, 4367.0, 1836.0, 858.0, 445.0, 241.0, 114.0, 68.0, 52.0, 35.0, 15.0, 4.0, 12.0, 13.0, 7.0, 2.0, 0.0, 2.0, 2.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.08160400390625, -0.07864093780517578, -0.07567787170410156, -0.07271480560302734, -0.06975173950195312, -0.0667886734008789, -0.06382560729980469, -0.06086254119873047, -0.05789947509765625, -0.05493640899658203, -0.05197334289550781, -0.049010276794433594, -0.046047210693359375, -0.043084144592285156, -0.04012107849121094, -0.03715801239013672, -0.0341949462890625, -0.03123188018798828, -0.028268814086914062, -0.025305747985839844, -0.022342681884765625, -0.019379615783691406, -0.016416549682617188, -0.013453483581542969, -0.01049041748046875, -0.007527351379394531, -0.0045642852783203125, -0.0016012191772460938, 0.001361846923828125, 0.004324913024902344, 0.0072879791259765625, 0.010251045227050781, 0.013214111328125, 0.01617717742919922, 0.019140243530273438, 0.022103309631347656, 0.025066375732421875, 0.028029441833496094, 0.030992507934570312, 0.03395557403564453, 0.03691864013671875, 0.03988170623779297, 0.04284477233886719, 0.045807838439941406, 0.048770904541015625, 0.051733970642089844, 0.05469703674316406, 0.05766010284423828, 0.0606231689453125, 0.06358623504638672, 0.06654930114746094, 0.06951236724853516, 0.07247543334960938, 0.0754384994506836, 0.07840156555175781, 0.08136463165283203, 0.08432769775390625, 0.08729076385498047, 0.09025382995605469, 0.0932168960571289, 0.09617996215820312, 0.09914302825927734, 0.10210609436035156, 0.10506916046142578, 0.1080322265625]}, "gradients/encoder.encoder.layers.2.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 3.0, 4.0, 5.0, 15.0, 13.0, 38.0, 40.0, 76.0, 77.0, 99.0, 127.0, 117.0, 89.0, 95.0, 85.0, 54.0, 32.0, 24.0, 9.0, 8.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.10540771484375, -0.1025705337524414, -0.09973335266113281, -0.09689617156982422, -0.09405899047851562, -0.09122180938720703, -0.08838462829589844, -0.08554744720458984, -0.08271026611328125, -0.07987308502197266, -0.07703590393066406, -0.07419872283935547, -0.07136154174804688, -0.06852436065673828, -0.06568717956542969, -0.0628499984741211, -0.0600128173828125, -0.057175636291503906, -0.05433845520019531, -0.05150127410888672, -0.048664093017578125, -0.04582691192626953, -0.04298973083496094, -0.040152549743652344, -0.03731536865234375, -0.034478187561035156, -0.03164100646972656, -0.02880382537841797, -0.025966644287109375, -0.02312946319580078, -0.020292282104492188, -0.017455101013183594, -0.014617919921875, -0.011780738830566406, -0.008943557739257812, -0.006106376647949219, -0.003269195556640625, -0.00043201446533203125, 0.0024051666259765625, 0.005242347717285156, 0.00807952880859375, 0.010916709899902344, 0.013753890991210938, 0.01659107208251953, 0.019428253173828125, 0.02226543426513672, 0.025102615356445312, 0.027939796447753906, 0.0307769775390625, 0.033614158630371094, 0.03645133972167969, 0.03928852081298828, 0.042125701904296875, 0.04496288299560547, 0.04780006408691406, 0.050637245178222656, 0.05347442626953125, 0.056311607360839844, 0.05914878845214844, 0.06198596954345703, 0.06482315063476562, 0.06766033172607422, 0.07049751281738281, 0.0733346939086914, 0.076171875]}, "gradients/encoder.encoder.layers.2.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 2.0, 2.0, 3.0, 1.0, 2.0, 0.0, 4.0, 5.0, 4.0, 10.0, 23.0, 18.0, 33.0, 40.0, 55.0, 94.0, 90.0, 169.0, 305.0, 502.0, 1010.0, 1923.0, 3922.0, 8255.0, 19052.0, 52272.0, 165177.0, 371303.0, 275282.0, 93447.0, 31522.0, 12417.0, 5764.0, 2672.0, 1388.0, 720.0, 431.0, 218.0, 122.0, 81.0, 64.0, 37.0, 20.0, 29.0, 23.0, 18.0, 10.0, 6.0, 4.0, 7.0, 4.0, 4.0, 5.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.06304931640625, -0.06106376647949219, -0.059078216552734375, -0.05709266662597656, -0.05510711669921875, -0.05312156677246094, -0.051136016845703125, -0.04915046691894531, -0.0471649169921875, -0.04517936706542969, -0.043193817138671875, -0.04120826721191406, -0.03922271728515625, -0.03723716735839844, -0.035251617431640625, -0.03326606750488281, -0.031280517578125, -0.029294967651367188, -0.027309417724609375, -0.025323867797851562, -0.02333831787109375, -0.021352767944335938, -0.019367218017578125, -0.017381668090820312, -0.0153961181640625, -0.013410568237304688, -0.011425018310546875, -0.009439468383789062, -0.00745391845703125, -0.0054683685302734375, -0.003482818603515625, -0.0014972686767578125, 0.00048828125, 0.0024738311767578125, 0.004459381103515625, 0.0064449310302734375, 0.00843048095703125, 0.010416030883789062, 0.012401580810546875, 0.014387130737304688, 0.0163726806640625, 0.018358230590820312, 0.020343780517578125, 0.022329330444335938, 0.02431488037109375, 0.026300430297851562, 0.028285980224609375, 0.030271530151367188, 0.032257080078125, 0.03424263000488281, 0.036228179931640625, 0.03821372985839844, 0.04019927978515625, 0.04218482971191406, 0.044170379638671875, 0.04615592956542969, 0.0481414794921875, 0.05012702941894531, 0.052112579345703125, 
0.05409812927246094, 0.05608367919921875, 0.05806922912597656, 0.060054779052734375, 0.06204032897949219, 0.06402587890625]}, "gradients/encoder.encoder.layers.2.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 3.0, 4.0, 1.0, 4.0, 5.0, 7.0, 13.0, 11.0, 16.0, 13.0, 18.0, 20.0, 12.0, 22.0, 24.0, 31.0, 34.0, 42.0, 40.0, 39.0, 44.0, 42.0, 36.0, 56.0, 35.0, 46.0, 49.0, 33.0, 31.0, 35.0, 26.0, 39.0, 39.0, 25.0, 18.0, 17.0, 13.0, 13.0, 11.0, 8.0, 8.0, 11.0, 4.0, 3.0, 3.0, 3.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 0.0, 2.0], "bins": [-0.095703125, -0.09285736083984375, -0.0900115966796875, -0.08716583251953125, -0.084320068359375, -0.08147430419921875, -0.0786285400390625, -0.07578277587890625, -0.07293701171875, -0.07009124755859375, -0.0672454833984375, -0.06439971923828125, -0.061553955078125, -0.05870819091796875, -0.0558624267578125, -0.05301666259765625, -0.0501708984375, -0.04732513427734375, -0.0444793701171875, -0.04163360595703125, -0.038787841796875, -0.03594207763671875, -0.0330963134765625, -0.03025054931640625, -0.02740478515625, -0.02455902099609375, -0.0217132568359375, -0.01886749267578125, -0.016021728515625, -0.01317596435546875, -0.0103302001953125, -0.00748443603515625, -0.004638671875, -0.00179290771484375, 0.0010528564453125, 0.00389862060546875, 0.006744384765625, 0.00959014892578125, 0.0124359130859375, 0.01528167724609375, 0.01812744140625, 0.02097320556640625, 0.0238189697265625, 0.02666473388671875, 0.029510498046875, 0.03235626220703125, 0.0352020263671875, 0.03804779052734375, 0.0408935546875, 0.04373931884765625, 0.0465850830078125, 0.04943084716796875, 0.052276611328125, 0.05512237548828125, 0.0579681396484375, 0.06081390380859375, 0.06365966796875, 0.06650543212890625, 0.0693511962890625, 0.07219696044921875, 0.075042724609375, 0.07788848876953125, 0.0807342529296875, 0.08358001708984375, 0.08642578125]}, "gradients/encoder.encoder.layers.2.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 4.0, 5.0, 1.0, 6.0, 6.0, 11.0, 16.0, 26.0, 46.0, 79.0, 144.0, 214.0, 430.0, 859.0, 1919.0, 4190.0, 10441.0, 28707.0, 93904.0, 309335.0, 391774.0, 140974.0, 40897.0, 14117.0, 5693.0, 2396.0, 1121.0, 555.0, 294.0, 150.0, 98.0, 56.0, 25.0, 28.0, 16.0, 4.0, 10.0, 7.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.0220947265625, -0.021385669708251953, -0.020676612854003906, -0.01996755599975586, -0.019258499145507812, -0.018549442291259766, -0.01784038543701172, -0.017131328582763672, -0.016422271728515625, -0.015713214874267578, -0.015004158020019531, -0.014295101165771484, -0.013586044311523438, -0.01287698745727539, -0.012167930603027344, -0.011458873748779297, -0.01074981689453125, -0.010040760040283203, -0.009331703186035156, -0.00862264633178711, -0.007913589477539062, -0.007204532623291016, -0.006495475769042969, -0.005786418914794922, -0.005077362060546875, -0.004368305206298828, -0.0036592483520507812, -0.0029501914978027344, -0.0022411346435546875, -0.0015320777893066406, -0.0008230209350585938, -0.00011396408081054688, 0.0005950927734375, 0.0013041496276855469, 0.0020132064819335938, 0.0027222633361816406, 0.0034313201904296875, 0.004140377044677734, 0.004849433898925781, 0.005558490753173828, 0.006267547607421875, 0.006976604461669922, 0.007685661315917969, 0.008394718170166016, 0.009103775024414062, 0.00981283187866211, 0.010521888732910156, 0.011230945587158203, 0.01194000244140625, 0.012649059295654297, 
0.013358116149902344, 0.01406717300415039, 0.014776229858398438, 0.015485286712646484, 0.01619434356689453, 0.016903400421142578, 0.017612457275390625, 0.018321514129638672, 0.01903057098388672, 0.019739627838134766, 0.020448684692382812, 0.02115774154663086, 0.021866798400878906, 0.022575855255126953, 0.023284912109375]}, "gradients/encoder.encoder.layers.2.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 4.0, 3.0, 7.0, 9.0, 4.0, 13.0, 28.0, 21.0, 36.0, 48.0, 54.0, 58.0, 80.0, 95.0, 79.0, 75.0, 88.0, 72.0, 50.0, 50.0, 28.0, 22.0, 29.0, 13.0, 12.0, 10.0, 6.0, 4.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.1622905731201172e-05, -1.130625605583191e-05, -1.0989606380462646e-05, -1.0672956705093384e-05, -1.0356307029724121e-05, -1.0039657354354858e-05, -9.723007678985596e-06, -9.406358003616333e-06, -9.08970832824707e-06, -8.773058652877808e-06, -8.456408977508545e-06, -8.139759302139282e-06, -7.82310962677002e-06, -7.506459951400757e-06, -7.189810276031494e-06, -6.8731606006622314e-06, -6.556510925292969e-06, -6.239861249923706e-06, -5.923211574554443e-06, -5.606561899185181e-06, -5.289912223815918e-06, -4.973262548446655e-06, -4.656612873077393e-06, -4.33996319770813e-06, -4.023313522338867e-06, -3.7066638469696045e-06, -3.390014171600342e-06, -3.073364496231079e-06, -2.7567148208618164e-06, -2.4400651454925537e-06, -2.123415470123291e-06, -1.8067657947540283e-06, -1.4901161193847656e-06, -1.173466444015503e-06, -8.568167686462402e-07, -5.401670932769775e-07, -2.2351741790771484e-07, 9.313225746154785e-08, 4.0978193283081055e-07, 7.264316082000732e-07, 1.043081283569336e-06, 1.3597309589385986e-06, 1.6763806343078613e-06, 1.993030309677124e-06, 2.3096799850463867e-06, 2.6263296604156494e-06, 2.942979335784912e-06, 3.259629011154175e-06, 3.5762786865234375e-06, 3.8929283618927e-06, 4.209578037261963e-06, 4.526227712631226e-06, 4.842877388000488e-06, 5.159527063369751e-06, 5.476176738739014e-06, 5.792826414108276e-06, 6.109476089477539e-06, 6.426125764846802e-06, 6.7427754402160645e-06, 7.059425115585327e-06, 7.37607479095459e-06, 7.692724466323853e-06, 8.009374141693115e-06, 8.326023817062378e-06, 8.64267349243164e-06]}, "gradients/encoder.encoder.layers.2.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 4.0, 5.0, 5.0, 8.0, 8.0, 12.0, 18.0, 27.0, 34.0, 48.0, 64.0, 99.0, 151.0, 249.0, 391.0, 626.0, 971.0, 1648.0, 3119.0, 6188.0, 13455.0, 32098.0, 83215.0, 204149.0, 321335.0, 221929.0, 93345.0, 36122.0, 14653.0, 6614.0, 3225.0, 1879.0, 1041.0, 617.0, 405.0, 270.0, 176.0, 116.0, 69.0, 42.0, 39.0, 25.0, 20.0, 15.0, 9.0, 5.0, 5.0, 8.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0], "bins": [-0.020294189453125, -0.01970696449279785, -0.019119739532470703, -0.018532514572143555, -0.017945289611816406, -0.017358064651489258, -0.01677083969116211, -0.01618361473083496, -0.015596389770507812, -0.015009164810180664, -0.014421939849853516, -0.013834714889526367, -0.013247489929199219, -0.01266026496887207, -0.012073040008544922, -0.011485815048217773, -0.010898590087890625, -0.010311365127563477, -0.009724140167236328, -0.00913691520690918, -0.008549690246582031, -0.007962465286254883, -0.007375240325927734, -0.006788015365600586, -0.0062007904052734375, -0.005613565444946289, -0.005026340484619141, -0.004439115524291992, -0.0038518905639648438, -0.0032646656036376953, -0.002677440643310547, 
-0.0020902156829833984, -0.00150299072265625, -0.0009157657623291016, -0.0003285408020019531, 0.0002586841583251953, 0.0008459091186523438, 0.0014331340789794922, 0.0020203590393066406, 0.002607583999633789, 0.0031948089599609375, 0.003782033920288086, 0.004369258880615234, 0.004956483840942383, 0.005543708801269531, 0.00613093376159668, 0.006718158721923828, 0.0073053836822509766, 0.007892608642578125, 0.008479833602905273, 0.009067058563232422, 0.00965428352355957, 0.010241508483886719, 0.010828733444213867, 0.011415958404541016, 0.012003183364868164, 0.012590408325195312, 0.013177633285522461, 0.01376485824584961, 0.014352083206176758, 0.014939308166503906, 0.015526533126831055, 0.016113758087158203, 0.01670098304748535, 0.0172882080078125]}, "gradients/encoder.encoder.layers.2.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0, 7.0, 5.0, 8.0, 15.0, 15.0, 22.0, 24.0, 40.0, 58.0, 57.0, 66.0, 60.0, 80.0, 84.0, 68.0, 79.0, 58.0, 62.0, 39.0, 43.0, 21.0, 20.0, 20.0, 13.0, 10.0, 6.0, 5.0, 5.0, 1.0, 3.0, 0.0, 2.0, 4.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0222320556640625, -0.02142810821533203, -0.020624160766601562, -0.019820213317871094, -0.019016265869140625, -0.018212318420410156, -0.017408370971679688, -0.01660442352294922, -0.01580047607421875, -0.014996528625488281, -0.014192581176757812, -0.013388633728027344, -0.012584686279296875, -0.011780738830566406, -0.010976791381835938, -0.010172843933105469, -0.009368896484375, -0.008564949035644531, -0.0077610015869140625, -0.006957054138183594, -0.006153106689453125, -0.005349159240722656, -0.0045452117919921875, -0.0037412643432617188, -0.00293731689453125, -0.0021333694458007812, -0.0013294219970703125, -0.0005254745483398438, 0.000278472900390625, 0.0010824203491210938, 0.0018863677978515625, 0.0026903152465820312, 0.0034942626953125, 0.004298210144042969, 0.0051021575927734375, 0.005906105041503906, 0.006710052490234375, 0.007513999938964844, 0.008317947387695312, 0.009121894836425781, 0.00992584228515625, 0.010729789733886719, 0.011533737182617188, 0.012337684631347656, 0.013141632080078125, 0.013945579528808594, 0.014749526977539062, 0.015553474426269531, 0.016357421875, 0.01716136932373047, 0.017965316772460938, 0.018769264221191406, 0.019573211669921875, 0.020377159118652344, 0.021181106567382812, 0.02198505401611328, 0.02278900146484375, 0.02359294891357422, 0.024396896362304688, 0.025200843811035156, 0.026004791259765625, 0.026808738708496094, 0.027612686157226562, 0.02841663360595703, 0.0292205810546875]}, "gradients/encoder.encoder.layers.2.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 14.0, 24.0, 74.0, 160.0, 241.0, 206.0, 138.0, 70.0, 30.0, 21.0, 5.0, 4.0, 2.0, 2.0, 4.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.5351487994194031, -0.5176911950111389, -0.5002336502075195, -0.48277607560157776, -0.465318500995636, -0.4478608965873718, -0.43040332198143005, -0.4129457473754883, -0.3954881727695465, -0.37803059816360474, -0.36057302355766296, -0.3431154489517212, -0.32565784454345703, -0.30820029973983765, -0.2907426953315735, -0.2732851207256317, -0.25582754611968994, -0.23836997151374817, -0.2209123969078064, -0.20345480740070343, -0.18599723279476166, -0.16853965818881989, 
-0.15108206868171692, -0.13362449407577515, -0.11616691946983337, -0.0987093448638916, -0.08125176280736923, -0.06379418075084686, -0.04633660614490509, -0.028879031538963318, -0.011421449482440948, 0.006036132574081421, 0.023493647575378418, 0.04095122590661049, 0.05840880423784256, 0.07586638629436493, 0.0933239609003067, 0.11078153550624847, 0.12823912501335144, 0.1456966996192932, 0.16315427422523499, 0.18061184883117676, 0.19806942343711853, 0.2155270129442215, 0.23298458755016327, 0.25044214725494385, 0.267899751663208, 0.2853573262691498, 0.30281490087509155, 0.3202724754810333, 0.3377300500869751, 0.35518762469291687, 0.37264519929885864, 0.3901028037071228, 0.4075603783130646, 0.42501795291900635, 0.4424755275249481, 0.4599331021308899, 0.47739067673683167, 0.49484825134277344, 0.5123058557510376, 0.529763400554657, 0.5472210049629211, 0.5646785497665405, 0.5821361541748047]}, "gradients/encoder.encoder.layers.2.layer_norm.bias": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 4.0, 2.0, 5.0, 0.0, 6.0, 5.0, 4.0, 5.0, 8.0, 9.0, 17.0, 15.0, 12.0, 23.0, 29.0, 34.0, 26.0, 33.0, 28.0, 44.0, 44.0, 43.0, 55.0, 49.0, 36.0, 46.0, 43.0, 34.0, 47.0, 32.0, 37.0, 26.0, 27.0, 33.0, 23.0, 28.0, 19.0, 16.0, 17.0, 12.0, 7.0, 5.0, 8.0, 7.0, 4.0, 3.0, 2.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.18936187028884888, -0.18273046612739563, -0.1760990470647812, -0.16946764290332794, -0.1628362387418747, -0.15620481967926025, -0.149573415517807, -0.14294201135635376, -0.1363106071949005, -0.12967920303344727, -0.12304779142141342, -0.11641637980937958, -0.10978497564792633, -0.10315356403589249, -0.09652215242385864, -0.0898907482624054, -0.08325932919979095, -0.07662791758775711, -0.06999651342630386, -0.06336510181427002, -0.056733693927526474, -0.05010228604078293, -0.043470874428749084, -0.03683946654200554, -0.030208058655261993, -0.023576650768518448, -0.016945241019129753, -0.010313831269741058, -0.003682423382997513, 0.0029489845037460327, 0.009580396115779877, 0.016211804002523422, 0.022843211889266968, 0.029474619776010513, 0.03610602766275406, 0.0427374392747879, 0.04936884716153145, 0.056000255048274994, 0.06263166666030884, 0.06926307082176208, 0.07589448243379593, 0.08252589404582977, 0.08915729820728302, 0.09578870981931686, 0.10242012143135071, 0.10905152559280396, 0.1156829372048378, 0.12231434881687164, 0.1289457529783249, 0.13557715713977814, 0.14220857620239258, 0.14883998036384583, 0.15547138452529907, 0.16210278868675232, 0.16873420774936676, 0.17536561191082, 0.18199703097343445, 0.1886284351348877, 0.19525985419750214, 0.20189125835895538, 0.20852266252040863, 0.21515408158302307, 0.22178548574447632, 0.22841688990592957, 0.2350482940673828]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0, 3.0, 3.0, 6.0, 5.0, 5.0, 2.0, 7.0, 19.0, 24.0, 35.0, 59.0, 74.0, 117.0, 207.0, 389.0, 738.0, 1624.0, 3455.0, 7242.0, 16631.0, 50392.0, 210212.0, 859342.0, 1748526.0, 966408.0, 240051.0, 57188.0, 17256.0, 6922.0, 3437.0, 1940.0, 963.0, 431.0, 213.0, 124.0, 80.0, 53.0, 33.0, 21.0, 24.0, 10.0, 8.0, 5.0, 5.0, 3.0, 1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.05535888671875, -0.0537567138671875, -0.052154541015625, -0.0505523681640625, -0.0489501953125, -0.0473480224609375, -0.045745849609375, -0.0441436767578125, -0.04254150390625, -0.0409393310546875, -0.039337158203125, -0.0377349853515625, -0.0361328125, -0.0345306396484375, 
-0.032928466796875, -0.0313262939453125, -0.02972412109375, -0.0281219482421875, -0.026519775390625, -0.0249176025390625, -0.0233154296875, -0.0217132568359375, -0.020111083984375, -0.0185089111328125, -0.01690673828125, -0.0153045654296875, -0.013702392578125, -0.0121002197265625, -0.010498046875, -0.0088958740234375, -0.007293701171875, -0.0056915283203125, -0.00408935546875, -0.0024871826171875, -0.000885009765625, 0.0007171630859375, 0.0023193359375, 0.0039215087890625, 0.005523681640625, 0.0071258544921875, 0.00872802734375, 0.0103302001953125, 0.011932373046875, 0.0135345458984375, 0.01513671875, 0.0167388916015625, 0.018341064453125, 0.0199432373046875, 0.02154541015625, 0.0231475830078125, 0.024749755859375, 0.0263519287109375, 0.0279541015625, 0.0295562744140625, 0.031158447265625, 0.0327606201171875, 0.03436279296875, 0.0359649658203125, 0.037567138671875, 0.0391693115234375, 0.040771484375, 0.0423736572265625, 0.043975830078125, 0.0455780029296875, 0.04718017578125]}, "gradients/encoder.encoder.layers.1.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 8.0, 12.0, 22.0, 28.0, 42.0, 64.0, 89.0, 120.0, 99.0, 116.0, 114.0, 92.0, 51.0, 59.0, 47.0, 22.0, 12.0, 4.0, 10.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.09124755859375, -0.08865070343017578, -0.08605384826660156, -0.08345699310302734, -0.08086013793945312, -0.0782632827758789, -0.07566642761230469, -0.07306957244873047, -0.07047271728515625, -0.06787586212158203, -0.06527900695800781, -0.0626821517944336, -0.060085296630859375, -0.057488441467285156, -0.05489158630371094, -0.05229473114013672, -0.0496978759765625, -0.04710102081298828, -0.04450416564941406, -0.041907310485839844, -0.039310455322265625, -0.036713600158691406, -0.03411674499511719, -0.03151988983154297, -0.02892303466796875, -0.02632617950439453, -0.023729324340820312, -0.021132469177246094, -0.018535614013671875, -0.015938758850097656, -0.013341903686523438, -0.010745048522949219, -0.008148193359375, -0.005551338195800781, -0.0029544830322265625, -0.00035762786865234375, 0.002239227294921875, 0.004836082458496094, 0.0074329376220703125, 0.010029792785644531, 0.01262664794921875, 0.015223503112792969, 0.017820358276367188, 0.020417213439941406, 0.023014068603515625, 0.025610923767089844, 0.028207778930664062, 0.03080463409423828, 0.0334014892578125, 0.03599834442138672, 0.03859519958496094, 0.041192054748535156, 0.043788909912109375, 0.046385765075683594, 0.04898262023925781, 0.05157947540283203, 0.05417633056640625, 0.05677318572998047, 0.05937004089355469, 0.061966896057128906, 0.06456375122070312, 0.06716060638427734, 0.06975746154785156, 0.07235431671142578, 0.074951171875]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 1.0, 5.0, 5.0, 6.0, 4.0, 5.0, 11.0, 17.0, 15.0, 21.0, 21.0, 39.0, 43.0, 55.0, 74.0, 135.0, 288.0, 503.0, 1082.0, 3150.0, 11795.0, 74482.0, 1740795.0, 2253412.0, 89040.0, 13380.0, 3433.0, 1166.0, 528.0, 280.0, 160.0, 97.0, 60.0, 45.0, 36.0, 22.0, 23.0, 13.0, 9.0, 8.0, 5.0, 3.0, 4.0, 6.0, 3.0, 2.0, 1.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0], "bins": [-0.115966796875, -0.1121063232421875, -0.108245849609375, -0.1043853759765625, -0.10052490234375, -0.0966644287109375, -0.092803955078125, -0.0889434814453125, -0.0850830078125, 
-0.0812225341796875, -0.077362060546875, -0.0735015869140625, -0.06964111328125, -0.0657806396484375, -0.061920166015625, -0.0580596923828125, -0.05419921875, -0.0503387451171875, -0.046478271484375, -0.0426177978515625, -0.03875732421875, -0.0348968505859375, -0.031036376953125, -0.0271759033203125, -0.0233154296875, -0.0194549560546875, -0.015594482421875, -0.0117340087890625, -0.00787353515625, -0.0040130615234375, -0.000152587890625, 0.0037078857421875, 0.007568359375, 0.0114288330078125, 0.015289306640625, 0.0191497802734375, 0.02301025390625, 0.0268707275390625, 0.030731201171875, 0.0345916748046875, 0.0384521484375, 0.0423126220703125, 0.046173095703125, 0.0500335693359375, 0.05389404296875, 0.0577545166015625, 0.061614990234375, 0.0654754638671875, 0.0693359375, 0.0731964111328125, 0.077056884765625, 0.0809173583984375, 0.08477783203125, 0.0886383056640625, 0.092498779296875, 0.0963592529296875, 0.1002197265625, 0.1040802001953125, 0.107940673828125, 0.1118011474609375, 0.11566162109375, 0.1195220947265625, 0.123382568359375, 0.1272430419921875, 0.131103515625]}, "gradients/encoder.encoder.layers.1.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 3.0, 2.0, 5.0, 5.0, 5.0, 7.0, 5.0, 26.0, 20.0, 30.0, 56.0, 75.0, 117.0, 146.0, 226.0, 355.0, 508.0, 645.0, 609.0, 435.0, 286.0, 144.0, 103.0, 83.0, 41.0, 34.0, 36.0, 18.0, 17.0, 11.0, 10.0, 3.0, 4.0, 0.0, 3.0, 3.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], "bins": [-0.07464599609375, -0.07212638854980469, -0.06960678100585938, -0.06708717346191406, -0.06456756591796875, -0.06204795837402344, -0.059528350830078125, -0.05700874328613281, -0.0544891357421875, -0.05196952819824219, -0.049449920654296875, -0.04693031311035156, -0.04441070556640625, -0.04189109802246094, -0.039371490478515625, -0.03685188293457031, -0.034332275390625, -0.03181266784667969, -0.029293060302734375, -0.026773452758789062, -0.02425384521484375, -0.021734237670898438, -0.019214630126953125, -0.016695022583007812, -0.0141754150390625, -0.011655807495117188, -0.009136199951171875, -0.0066165924072265625, -0.00409698486328125, -0.0015773773193359375, 0.000942230224609375, 0.0034618377685546875, 0.0059814453125, 0.008501052856445312, 0.011020660400390625, 0.013540267944335938, 0.01605987548828125, 0.018579483032226562, 0.021099090576171875, 0.023618698120117188, 0.0261383056640625, 0.028657913208007812, 0.031177520751953125, 0.03369712829589844, 0.03621673583984375, 0.03873634338378906, 0.041255950927734375, 0.04377555847167969, 0.046295166015625, 0.04881477355957031, 0.051334381103515625, 0.05385398864746094, 0.05637359619140625, 0.05889320373535156, 0.061412811279296875, 0.06393241882324219, 0.0664520263671875, 0.06897163391113281, 0.07149124145507812, 0.07401084899902344, 0.07653045654296875, 0.07905006408691406, 0.08156967163085938, 0.08408927917480469, 0.08660888671875]}, "gradients/encoder.encoder.layers.1.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 2.0, 2.0, 3.0, 7.0, 13.0, 24.0, 54.0, 117.0, 199.0, 252.0, 179.0, 82.0, 34.0, 17.0, 12.0, 3.0, 3.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0], "bins": [-1.2369219064712524, -1.20773184299469, -1.1785417795181274, -1.1493515968322754, -1.120161533355713, -1.0909714698791504, -1.061781406402588, 
-1.0325913429260254, -1.003401279449463, -0.9742112159729004, -0.9450210928916931, -0.9158310294151306, -0.8866409659385681, -0.8574508428573608, -0.8282607793807983, -0.7990707159042358, -0.7698805928230286, -0.7406905293464661, -0.7115004062652588, -0.6823103427886963, -0.6531202793121338, -0.6239302158355713, -0.594740092754364, -0.5655500292778015, -0.5363599061965942, -0.5071698427200317, -0.47797974944114685, -0.44878965616226196, -0.41959959268569946, -0.3904094994068146, -0.3612194061279297, -0.3320293426513672, -0.3028392791748047, -0.2736491858959198, -0.2444591224193573, -0.2152690291404724, -0.18607895076274872, -0.15688887238502502, -0.12769877910614014, -0.09850870072841644, -0.06931862235069275, -0.04012854024767876, -0.010938458144664764, 0.018251627683639526, 0.04744170606136322, 0.07663178443908691, 0.1058218777179718, 0.1350119560956955, 0.1642020344734192, 0.19339211285114288, 0.22258219122886658, 0.25177228450775146, 0.28096234798431396, 0.31015244126319885, 0.33934253454208374, 0.36853259801864624, 0.39772269129753113, 0.426912784576416, 0.4561028480529785, 0.4852929413318634, 0.5144830346107483, 0.5436730980873108, 0.5728632211685181, 0.6020532846450806, 0.6312433481216431]}, "gradients/encoder.encoder.layers.1.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 4.0, 9.0, 5.0, 7.0, 17.0, 20.0, 17.0, 19.0, 23.0, 40.0, 39.0, 28.0, 32.0, 51.0, 52.0, 55.0, 46.0, 44.0, 51.0, 59.0, 40.0, 44.0, 39.0, 34.0, 41.0, 32.0, 27.0, 23.0, 21.0, 11.0, 21.0, 17.0, 14.0, 10.0, 4.0, 4.0, 3.0, 7.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0], "bins": [-0.3009309768676758, -0.2929282784461975, -0.28492555022239685, -0.2769228518009186, -0.2689201235771179, -0.26091742515563965, -0.2529147267341614, -0.24491199851036072, -0.23690930008888245, -0.22890658676624298, -0.22090387344360352, -0.21290117502212524, -0.20489846169948578, -0.1968957483768463, -0.18889303505420685, -0.18089032173156738, -0.17288760840892792, -0.16488489508628845, -0.156882181763649, -0.14887946844100952, -0.14087677001953125, -0.13287405669689178, -0.12487134337425232, -0.11686863005161285, -0.10886592417955399, -0.10086321085691452, -0.09286050498485565, -0.08485779166221619, -0.07685507833957672, -0.06885237246751785, -0.06084965914487839, -0.05284694954752922, -0.044844239950180054, -0.03684153035283089, -0.02883881889283657, -0.020836107432842255, -0.012833397835493088, -0.004830688238143921, 0.0031720250844955444, 0.011174734681844711, 0.019177444279193878, 0.027180153876543045, 0.03518286347389221, 0.04318557679653168, 0.051188286393880844, 0.05919099599123001, 0.06719370931386948, 0.07519641518592834, 0.08319912850856781, 0.09120184183120728, 0.09920454770326614, 0.10720726102590561, 0.11520996689796448, 0.12321268022060394, 0.1312153935432434, 0.13921810686588287, 0.14722082018852234, 0.1552235335111618, 0.16322624683380127, 0.17122894525527954, 0.179231658577919, 0.18723437190055847, 0.19523708522319794, 0.2032397985458374, 0.21124249696731567]}, "gradients/encoder.encoder.layers.1.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 1.0, 6.0, 2.0, 3.0, 5.0, 12.0, 22.0, 19.0, 14.0, 35.0, 35.0, 53.0, 84.0, 197.0, 483.0, 1688.0, 10102.0, 113646.0, 647331.0, 249344.0, 21352.0, 2811.0, 697.0, 270.0, 128.0, 60.0, 39.0, 32.0, 15.0, 25.0, 12.0, 13.0, 9.0, 10.0, 7.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], 
"bins": [-0.1649169921875, -0.1598682403564453, -0.15481948852539062, -0.14977073669433594, -0.14472198486328125, -0.13967323303222656, -0.13462448120117188, -0.1295757293701172, -0.1245269775390625, -0.11947822570800781, -0.11442947387695312, -0.10938072204589844, -0.10433197021484375, -0.09928321838378906, -0.09423446655273438, -0.08918571472167969, -0.084136962890625, -0.07908821105957031, -0.07403945922851562, -0.06899070739746094, -0.06394195556640625, -0.05889320373535156, -0.053844451904296875, -0.04879570007324219, -0.0437469482421875, -0.03869819641113281, -0.033649444580078125, -0.028600692749023438, -0.02355194091796875, -0.018503189086914062, -0.013454437255859375, -0.008405685424804688, -0.00335693359375, 0.0016918182373046875, 0.006740570068359375, 0.011789321899414062, 0.01683807373046875, 0.021886825561523438, 0.026935577392578125, 0.03198432922363281, 0.0370330810546875, 0.04208183288574219, 0.047130584716796875, 0.05217933654785156, 0.05722808837890625, 0.06227684020996094, 0.06732559204101562, 0.07237434387207031, 0.077423095703125, 0.08247184753417969, 0.08752059936523438, 0.09256935119628906, 0.09761810302734375, 0.10266685485839844, 0.10771560668945312, 0.11276435852050781, 0.1178131103515625, 0.12286186218261719, 0.12791061401367188, 0.13295936584472656, 0.13800811767578125, 0.14305686950683594, 0.14810562133789062, 0.1531543731689453, 0.158203125]}, "gradients/encoder.encoder.layers.1.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 5.0, 2.0, 2.0, 2.0, 14.0, 28.0, 24.0, 33.0, 45.0, 51.0, 63.0, 92.0, 99.0, 81.0, 80.0, 81.0, 65.0, 58.0, 55.0, 33.0, 28.0, 31.0, 16.0, 6.0, 5.0, 8.0, 5.0, 3.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.09381103515625, -0.09125804901123047, -0.08870506286621094, -0.0861520767211914, -0.08359909057617188, -0.08104610443115234, -0.07849311828613281, -0.07594013214111328, -0.07338714599609375, -0.07083415985107422, -0.06828117370605469, -0.06572818756103516, -0.06317520141601562, -0.060622215270996094, -0.05806922912597656, -0.05551624298095703, -0.0529632568359375, -0.05041027069091797, -0.04785728454589844, -0.045304298400878906, -0.042751312255859375, -0.040198326110839844, -0.03764533996582031, -0.03509235382080078, -0.03253936767578125, -0.02998638153076172, -0.027433395385742188, -0.024880409240722656, -0.022327423095703125, -0.019774436950683594, -0.017221450805664062, -0.014668464660644531, -0.012115478515625, -0.009562492370605469, -0.0070095062255859375, -0.004456520080566406, -0.001903533935546875, 0.0006494522094726562, 0.0032024383544921875, 0.005755424499511719, 0.00830841064453125, 0.010861396789550781, 0.013414382934570312, 0.015967369079589844, 0.018520355224609375, 0.021073341369628906, 0.023626327514648438, 0.02617931365966797, 0.0287322998046875, 0.03128528594970703, 0.03383827209472656, 0.036391258239746094, 0.038944244384765625, 0.041497230529785156, 0.04405021667480469, 0.04660320281982422, 0.04915618896484375, 0.05170917510986328, 0.05426216125488281, 0.056815147399902344, 0.059368133544921875, 0.061921119689941406, 0.06447410583496094, 0.06702709197998047, 0.069580078125]}, "gradients/encoder.encoder.layers.1.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 7.0, 5.0, 10.0, 13.0, 21.0, 21.0, 26.0, 45.0, 77.0, 118.0, 212.0, 339.0, 565.0, 1125.0, 2145.0, 5032.0, 13199.0, 36837.0, 106619.0, 265437.0, 357819.0, 
164853.0, 58904.0, 20598.0, 7855.0, 3243.0, 1464.0, 798.0, 419.0, 258.0, 162.0, 103.0, 65.0, 50.0, 36.0, 28.0, 15.0, 7.0, 6.0, 4.0, 5.0, 6.0, 2.0, 4.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.058685302734375, -0.05660581588745117, -0.054526329040527344, -0.052446842193603516, -0.05036735534667969, -0.04828786849975586, -0.04620838165283203, -0.0441288948059082, -0.042049407958984375, -0.03996992111206055, -0.03789043426513672, -0.03581094741821289, -0.03373146057128906, -0.031651973724365234, -0.029572486877441406, -0.027493000030517578, -0.02541351318359375, -0.023334026336669922, -0.021254539489746094, -0.019175052642822266, -0.017095565795898438, -0.01501607894897461, -0.012936592102050781, -0.010857105255126953, -0.008777618408203125, -0.006698131561279297, -0.004618644714355469, -0.0025391578674316406, -0.0004596710205078125, 0.0016198158264160156, 0.0036993026733398438, 0.005778789520263672, 0.0078582763671875, 0.009937763214111328, 0.012017250061035156, 0.014096736907958984, 0.016176223754882812, 0.01825571060180664, 0.02033519744873047, 0.022414684295654297, 0.024494171142578125, 0.026573657989501953, 0.02865314483642578, 0.03073263168334961, 0.03281211853027344, 0.034891605377197266, 0.036971092224121094, 0.03905057907104492, 0.04113006591796875, 0.04320955276489258, 0.045289039611816406, 0.047368526458740234, 0.04944801330566406, 0.05152750015258789, 0.05360698699951172, 0.05568647384643555, 0.057765960693359375, 0.0598454475402832, 0.06192493438720703, 0.06400442123413086, 0.06608390808105469, 0.06816339492797852, 0.07024288177490234, 0.07232236862182617, 0.07440185546875]}, "gradients/encoder.encoder.layers.1.attention.v_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 3.0, 1.0, 4.0, 6.0, 8.0, 5.0, 10.0, 8.0, 8.0, 20.0, 15.0, 19.0, 26.0, 33.0, 26.0, 35.0, 38.0, 48.0, 45.0, 52.0, 50.0, 51.0, 48.0, 46.0, 46.0, 44.0, 46.0, 50.0, 30.0, 37.0, 26.0, 20.0, 25.0, 13.0, 10.0, 14.0, 12.0, 10.0, 7.0, 5.0, 5.0, 2.0, 1.0, 4.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.110107421875, -0.10626220703125, -0.1024169921875, -0.09857177734375, -0.0947265625, -0.09088134765625, -0.0870361328125, -0.08319091796875, -0.079345703125, -0.07550048828125, -0.0716552734375, -0.06781005859375, -0.06396484375, -0.06011962890625, -0.0562744140625, -0.05242919921875, -0.048583984375, -0.04473876953125, -0.0408935546875, -0.03704833984375, -0.033203125, -0.02935791015625, -0.0255126953125, -0.02166748046875, -0.017822265625, -0.01397705078125, -0.0101318359375, -0.00628662109375, -0.00244140625, 0.00140380859375, 0.0052490234375, 0.00909423828125, 0.012939453125, 0.01678466796875, 0.0206298828125, 0.02447509765625, 0.0283203125, 0.03216552734375, 0.0360107421875, 0.03985595703125, 0.043701171875, 0.04754638671875, 0.0513916015625, 0.05523681640625, 0.05908203125, 0.06292724609375, 0.0667724609375, 0.07061767578125, 0.074462890625, 0.07830810546875, 0.0821533203125, 0.08599853515625, 0.08984375, 0.09368896484375, 0.0975341796875, 0.10137939453125, 0.105224609375, 0.10906982421875, 0.1129150390625, 0.11676025390625, 0.12060546875, 0.12445068359375, 0.1282958984375, 0.13214111328125, 0.135986328125]}, "gradients/encoder.encoder.layers.1.attention.k_proj.weight": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 6.0, 5.0, 7.0, 8.0, 9.0, 18.0, 20.0, 39.0, 45.0, 91.0, 118.0, 175.0, 272.0, 433.0, 841.0, 1568.0, 3582.0, 9363.0, 28072.0, 94595.0, 303070.0, 405364.0, 138165.0, 
40845.0, 12833.0, 4597.0, 2022.0, 991.0, 548.0, 294.0, 203.0, 119.0, 70.0, 58.0, 38.0, 24.0, 15.0, 10.0, 10.0, 6.0, 3.0, 6.0, 4.0, 4.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.01788330078125, -0.017327070236206055, -0.01677083969116211, -0.016214609146118164, -0.01565837860107422, -0.015102148056030273, -0.014545917510986328, -0.013989686965942383, -0.013433456420898438, -0.012877225875854492, -0.012320995330810547, -0.011764764785766602, -0.011208534240722656, -0.010652303695678711, -0.010096073150634766, -0.00953984260559082, -0.008983612060546875, -0.00842738151550293, -0.007871150970458984, -0.007314920425415039, -0.006758689880371094, -0.0062024593353271484, -0.005646228790283203, -0.005089998245239258, -0.0045337677001953125, -0.003977537155151367, -0.003421306610107422, -0.0028650760650634766, -0.0023088455200195312, -0.001752614974975586, -0.0011963844299316406, -0.0006401538848876953, -8.392333984375e-05, 0.0004723072052001953, 0.0010285377502441406, 0.001584768295288086, 0.0021409988403320312, 0.0026972293853759766, 0.003253459930419922, 0.003809690475463867, 0.0043659210205078125, 0.004922151565551758, 0.005478382110595703, 0.0060346126556396484, 0.006590843200683594, 0.007147073745727539, 0.007703304290771484, 0.00825953483581543, 0.008815765380859375, 0.00937199592590332, 0.009928226470947266, 0.010484457015991211, 0.011040687561035156, 0.011596918106079102, 0.012153148651123047, 0.012709379196166992, 0.013265609741210938, 0.013821840286254883, 0.014378070831298828, 0.014934301376342773, 0.015490531921386719, 0.016046762466430664, 0.01660299301147461, 0.017159223556518555, 0.0177154541015625]}, "gradients/encoder.encoder.layers.1.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 4.0, 5.0, 5.0, 8.0, 7.0, 12.0, 8.0, 17.0, 20.0, 20.0, 32.0, 33.0, 31.0, 45.0, 53.0, 55.0, 70.0, 91.0, 67.0, 49.0, 57.0, 64.0, 51.0, 40.0, 32.0, 28.0, 25.0, 15.0, 17.0, 8.0, 6.0, 7.0, 0.0, 4.0, 8.0, 5.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0], "bins": [-8.106231689453125e-06, -7.860362529754639e-06, -7.614493370056152e-06, -7.368624210357666e-06, -7.12275505065918e-06, -6.876885890960693e-06, -6.631016731262207e-06, -6.385147571563721e-06, -6.139278411865234e-06, -5.893409252166748e-06, -5.647540092468262e-06, -5.401670932769775e-06, -5.155801773071289e-06, -4.909932613372803e-06, -4.664063453674316e-06, -4.41819429397583e-06, -4.172325134277344e-06, -3.926455974578857e-06, -3.680586814880371e-06, -3.4347176551818848e-06, -3.1888484954833984e-06, -2.942979335784912e-06, -2.6971101760864258e-06, -2.4512410163879395e-06, -2.205371856689453e-06, -1.959502696990967e-06, -1.7136335372924805e-06, -1.4677643775939941e-06, -1.2218952178955078e-06, -9.760260581970215e-07, -7.301568984985352e-07, -4.842877388000488e-07, -2.384185791015625e-07, 7.450580596923828e-09, 2.5331974029541016e-07, 4.991888999938965e-07, 7.450580596923828e-07, 9.909272193908691e-07, 1.2367963790893555e-06, 1.4826655387878418e-06, 1.7285346984863281e-06, 1.9744038581848145e-06, 2.2202730178833008e-06, 2.466142177581787e-06, 2.7120113372802734e-06, 2.9578804969787598e-06, 3.203749656677246e-06, 3.4496188163757324e-06, 3.6954879760742188e-06, 3.941357135772705e-06, 4.187226295471191e-06, 4.433095455169678e-06, 4.678964614868164e-06, 4.92483377456665e-06, 5.170702934265137e-06, 5.416572093963623e-06, 5.662441253662109e-06, 5.908310413360596e-06, 6.154179573059082e-06, 6.400048732757568e-06, 6.645917892456055e-06, 
6.891787052154541e-06, 7.137656211853027e-06, 7.383525371551514e-06, 7.62939453125e-06]}, "gradients/encoder.encoder.layers.1.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 3.0, 5.0, 7.0, 12.0, 9.0, 14.0, 18.0, 19.0, 51.0, 53.0, 61.0, 95.0, 164.0, 234.0, 349.0, 521.0, 865.0, 1388.0, 2544.0, 4936.0, 10937.0, 27556.0, 74824.0, 197016.0, 368370.0, 218037.0, 85216.0, 31073.0, 12063.0, 5278.0, 2679.0, 1526.0, 923.0, 554.0, 377.0, 234.0, 159.0, 109.0, 63.0, 66.0, 41.0, 31.0, 15.0, 11.0, 10.0, 7.0, 10.0, 8.0, 8.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0], "bins": [-0.0178680419921875, -0.01733565330505371, -0.016803264617919922, -0.016270875930786133, -0.015738487243652344, -0.015206098556518555, -0.014673709869384766, -0.014141321182250977, -0.013608932495117188, -0.013076543807983398, -0.01254415512084961, -0.01201176643371582, -0.011479377746582031, -0.010946989059448242, -0.010414600372314453, -0.009882211685180664, -0.009349822998046875, -0.008817434310913086, -0.008285045623779297, -0.007752656936645508, -0.007220268249511719, -0.00668787956237793, -0.006155490875244141, -0.0056231021881103516, -0.0050907135009765625, -0.0045583248138427734, -0.004025936126708984, -0.0034935474395751953, -0.0029611587524414062, -0.002428770065307617, -0.0018963813781738281, -0.001363992691040039, -0.00083160400390625, -0.00029921531677246094, 0.00023317337036132812, 0.0007655620574951172, 0.0012979507446289062, 0.0018303394317626953, 0.0023627281188964844, 0.0028951168060302734, 0.0034275054931640625, 0.0039598941802978516, 0.004492282867431641, 0.00502467155456543, 0.005557060241699219, 0.006089448928833008, 0.006621837615966797, 0.007154226303100586, 0.007686614990234375, 0.008219003677368164, 0.008751392364501953, 0.009283781051635742, 0.009816169738769531, 0.01034855842590332, 0.01088094711303711, 0.011413335800170898, 0.011945724487304688, 0.012478113174438477, 0.013010501861572266, 0.013542890548706055, 0.014075279235839844, 0.014607667922973633, 0.015140056610107422, 0.01567244529724121, 0.016204833984375]}, "gradients/encoder.encoder.layers.1.attention.q_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 3.0, 2.0, 3.0, 3.0, 6.0, 3.0, 9.0, 8.0, 8.0, 18.0, 14.0, 22.0, 36.0, 32.0, 60.0, 73.0, 68.0, 84.0, 90.0, 91.0, 80.0, 71.0, 40.0, 40.0, 39.0, 25.0, 18.0, 15.0, 12.0, 7.0, 5.0, 4.0, 3.0, 4.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 3.0], "bins": [-0.024169921875, -0.02344655990600586, -0.02272319793701172, -0.021999835968017578, -0.021276473999023438, -0.020553112030029297, -0.019829750061035156, -0.019106388092041016, -0.018383026123046875, -0.017659664154052734, -0.016936302185058594, -0.016212940216064453, -0.015489578247070312, -0.014766216278076172, -0.014042854309082031, -0.01331949234008789, -0.01259613037109375, -0.01187276840209961, -0.011149406433105469, -0.010426044464111328, -0.009702682495117188, -0.008979320526123047, -0.008255958557128906, -0.007532596588134766, -0.006809234619140625, -0.006085872650146484, -0.005362510681152344, -0.004639148712158203, -0.0039157867431640625, -0.003192424774169922, -0.0024690628051757812, -0.0017457008361816406, -0.0010223388671875, -0.0002989768981933594, 0.00042438507080078125, 0.0011477470397949219, 0.0018711090087890625, 0.002594470977783203, 0.0033178329467773438, 0.004041194915771484, 0.004764556884765625, 0.005487918853759766, 0.006211280822753906, 0.006934642791748047, 0.0076580047607421875, 
0.008381366729736328, 0.009104728698730469, 0.00982809066772461, 0.01055145263671875, 0.01127481460571289, 0.011998176574707031, 0.012721538543701172, 0.013444900512695312, 0.014168262481689453, 0.014891624450683594, 0.015614986419677734, 0.016338348388671875, 0.017061710357666016, 0.017785072326660156, 0.018508434295654297, 0.019231796264648438, 0.019955158233642578, 0.02067852020263672, 0.02140188217163086, 0.022125244140625]}, "gradients/encoder.encoder.layers.1.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 3.0, 1.0, 2.0, 4.0, 12.0, 32.0, 105.0, 163.0, 262.0, 213.0, 104.0, 59.0, 18.0, 20.0, 6.0, 1.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.2807014584541321, -0.2583438456058502, -0.23598623275756836, -0.2136286050081253, -0.19127099215984344, -0.16891337931156158, -0.14655575156211853, -0.12419813871383667, -0.10184052586555481, -0.07948291301727295, -0.05712529271841049, -0.034767672419548035, -0.012410059571266174, 0.009947553277015686, 0.03230518102645874, 0.0546627938747406, 0.07702040672302246, 0.09937801957130432, 0.12173563987016678, 0.14409326016902924, 0.1664508730173111, 0.18880848586559296, 0.211166113615036, 0.23352372646331787, 0.25588133931159973, 0.2782389521598816, 0.30059656500816345, 0.3229541778564453, 0.34531182050704956, 0.36766940355300903, 0.3900270462036133, 0.41238465905189514, 0.4347423315048218, 0.45709994435310364, 0.4794575572013855, 0.5018151998519897, 0.5241727828979492, 0.5465304255485535, 0.5688880681991577, 0.5912456512451172, 0.6136032342910767, 0.6359608769416809, 0.6583184599876404, 0.6806761026382446, 0.7030336856842041, 0.7253913283348083, 0.7477489709854126, 0.7701065540313721, 0.7924641966819763, 0.8148218393325806, 0.83717942237854, 0.8595370650291443, 0.8818946480751038, 0.904252290725708, 0.9266098737716675, 0.9489675164222717, 0.971325159072876, 0.9936828017234802, 1.0160404443740845, 1.038398027420044, 1.0607556104660034, 1.083113193511963, 1.105470895767212, 1.1278284788131714, 1.1501860618591309]}, "gradients/encoder.encoder.layers.1.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 3.0, 3.0, 8.0, 7.0, 9.0, 4.0, 4.0, 8.0, 16.0, 11.0, 25.0, 23.0, 24.0, 30.0, 36.0, 34.0, 44.0, 40.0, 42.0, 37.0, 50.0, 54.0, 52.0, 42.0, 34.0, 40.0, 42.0, 31.0, 36.0, 30.0, 28.0, 27.0, 24.0, 24.0, 20.0, 8.0, 12.0, 9.0, 9.0, 9.0, 5.0, 8.0, 2.0, 1.0, 1.0, 4.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.2755107879638672, -0.2669037878513336, -0.25829681754112244, -0.24968981742858887, -0.2410828322172165, -0.23247584700584412, -0.22386884689331055, -0.21526186168193817, -0.2066548764705658, -0.19804789125919342, -0.18944089114665985, -0.18083390593528748, -0.1722269207239151, -0.16361993551254272, -0.15501293540000916, -0.14640595018863678, -0.1377989500761032, -0.12919196486473083, -0.12058497220277786, -0.11197797954082489, -0.10337099432945251, -0.09476400166749954, -0.08615700900554657, -0.0775500237941742, -0.06894303113222122, -0.06033604219555855, -0.051729053258895874, -0.0431220605969429, -0.03451507166028023, -0.025908082723617554, -0.01730109006166458, -0.008694101125001907, -8.71121883392334e-05, 0.008519877679646015, 0.017126867547631264, 0.025733858346939087, 0.03434084728360176, 0.042947836220264435, 0.05155482888221741, 0.06016181781888008, 0.06876880675554276, 
0.07737579941749573, 0.0859827846288681, 0.09458977729082108, 0.10319676995277405, 0.11180375516414642, 0.1204107478260994, 0.12901774048805237, 0.13762472569942474, 0.14623171091079712, 0.1548387110233307, 0.16344569623470306, 0.17205268144607544, 0.180659681558609, 0.18926666676998138, 0.19787365198135376, 0.20648065209388733, 0.2150876373052597, 0.22369463741779327, 0.23230162262916565, 0.24090860784053802, 0.2495155930519104, 0.25812259316444397, 0.26672959327697754, 0.2753365635871887]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 8.0, 14.0, 23.0, 39.0, 59.0, 91.0, 206.0, 359.0, 814.0, 2130.0, 6943.0, 20500.0, 100401.0, 725359.0, 2063584.0, 1066872.0, 166484.0, 27039.0, 7494.0, 3362.0, 1391.0, 504.0, 245.0, 129.0, 83.0, 53.0, 37.0, 20.0, 16.0, 6.0, 9.0, 3.0, 1.0, 3.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.08966064453125, -0.08721256256103516, -0.08476448059082031, -0.08231639862060547, -0.07986831665039062, -0.07742023468017578, -0.07497215270996094, -0.0725240707397461, -0.07007598876953125, -0.0676279067993164, -0.06517982482910156, -0.06273174285888672, -0.060283660888671875, -0.05783557891845703, -0.05538749694824219, -0.052939414978027344, -0.0504913330078125, -0.048043251037597656, -0.04559516906738281, -0.04314708709716797, -0.040699005126953125, -0.03825092315673828, -0.03580284118652344, -0.033354759216308594, -0.03090667724609375, -0.028458595275878906, -0.026010513305664062, -0.02356243133544922, -0.021114349365234375, -0.01866626739501953, -0.016218185424804688, -0.013770103454589844, -0.011322021484375, -0.008873939514160156, -0.0064258575439453125, -0.003977775573730469, -0.001529693603515625, 0.0009183883666992188, 0.0033664703369140625, 0.005814552307128906, 0.00826263427734375, 0.010710716247558594, 0.013158798217773438, 0.015606880187988281, 0.018054962158203125, 0.02050304412841797, 0.022951126098632812, 0.025399208068847656, 0.0278472900390625, 0.030295372009277344, 0.03274345397949219, 0.03519153594970703, 0.037639617919921875, 0.04008769989013672, 0.04253578186035156, 0.044983863830566406, 0.04743194580078125, 0.049880027770996094, 0.05232810974121094, 0.05477619171142578, 0.057224273681640625, 0.05967235565185547, 0.06212043762207031, 0.06456851959228516, 0.0670166015625]}, "gradients/encoder.encoder.layers.0.feed_forward.output_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 6.0, 2.0, 7.0, 9.0, 20.0, 35.0, 50.0, 56.0, 69.0, 81.0, 83.0, 102.0, 108.0, 102.0, 94.0, 53.0, 44.0, 37.0, 26.0, 12.0, 10.0, 6.0, 1.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.09429931640625, -0.09172725677490234, -0.08915519714355469, -0.08658313751220703, -0.08401107788085938, -0.08143901824951172, -0.07886695861816406, -0.0762948989868164, -0.07372283935546875, -0.0711507797241211, -0.06857872009277344, -0.06600666046142578, -0.06343460083007812, -0.06086254119873047, -0.05829048156738281, -0.055718421936035156, -0.0531463623046875, -0.050574302673339844, -0.04800224304199219, -0.04543018341064453, -0.042858123779296875, -0.04028606414794922, -0.03771400451660156, -0.035141944885253906, -0.03256988525390625, -0.029997825622558594, -0.027425765991210938, -0.02485370635986328, -0.022281646728515625, -0.01970958709716797, 
-0.017137527465820312, -0.014565467834472656, -0.011993408203125, -0.009421348571777344, -0.0068492889404296875, -0.004277229309082031, -0.001705169677734375, 0.0008668899536132812, 0.0034389495849609375, 0.006011009216308594, 0.00858306884765625, 0.011155128479003906, 0.013727188110351562, 0.01629924774169922, 0.018871307373046875, 0.02144336700439453, 0.024015426635742188, 0.026587486267089844, 0.0291595458984375, 0.031731605529785156, 0.03430366516113281, 0.03687572479248047, 0.039447784423828125, 0.04201984405517578, 0.04459190368652344, 0.047163963317871094, 0.04973602294921875, 0.052308082580566406, 0.05488014221191406, 0.05745220184326172, 0.060024261474609375, 0.06259632110595703, 0.06516838073730469, 0.06774044036865234, 0.0703125]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.weight": {"_type": "histogram", "values": [2.0, 1.0, 2.0, 1.0, 4.0, 0.0, 5.0, 1.0, 3.0, 7.0, 16.0, 11.0, 20.0, 18.0, 38.0, 57.0, 96.0, 163.0, 289.0, 629.0, 1457.0, 4296.0, 21030.0, 342477.0, 3619529.0, 182551.0, 15549.0, 3587.0, 1285.0, 526.0, 272.0, 133.0, 85.0, 49.0, 32.0, 19.0, 16.0, 16.0, 7.0, 7.0, 3.0, 4.0, 2.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.117431640625, -0.11262130737304688, -0.10781097412109375, -0.10300064086914062, -0.0981903076171875, -0.09337997436523438, -0.08856964111328125, -0.08375930786132812, -0.078948974609375, -0.07413864135742188, -0.06932830810546875, -0.06451797485351562, -0.0597076416015625, -0.054897308349609375, -0.05008697509765625, -0.045276641845703125, -0.04046630859375, -0.035655975341796875, -0.03084564208984375, -0.026035308837890625, -0.0212249755859375, -0.016414642333984375, -0.01160430908203125, -0.006793975830078125, -0.001983642578125, 0.002826690673828125, 0.00763702392578125, 0.012447357177734375, 0.0172576904296875, 0.022068023681640625, 0.02687835693359375, 0.031688690185546875, 0.0364990234375, 0.041309356689453125, 0.04611968994140625, 0.050930023193359375, 0.0557403564453125, 0.060550689697265625, 0.06536102294921875, 0.07017135620117188, 0.074981689453125, 0.07979202270507812, 0.08460235595703125, 0.08941268920898438, 0.0942230224609375, 0.09903335571289062, 0.10384368896484375, 0.10865402221679688, 0.11346435546875, 0.11827468872070312, 0.12308502197265625, 0.12789535522460938, 0.1327056884765625, 0.13751602172851562, 0.14232635498046875, 0.14713668823242188, 0.151947021484375, 0.15675735473632812, 0.16156768798828125, 0.16637802124023438, 0.1711883544921875, 0.17599868774414062, 0.18080902099609375, 0.18561935424804688, 0.1904296875]}, "gradients/encoder.encoder.layers.0.feed_forward.intermediate_dense.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 4.0, 3.0, 3.0, 5.0, 9.0, 16.0, 8.0, 23.0, 50.0, 61.0, 65.0, 80.0, 90.0, 139.0, 189.0, 241.0, 351.0, 332.0, 409.0, 425.0, 363.0, 308.0, 210.0, 168.0, 163.0, 96.0, 56.0, 60.0, 49.0, 25.0, 21.0, 24.0, 15.0, 9.0, 5.0, 4.0, 3.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0], "bins": [-0.09405517578125, -0.09150409698486328, -0.08895301818847656, -0.08640193939208984, -0.08385086059570312, -0.0812997817993164, -0.07874870300292969, -0.07619762420654297, -0.07364654541015625, -0.07109546661376953, -0.06854438781738281, -0.0659933090209961, -0.06344223022460938, -0.060891151428222656, -0.05834007263183594, -0.05578899383544922, -0.0532379150390625, -0.05068683624267578, -0.04813575744628906, -0.045584678649902344, 
-0.043033599853515625, -0.040482521057128906, -0.03793144226074219, -0.03538036346435547, -0.03282928466796875, -0.03027820587158203, -0.027727127075195312, -0.025176048278808594, -0.022624969482421875, -0.020073890686035156, -0.017522811889648438, -0.014971733093261719, -0.012420654296875, -0.009869575500488281, -0.0073184967041015625, -0.004767417907714844, -0.002216339111328125, 0.00033473968505859375, 0.0028858184814453125, 0.005436897277832031, 0.00798797607421875, 0.010539054870605469, 0.013090133666992188, 0.015641212463378906, 0.018192291259765625, 0.020743370056152344, 0.023294448852539062, 0.02584552764892578, 0.0283966064453125, 0.03094768524169922, 0.03349876403808594, 0.036049842834472656, 0.038600921630859375, 0.041152000427246094, 0.04370307922363281, 0.04625415802001953, 0.04880523681640625, 0.05135631561279297, 0.05390739440917969, 0.056458473205566406, 0.059009552001953125, 0.061560630798339844, 0.06411170959472656, 0.06666278839111328, 0.0692138671875]}, "gradients/encoder.encoder.layers.0.final_layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 0.0, 2.0, 2.0, 3.0, 3.0, 6.0, 3.0, 5.0, 10.0, 16.0, 16.0, 36.0, 43.0, 63.0, 86.0, 117.0, 121.0, 138.0, 116.0, 75.0, 42.0, 23.0, 27.0, 16.0, 13.0, 10.0, 4.0, 3.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.0513836145401, -1.0201712846755981, -0.9889589548110962, -0.9577466249465942, -0.9265342950820923, -0.8953219652175903, -0.8641096353530884, -0.8328973054885864, -0.8016849756240845, -0.7704726457595825, -0.7392603158950806, -0.7080479860305786, -0.6768356561660767, -0.6456233263015747, -0.6144109964370728, -0.5831986665725708, -0.5519863367080688, -0.5207740068435669, -0.48956167697906494, -0.458349347114563, -0.42713701725006104, -0.3959246873855591, -0.36471235752105713, -0.3335000276565552, -0.3022876977920532, -0.27107536792755127, -0.23986303806304932, -0.20865070819854736, -0.1774383783340454, -0.14622604846954346, -0.1150137186050415, -0.08380138874053955, -0.05258899927139282, -0.02137666940689087, 0.009835660457611084, 0.04104799032211304, 0.07226032018661499, 0.10347265005111694, 0.1346849799156189, 0.16589730978012085, 0.1971096396446228, 0.22832196950912476, 0.2595342993736267, 0.29074662923812866, 0.3219589591026306, 0.35317128896713257, 0.3843836188316345, 0.4155959486961365, 0.4468082785606384, 0.4780206084251404, 0.5092329382896423, 0.5404452681541443, 0.5716575980186462, 0.6028699278831482, 0.6340822577476501, 0.6652945876121521, 0.696506917476654, 0.727719247341156, 0.758931577205658, 0.7901439070701599, 0.8213562369346619, 0.8525685667991638, 0.8837808966636658, 0.9149932265281677, 0.9462055563926697]}, "gradients/encoder.encoder.layers.0.final_layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 2.0, 1.0, 1.0, 5.0, 4.0, 2.0, 8.0, 6.0, 1.0, 7.0, 18.0, 13.0, 17.0, 25.0, 23.0, 19.0, 33.0, 43.0, 45.0, 25.0, 55.0, 40.0, 54.0, 49.0, 46.0, 40.0, 49.0, 51.0, 52.0, 35.0, 31.0, 33.0, 29.0, 26.0, 19.0, 19.0, 22.0, 19.0, 12.0, 4.0, 7.0, 7.0, 6.0, 3.0, 4.0, 3.0, 3.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.29340970516204834, -0.28261226415634155, -0.27181482315063477, -0.26101741194725037, -0.2502199709415436, -0.2394225299358368, -0.2286251038312912, -0.2178276777267456, -0.20703023672103882, -0.19623279571533203, -0.18543536961078644, -0.17463794350624084, -0.16384050250053406, -0.15304306149482727, 
-0.14224563539028168, -0.13144820928573608, -0.1206507682800293, -0.1098533347249031, -0.09905590116977692, -0.08825846761465073, -0.07746103405952454, -0.06666360050439835, -0.055866166949272156, -0.045068733394145966, -0.034271299839019775, -0.023473866283893585, -0.012676432728767395, -0.0018789991736412048, 0.008918434381484985, 0.019715867936611176, 0.030513301491737366, 0.041310735046863556, 0.052108168601989746, 0.06290560215711594, 0.07370303571224213, 0.08450046926736832, 0.0952979028224945, 0.1060953363776207, 0.11689276993274689, 0.12769019603729248, 0.13848763704299927, 0.14928507804870605, 0.16008250415325165, 0.17087993025779724, 0.18167737126350403, 0.19247481226921082, 0.2032722383737564, 0.214069664478302, 0.2248671054840088, 0.23566454648971558, 0.24646197259426117, 0.25725939869880676, 0.26805683970451355, 0.27885428071022034, 0.28965169191360474, 0.3004491329193115, 0.3112465739250183, 0.3220440149307251, 0.3328414559364319, 0.3436388671398163, 0.35443630814552307, 0.36523374915122986, 0.37603116035461426, 0.38682860136032104, 0.39762604236602783]}, "gradients/encoder.encoder.layers.0.attention.out_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 6.0, 1.0, 1.0, 4.0, 9.0, 9.0, 13.0, 23.0, 29.0, 44.0, 46.0, 90.0, 161.0, 249.0, 443.0, 779.0, 1523.0, 3381.0, 8768.0, 29714.0, 182452.0, 656370.0, 127358.0, 23610.0, 7318.0, 2978.0, 1410.0, 714.0, 393.0, 259.0, 156.0, 85.0, 49.0, 42.0, 17.0, 24.0, 9.0, 6.0, 5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 0.0, 1.0, 4.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0], "bins": [-0.0767822265625, -0.07425785064697266, -0.07173347473144531, -0.06920909881591797, -0.06668472290039062, -0.06416034698486328, -0.06163597106933594, -0.059111595153808594, -0.05658721923828125, -0.054062843322753906, -0.05153846740722656, -0.04901409149169922, -0.046489715576171875, -0.04396533966064453, -0.04144096374511719, -0.038916587829589844, -0.0363922119140625, -0.033867835998535156, -0.03134346008300781, -0.02881908416748047, -0.026294708251953125, -0.02377033233642578, -0.021245956420898438, -0.018721580505371094, -0.01619720458984375, -0.013672828674316406, -0.011148452758789062, -0.008624076843261719, -0.006099700927734375, -0.0035753250122070312, -0.0010509490966796875, 0.0014734268188476562, 0.003997802734375, 0.006522178649902344, 0.009046554565429688, 0.011570930480957031, 0.014095306396484375, 0.01661968231201172, 0.019144058227539062, 0.021668434143066406, 0.02419281005859375, 0.026717185974121094, 0.029241561889648438, 0.03176593780517578, 0.034290313720703125, 0.03681468963623047, 0.03933906555175781, 0.041863441467285156, 0.0443878173828125, 0.046912193298339844, 0.04943656921386719, 0.05196094512939453, 0.054485321044921875, 0.05700969696044922, 0.05953407287597656, 0.062058448791503906, 0.06458282470703125, 0.0671072006225586, 0.06963157653808594, 0.07215595245361328, 0.07468032836914062, 0.07720470428466797, 0.07972908020019531, 0.08225345611572266, 0.08477783203125]}, "gradients/encoder.encoder.layers.0.attention.out_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 4.0, 2.0, 2.0, 6.0, 5.0, 8.0, 15.0, 21.0, 25.0, 40.0, 52.0, 76.0, 71.0, 88.0, 75.0, 69.0, 87.0, 91.0, 69.0, 56.0, 39.0, 32.0, 27.0, 15.0, 11.0, 8.0, 7.0, 5.0, 4.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.098876953125, -0.09598731994628906, -0.09309768676757812, -0.09020805358886719, -0.08731842041015625, 
-0.08442878723144531, -0.08153915405273438, -0.07864952087402344, -0.0757598876953125, -0.07287025451660156, -0.06998062133789062, -0.06709098815917969, -0.06420135498046875, -0.06131172180175781, -0.058422088623046875, -0.05553245544433594, -0.052642822265625, -0.04975318908691406, -0.046863555908203125, -0.04397392272949219, -0.04108428955078125, -0.03819465637207031, -0.035305023193359375, -0.03241539001464844, -0.0295257568359375, -0.026636123657226562, -0.023746490478515625, -0.020856857299804688, -0.01796722412109375, -0.015077590942382812, -0.012187957763671875, -0.009298324584960938, -0.00640869140625, -0.0035190582275390625, -0.000629425048828125, 0.0022602081298828125, 0.00514984130859375, 0.008039474487304688, 0.010929107666015625, 0.013818740844726562, 0.0167083740234375, 0.019598007202148438, 0.022487640380859375, 0.025377273559570312, 0.02826690673828125, 0.031156539916992188, 0.034046173095703125, 0.03693580627441406, 0.039825439453125, 0.04271507263183594, 0.045604705810546875, 0.04849433898925781, 0.05138397216796875, 0.05427360534667969, 0.057163238525390625, 0.06005287170410156, 0.0629425048828125, 0.06583213806152344, 0.06872177124023438, 0.07161140441894531, 0.07450103759765625, 0.07739067077636719, 0.08028030395507812, 0.08316993713378906, 0.0860595703125]}, "gradients/encoder.encoder.layers.0.attention.v_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0, 3.0, 1.0, 1.0, 1.0, 5.0, 5.0, 6.0, 10.0, 10.0, 18.0, 42.0, 55.0, 104.0, 215.0, 364.0, 844.0, 2355.0, 8721.0, 63832.0, 777008.0, 172139.0, 16969.0, 3599.0, 1238.0, 493.0, 205.0, 128.0, 63.0, 56.0, 26.0, 11.0, 14.0, 9.0, 8.0, 5.0, 2.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0], "bins": [-0.10028076171875, -0.09775400161743164, -0.09522724151611328, -0.09270048141479492, -0.09017372131347656, -0.0876469612121582, -0.08512020111083984, -0.08259344100952148, -0.08006668090820312, -0.07753992080688477, -0.0750131607055664, -0.07248640060424805, -0.06995964050292969, -0.06743288040161133, -0.06490612030029297, -0.06237936019897461, -0.05985260009765625, -0.05732583999633789, -0.05479907989501953, -0.05227231979370117, -0.04974555969238281, -0.04721879959106445, -0.044692039489746094, -0.042165279388427734, -0.039638519287109375, -0.037111759185791016, -0.034584999084472656, -0.0320582389831543, -0.029531478881835938, -0.027004718780517578, -0.02447795867919922, -0.02195119857788086, -0.0194244384765625, -0.01689767837524414, -0.014370918273925781, -0.011844158172607422, -0.009317398071289062, -0.006790637969970703, -0.004263877868652344, -0.0017371177673339844, 0.000789642333984375, 0.0033164024353027344, 0.005843162536621094, 0.008369922637939453, 0.010896682739257812, 0.013423442840576172, 0.01595020294189453, 0.01847696304321289, 0.02100372314453125, 0.02353048324584961, 0.02605724334716797, 0.028584003448486328, 0.031110763549804688, 0.03363752365112305, 0.036164283752441406, 0.038691043853759766, 0.041217803955078125, 0.043744564056396484, 0.046271324157714844, 0.0487980842590332, 0.05132484436035156, 0.05385160446166992, 0.05637836456298828, 0.05890512466430664, 0.061431884765625]}, "gradients/encoder.encoder.layers.0.attention.v_proj.bias": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 1.0, 3.0, 2.0, 5.0, 8.0, 9.0, 10.0, 9.0, 11.0, 21.0, 13.0, 25.0, 21.0, 29.0, 30.0, 39.0, 33.0, 42.0, 55.0, 70.0, 75.0, 59.0, 46.0, 62.0, 58.0, 43.0, 25.0, 40.0, 38.0, 29.0, 21.0, 17.0, 15.0, 13.0, 
13.0, 3.0, 1.0, 4.0, 5.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.12200927734375, -0.11821651458740234, -0.11442375183105469, -0.11063098907470703, -0.10683822631835938, -0.10304546356201172, -0.09925270080566406, -0.0954599380493164, -0.09166717529296875, -0.0878744125366211, -0.08408164978027344, -0.08028888702392578, -0.07649612426757812, -0.07270336151123047, -0.06891059875488281, -0.06511783599853516, -0.0613250732421875, -0.057532310485839844, -0.05373954772949219, -0.04994678497314453, -0.046154022216796875, -0.04236125946044922, -0.03856849670410156, -0.034775733947753906, -0.03098297119140625, -0.027190208435058594, -0.023397445678710938, -0.01960468292236328, -0.015811920166015625, -0.012019157409667969, -0.008226394653320312, -0.004433631896972656, -0.000640869140625, 0.0031518936157226562, 0.0069446563720703125, 0.010737419128417969, 0.014530181884765625, 0.01832294464111328, 0.022115707397460938, 0.025908470153808594, 0.02970123291015625, 0.033493995666503906, 0.03728675842285156, 0.04107952117919922, 0.044872283935546875, 0.04866504669189453, 0.05245780944824219, 0.056250572204589844, 0.0600433349609375, 0.06383609771728516, 0.06762886047363281, 0.07142162322998047, 0.07521438598632812, 0.07900714874267578, 0.08279991149902344, 0.0865926742553711, 0.09038543701171875, 0.0941781997680664, 0.09797096252441406, 0.10176372528076172, 0.10555648803710938, 0.10934925079345703, 0.11314201354980469, 0.11693477630615234, 0.1207275390625]}, "gradients/encoder.encoder.layers.0.attention.k_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 1.0, 0.0, 6.0, 2.0, 4.0, 6.0, 4.0, 6.0, 5.0, 18.0, 18.0, 28.0, 64.0, 77.0, 162.0, 374.0, 925.0, 3720.0, 27916.0, 780855.0, 219423.0, 11621.0, 2065.0, 639.0, 260.0, 131.0, 71.0, 36.0, 33.0, 16.0, 16.0, 14.0, 11.0, 7.0, 8.0, 6.0, 3.0, 2.0, 2.0, 2.0, 0.0, 2.0, 1.0, 3.0, 3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.0218963623046875, -0.02110910415649414, -0.02032184600830078, -0.019534587860107422, -0.018747329711914062, -0.017960071563720703, -0.017172813415527344, -0.016385555267333984, -0.015598297119140625, -0.014811038970947266, -0.014023780822753906, -0.013236522674560547, -0.012449264526367188, -0.011662006378173828, -0.010874748229980469, -0.01008749008178711, -0.00930023193359375, -0.00851297378540039, -0.007725715637207031, -0.006938457489013672, -0.0061511993408203125, -0.005363941192626953, -0.004576683044433594, -0.0037894248962402344, -0.003002166748046875, -0.0022149085998535156, -0.0014276504516601562, -0.0006403923034667969, 0.0001468658447265625, 0.0009341239929199219, 0.0017213821411132812, 0.0025086402893066406, 0.0032958984375, 0.004083156585693359, 0.004870414733886719, 0.005657672882080078, 0.0064449310302734375, 0.007232189178466797, 0.008019447326660156, 0.008806705474853516, 0.009593963623046875, 0.010381221771240234, 0.011168479919433594, 0.011955738067626953, 0.012742996215820312, 0.013530254364013672, 0.014317512512207031, 0.01510477066040039, 0.01589202880859375, 0.01667928695678711, 0.01746654510498047, 0.018253803253173828, 0.019041061401367188, 0.019828319549560547, 0.020615577697753906, 0.021402835845947266, 0.022190093994140625, 0.022977352142333984, 0.023764610290527344, 0.024551868438720703, 0.025339126586914062, 0.026126384735107422, 0.02691364288330078, 0.02770090103149414, 0.0284881591796875]}, "gradients/encoder.encoder.layers.0.attention.k_proj.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 3.0, 0.0, 1.0, 4.0, 3.0, 2.0, 7.0, 5.0, 9.0, 18.0, 21.0, 24.0, 26.0, 35.0, 31.0, 35.0, 43.0, 50.0, 94.0, 71.0, 54.0, 56.0, 52.0, 54.0, 48.0, 56.0, 35.0, 36.0, 25.0, 23.0, 13.0, 15.0, 16.0, 17.0, 8.0, 6.0, 4.0, 4.0, 5.0, 4.0, 1.0, 0.0, 3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], "bins": [-6.377696990966797e-06, -6.195157766342163e-06, -6.012618541717529e-06, -5.8300793170928955e-06, -5.647540092468262e-06, -5.465000867843628e-06, -5.282461643218994e-06, -5.09992241859436e-06, -4.9173831939697266e-06, -4.734843969345093e-06, -4.552304744720459e-06, -4.369765520095825e-06, -4.187226295471191e-06, -4.004687070846558e-06, -3.822147846221924e-06, -3.63960862159729e-06, -3.4570693969726562e-06, -3.2745301723480225e-06, -3.0919909477233887e-06, -2.909451723098755e-06, -2.726912498474121e-06, -2.5443732738494873e-06, -2.3618340492248535e-06, -2.1792948246002197e-06, -1.996755599975586e-06, -1.8142163753509521e-06, -1.6316771507263184e-06, -1.4491379261016846e-06, -1.2665987014770508e-06, -1.084059476852417e-06, -9.015202522277832e-07, -7.189810276031494e-07, -5.364418029785156e-07, -3.5390257835388184e-07, -1.7136335372924805e-07, 1.1175870895385742e-08, 1.9371509552001953e-07, 3.762543201446533e-07, 5.587935447692871e-07, 7.413327693939209e-07, 9.238719940185547e-07, 1.1064112186431885e-06, 1.2889504432678223e-06, 1.471489667892456e-06, 1.6540288925170898e-06, 1.8365681171417236e-06, 2.0191073417663574e-06, 2.201646566390991e-06, 2.384185791015625e-06, 2.566725015640259e-06, 2.7492642402648926e-06, 2.9318034648895264e-06, 3.11434268951416e-06, 3.296881914138794e-06, 3.4794211387634277e-06, 3.6619603633880615e-06, 3.844499588012695e-06, 4.027038812637329e-06, 4.209578037261963e-06, 4.392117261886597e-06, 4.5746564865112305e-06, 4.757195711135864e-06, 4.939734935760498e-06, 5.122274160385132e-06, 5.304813385009766e-06]}, "gradients/encoder.encoder.layers.0.attention.q_proj.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 10.0, 4.0, 9.0, 9.0, 16.0, 12.0, 26.0, 46.0, 63.0, 148.0, 299.0, 583.0, 1240.0, 3687.0, 12774.0, 69003.0, 716778.0, 208226.0, 25607.0, 6305.0, 2117.0, 794.0, 355.0, 170.0, 99.0, 53.0, 30.0, 28.0, 14.0, 9.0, 6.0, 15.0, 7.0, 7.0, 3.0, 3.0, 2.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0], "bins": [-0.0238037109375, -0.023172855377197266, -0.02254199981689453, -0.021911144256591797, -0.021280288696289062, -0.020649433135986328, -0.020018577575683594, -0.01938772201538086, -0.018756866455078125, -0.01812601089477539, -0.017495155334472656, -0.016864299774169922, -0.016233444213867188, -0.015602588653564453, -0.014971733093261719, -0.014340877532958984, -0.01371002197265625, -0.013079166412353516, -0.012448310852050781, -0.011817455291748047, -0.011186599731445312, -0.010555744171142578, -0.009924888610839844, -0.00929403305053711, -0.008663177490234375, -0.00803232192993164, -0.007401466369628906, -0.006770610809326172, -0.0061397552490234375, -0.005508899688720703, -0.004878044128417969, -0.004247188568115234, -0.0036163330078125, -0.0029854774475097656, -0.0023546218872070312, -0.0017237663269042969, -0.0010929107666015625, -0.0004620552062988281, 0.00016880035400390625, 0.0007996559143066406, 0.001430511474609375, 0.0020613670349121094, 0.0026922225952148438, 0.003323078155517578, 0.0039539337158203125, 0.004584789276123047, 0.005215644836425781, 0.005846500396728516, 0.00647735595703125, 0.007108211517333984, 0.007739067077636719, 0.008369922637939453, 
0.009000778198242188, 0.009631633758544922, 0.010262489318847656, 0.01089334487915039, 0.011524200439453125, 0.01215505599975586, 0.012785911560058594, 0.013416767120361328, 0.014047622680664062, 0.014678478240966797, 0.015309333801269531, 0.015940189361572266, 0.016571044921875]}, "gradients/encoder.encoder.layers.0.attention.q_proj.bias": {"_type": "histogram", "values": [2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 2.0, 0.0, 4.0, 2.0, 3.0, 4.0, 9.0, 7.0, 14.0, 24.0, 25.0, 40.0, 55.0, 83.0, 120.0, 142.0, 104.0, 98.0, 66.0, 49.0, 37.0, 33.0, 21.0, 15.0, 12.0, 15.0, 10.0, 7.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.0195770263671875, -0.018964529037475586, -0.018352031707763672, -0.017739534378051758, -0.017127037048339844, -0.01651453971862793, -0.015902042388916016, -0.015289545059204102, -0.014677047729492188, -0.014064550399780273, -0.01345205307006836, -0.012839555740356445, -0.012227058410644531, -0.011614561080932617, -0.011002063751220703, -0.010389566421508789, -0.009777069091796875, -0.009164571762084961, -0.008552074432373047, -0.007939577102661133, -0.007327079772949219, -0.006714582443237305, -0.006102085113525391, -0.0054895877838134766, -0.0048770904541015625, -0.0042645931243896484, -0.0036520957946777344, -0.0030395984649658203, -0.0024271011352539062, -0.0018146038055419922, -0.0012021064758300781, -0.0005896091461181641, 2.288818359375e-05, 0.0006353855133056641, 0.0012478828430175781, 0.0018603801727294922, 0.0024728775024414062, 0.0030853748321533203, 0.0036978721618652344, 0.0043103694915771484, 0.0049228668212890625, 0.0055353641510009766, 0.006147861480712891, 0.006760358810424805, 0.007372856140136719, 0.007985353469848633, 0.008597850799560547, 0.009210348129272461, 0.009822845458984375, 0.010435342788696289, 0.011047840118408203, 0.011660337448120117, 0.012272834777832031, 0.012885332107543945, 0.01349782943725586, 0.014110326766967773, 0.014722824096679688, 0.015335321426391602, 0.015947818756103516, 0.01656031608581543, 0.017172813415527344, 0.017785310745239258, 0.018397808074951172, 0.019010305404663086, 0.019622802734375]}, "gradients/encoder.encoder.layers.0.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 2.0, 2.0, 3.0, 4.0, 7.0, 11.0, 19.0, 32.0, 46.0, 97.0, 155.0, 259.0, 160.0, 86.0, 55.0, 31.0, 14.0, 12.0, 4.0, 4.0, 3.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0], "bins": [-0.7350334525108337, -0.7178519368171692, -0.7006704807281494, -0.6834889650344849, -0.6663074493408203, -0.6491259336471558, -0.631944477558136, -0.6147629618644714, -0.5975815057754517, -0.5803999900817871, -0.5632185339927673, -0.5460370182991028, -0.5288555026054382, -0.5116740465164185, -0.4944925308227539, -0.47731101512908936, -0.4601294994354248, -0.44294801354408264, -0.4257664978504181, -0.4085850119590759, -0.3914034962654114, -0.3742220103740692, -0.35704052448272705, -0.3398590087890625, -0.32267752289772034, -0.3054960370063782, -0.2883145213127136, -0.27113303542137146, -0.2539515495300293, -0.23677003383636475, -0.21958854794502258, -0.20240704715251923, -0.18522551655769348, -0.16804401576519012, -0.15086251497268677, -0.1336810290813446, -0.11649952828884125, -0.09931802749633789, -0.08213653415441513, -0.06495504081249237, -0.047773540019989014, -0.030592042952775955, 
-0.013410545885562897, 0.0037709511816501617, 0.02095244824886322, 0.03813394904136658, 0.05531544238328934, 0.0724969357252121, 0.08967843651771545, 0.10685993731021881, 0.12404143065214157, 0.14122292399406433, 0.1584044247865677, 0.17558592557907104, 0.1927674114704132, 0.20994891226291656, 0.22713041305541992, 0.24431191384792328, 0.26149341464042664, 0.2786749005317688, 0.29585641622543335, 0.3130379021167755, 0.3302193880081177, 0.3474009037017822, 0.3645823895931244]}, "gradients/encoder.encoder.layers.0.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 3.0, 5.0, 3.0, 6.0, 5.0, 8.0, 8.0, 12.0, 24.0, 16.0, 21.0, 24.0, 28.0, 30.0, 30.0, 22.0, 51.0, 48.0, 55.0, 50.0, 115.0, 59.0, 56.0, 51.0, 43.0, 33.0, 31.0, 36.0, 21.0, 27.0, 18.0, 18.0, 20.0, 9.0, 5.0, 7.0, 3.0, 5.0, 0.0, 2.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0], "bins": [-0.35718709230422974, -0.3476164937019348, -0.3380458950996399, -0.32847529649734497, -0.31890472769737244, -0.3093341290950775, -0.2997635304927826, -0.29019293189048767, -0.28062233328819275, -0.2710517346858978, -0.2614811360836029, -0.25191056728363037, -0.24233995378017426, -0.23276937007904053, -0.2231987714767456, -0.21362817287445068, -0.20405758917331696, -0.19448699057102203, -0.1849164068698883, -0.17534580826759338, -0.16577520966529846, -0.15620461106300354, -0.1466340273618698, -0.1370634287595749, -0.12749284505844116, -0.11792225390672684, -0.10835165530443192, -0.09878106415271759, -0.08921046555042267, -0.07963987439870834, -0.07006928324699402, -0.0604986846446991, -0.050928086042404175, -0.04135749116539955, -0.03178689628839493, -0.022216305136680603, -0.01264571025967598, -0.003075115382671356, 0.006495475769042969, 0.01606607437133789, 0.025636665523052216, 0.03520726040005684, 0.04477785527706146, 0.05434844642877579, 0.06391903758049011, 0.07348963618278503, 0.08306022733449936, 0.09263082593679428, 0.1022014170885086, 0.11177200824022293, 0.12134260684251785, 0.13091319799423218, 0.1404837965965271, 0.15005439519882202, 0.15962497889995575, 0.16919557750225067, 0.1787661612033844, 0.18833675980567932, 0.19790734350681305, 0.20747794210910797, 0.2170485407114029, 0.22661912441253662, 0.23618972301483154, 0.24576032161712646, 0.2553309202194214]}, "gradients/encoder.encoder.pos_conv_embed.conv.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 2.0, 5.0, 6.0, 12.0, 9.0, 12.0, 22.0, 8.0, 34.0, 29.0, 33.0, 28.0, 36.0, 53.0, 179.0, 266.0, 73.0, 34.0, 33.0, 24.0, 15.0, 16.0, 17.0, 7.0, 8.0, 9.0, 8.0, 4.0, 7.0, 8.0, 4.0, 2.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.06005859375, -0.057923316955566406, -0.05578804016113281, -0.05365276336669922, -0.051517486572265625, -0.04938220977783203, -0.04724693298339844, -0.045111656188964844, -0.04297637939453125, -0.040841102600097656, -0.03870582580566406, -0.03657054901123047, -0.034435272216796875, -0.03229999542236328, -0.030164718627929688, -0.028029441833496094, -0.0258941650390625, -0.023758888244628906, -0.021623611450195312, -0.01948833465576172, -0.017353057861328125, -0.015217781066894531, -0.013082504272460938, -0.010947227478027344, -0.00881195068359375, -0.006676673889160156, -0.0045413970947265625, -0.0024061203002929688, -0.000270843505859375, 0.0018644332885742188, 0.0039997100830078125, 0.006134986877441406, 0.008270263671875, 0.010405540466308594, 0.012540817260742188, 
0.014676094055175781, 0.016811370849609375, 0.01894664764404297, 0.021081924438476562, 0.023217201232910156, 0.02535247802734375, 0.027487754821777344, 0.029623031616210938, 0.03175830841064453, 0.033893585205078125, 0.03602886199951172, 0.03816413879394531, 0.040299415588378906, 0.0424346923828125, 0.044569969177246094, 0.04670524597167969, 0.04884052276611328, 0.050975799560546875, 0.05311107635498047, 0.05524635314941406, 0.057381629943847656, 0.05951690673828125, 0.061652183532714844, 0.06378746032714844, 0.06592273712158203, 0.06805801391601562, 0.07019329071044922, 0.07232856750488281, 0.0744638442993164, 0.07659912109375]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_v": {"_type": "histogram", "values": [1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 3.0, 4.0, 5.0, 10.0, 14.0, 27.0, 37.0, 67.0, 109.0, 196.0, 441.0, 1178.0, 5243.0, 72887.0, 8292191.0, 12555.0, 2318.0, 697.0, 289.0, 132.0, 74.0, 38.0, 26.0, 21.0, 10.0, 8.0, 3.0, 4.0, 2.0, 4.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2614518105983734, -0.2501899302005768, -0.23892806470394135, -0.22766618430614471, -0.21640431880950928, -0.20514243841171265, -0.19388055801391602, -0.18261867761611938, -0.17135681211948395, -0.16009493172168732, -0.14883306622505188, -0.13757118582725525, -0.12630930542945862, -0.11504743993282318, -0.10378555953502655, -0.09252368658781052, -0.08126181364059448, -0.06999994069337845, -0.058738064020872116, -0.047476187348365784, -0.03621431440114975, -0.024952441453933716, -0.013690561056137085, -0.002428688108921051, 0.008833184838294983, 0.020095059648156166, 0.03135693445801735, 0.04261881113052368, 0.053880684077739716, 0.06514255702495575, 0.07640443742275238, 0.08766631036996841, 0.09892818331718445, 0.11019005626440048, 0.12145192921161652, 0.13271380960941315, 0.14397567510604858, 0.15523755550384521, 0.16649943590164185, 0.17776131629943848, 0.1890231817960739, 0.20028506219387054, 0.21154692769050598, 0.2228088080883026, 0.23407068848609924, 0.24533255398273468, 0.2565944194793701, 0.26785629987716675, 0.2791181802749634, 0.29038006067276, 0.30164194107055664, 0.31290382146835327, 0.3241656720638275, 0.33542755246162415, 0.3466894328594208, 0.3579513132572174, 0.36921316385269165, 0.3804750442504883, 0.3917369246482849, 0.40299880504608154, 0.4142606556415558, 0.4255225360393524, 0.43678441643714905, 0.4480462968349457, 0.4593081772327423]}, "gradients/encoder.encoder.pos_conv_embed.conv.weight_g": {"_type": "histogram", "values": [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 3.0, 1.0, 4.0, 2.0, 1.0, 4.0, 2.0, 3.0, 5.0, 5.0, 9.0, 4.0, 8.0, 2.0, 3.0, 5.0, 3.0, 3.0, 2.0, 5.0, 4.0, 3.0, 3.0, 6.0, 4.0, 2.0, 4.0, 3.0, 0.0, 0.0, 1.0, 2.0, 0.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.2817419767379761, -0.2738223373889923, -0.26590266823768616, -0.2579830288887024, -0.25006338953971863, -0.24214373528957367, -0.2342240810394287, -0.22630444169044495, -0.21838480234146118, -0.21046514809131622, -0.20254550874233246, -0.1946258544921875, -0.18670621514320374, -0.17878656089305878, -0.17086690664291382, -0.16294726729393005, -0.1550276130437851, -0.14710795879364014, -0.13918831944465637, -0.1312686651945114, -0.12334902584552765, -0.11542937159538269, -0.10750972479581833, -0.09959007799625397, -0.0916704311966896, -0.08375078439712524, -0.07583113759756088, -0.06791149079799652, -0.05999184027314186, 
-0.0520721934735775, -0.04415254294872284, -0.03623289614915848, -0.02831326425075531, -0.02039361745119095, -0.012473968788981438, -0.004554320126771927, 0.0033653266727924347, 0.011284973472356796, 0.019204623997211456, 0.027124270796775818, 0.03504391759634018, 0.04296356439590454, 0.0508832111954689, 0.05880286172032356, 0.06672251224517822, 0.07464215159416199, 0.08256180584430695, 0.09048145264387131, 0.09840109944343567, 0.10632074624300003, 0.11424039304256439, 0.12216004729270935, 0.13007968664169312, 0.13799934089183807, 0.14591899514198303, 0.1538386344909668, 0.16175827383995056, 0.16967792809009552, 0.17759756743907928, 0.18551722168922424, 0.193436861038208, 0.20135651528835297, 0.20927616953849792, 0.2171958088874817, 0.22511546313762665]}, "gradients/encoder.masked_spec_embed": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 2.0, 2.0, 1.0, 3.0, 0.0, 0.0, 1.0, 1.0, 4.0, 4.0, 4.0, 7.0, 7.0, 5.0, 9.0, 10.0, 10.0, 16.0, 25.0, 21.0, 32.0, 32.0, 55.0, 54.0, 59.0, 62.0, 82.0, 74.0, 55.0, 47.0, 55.0, 57.0, 41.0, 51.0, 26.0, 26.0, 18.0, 10.0, 15.0, 13.0, 3.0, 6.0, 3.0, 4.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0], "bins": [-0.032257080078125, -0.03121471405029297, -0.030172348022460938, -0.029129981994628906, -0.028087615966796875, -0.027045249938964844, -0.026002883911132812, -0.02496051788330078, -0.02391815185546875, -0.02287578582763672, -0.021833419799804688, -0.020791053771972656, -0.019748687744140625, -0.018706321716308594, -0.017663955688476562, -0.01662158966064453, -0.0155792236328125, -0.014536857604980469, -0.013494491577148438, -0.012452125549316406, -0.011409759521484375, -0.010367393493652344, -0.009325027465820312, -0.008282661437988281, -0.00724029541015625, -0.006197929382324219, -0.0051555633544921875, -0.004113197326660156, -0.003070831298828125, -0.0020284652709960938, -0.0009860992431640625, 5.626678466796875e-05, 0.0010986328125, 0.0021409988403320312, 0.0031833648681640625, 0.004225730895996094, 0.005268096923828125, 0.006310462951660156, 0.0073528289794921875, 0.008395195007324219, 0.00943756103515625, 0.010479927062988281, 0.011522293090820312, 0.012564659118652344, 0.013607025146484375, 0.014649391174316406, 0.015691757202148438, 0.01673412322998047, 0.0177764892578125, 0.01881885528564453, 0.019861221313476562, 0.020903587341308594, 0.021945953369140625, 0.022988319396972656, 0.024030685424804688, 0.02507305145263672, 0.02611541748046875, 0.02715778350830078, 0.028200149536132812, 0.029242515563964844, 0.030284881591796875, 0.031327247619628906, 0.03236961364746094, 0.03341197967529297, 0.034454345703125]}, "gradients/encoder.feature_projection.projection.weight": {"_type": "histogram", "values": [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 5.0, 7.0, 6.0, 3.0, 12.0, 15.0, 34.0, 41.0, 81.0, 107.0, 229.0, 491.0, 1284.0, 3765.0, 14785.0, 110718.0, 341034.0, 40030.0, 7543.0, 2266.0, 859.0, 419.0, 207.0, 112.0, 78.0, 41.0, 34.0, 16.0, 19.0, 9.0, 8.0, 9.0, 4.0, 2.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-1.18359375, -1.1431732177734375, -1.102752685546875, -1.0623321533203125, -1.02191162109375, -0.9814910888671875, -0.941070556640625, -0.9006500244140625, -0.8602294921875, -0.8198089599609375, -0.779388427734375, -0.7389678955078125, -0.69854736328125, -0.6581268310546875, -0.617706298828125, -0.5772857666015625, -0.536865234375, -0.4964447021484375, -0.456024169921875, -0.4156036376953125, -0.37518310546875, -0.3347625732421875, 
-0.294342041015625, -0.2539215087890625, -0.2135009765625, -0.1730804443359375, -0.132659912109375, -0.0922393798828125, -0.05181884765625, -0.0113983154296875, 0.029022216796875, 0.0694427490234375, 0.10986328125, 0.1502838134765625, 0.190704345703125, 0.2311248779296875, 0.27154541015625, 0.3119659423828125, 0.352386474609375, 0.3928070068359375, 0.4332275390625, 0.4736480712890625, 0.514068603515625, 0.5544891357421875, 0.59490966796875, 0.6353302001953125, 0.675750732421875, 0.7161712646484375, 0.756591796875, 0.7970123291015625, 0.837432861328125, 0.8778533935546875, 0.91827392578125, 0.9586944580078125, 0.999114990234375, 1.0395355224609375, 1.0799560546875, 1.1203765869140625, 1.160797119140625, 1.2012176513671875, 1.24163818359375, 1.2820587158203125, 1.322479248046875, 1.3628997802734375, 1.4033203125]}, "gradients/encoder.feature_projection.projection.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 5.0, 1.0, 9.0, 12.0, 11.0, 20.0, 27.0, 32.0, 45.0, 31.0, 65.0, 70.0, 88.0, 86.0, 84.0, 88.0, 78.0, 65.0, 56.0, 35.0, 35.0, 23.0, 15.0, 14.0, 6.0, 3.0, 4.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.085693359375, -0.0831766128540039, -0.08065986633300781, -0.07814311981201172, -0.07562637329101562, -0.07310962677001953, -0.07059288024902344, -0.06807613372802734, -0.06555938720703125, -0.06304264068603516, -0.06052589416503906, -0.05800914764404297, -0.055492401123046875, -0.05297565460205078, -0.05045890808105469, -0.047942161560058594, -0.0454254150390625, -0.042908668518066406, -0.04039192199707031, -0.03787517547607422, -0.035358428955078125, -0.03284168243408203, -0.030324935913085938, -0.027808189392089844, -0.02529144287109375, -0.022774696350097656, -0.020257949829101562, -0.01774120330810547, -0.015224456787109375, -0.012707710266113281, -0.010190963745117188, -0.007674217224121094, -0.005157470703125, -0.0026407241821289062, -0.0001239776611328125, 0.0023927688598632812, 0.004909515380859375, 0.007426261901855469, 0.009943008422851562, 0.012459754943847656, 0.01497650146484375, 0.017493247985839844, 0.020009994506835938, 0.02252674102783203, 0.025043487548828125, 0.02756023406982422, 0.030076980590820312, 0.032593727111816406, 0.0351104736328125, 0.037627220153808594, 0.04014396667480469, 0.04266071319580078, 0.045177459716796875, 0.04769420623779297, 0.05021095275878906, 0.052727699279785156, 0.05524444580078125, 0.057761192321777344, 0.06027793884277344, 0.06279468536376953, 0.06531143188476562, 0.06782817840576172, 0.07034492492675781, 0.0728616714477539, 0.07537841796875]}, "gradients/encoder.feature_projection.layer_norm.weight": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0, 3.0, 5.0, 5.0, 7.0, 8.0, 16.0, 17.0, 15.0, 17.0, 24.0, 22.0, 52.0, 51.0, 42.0, 47.0, 48.0, 23.0, 23.0, 12.0, 11.0, 14.0, 5.0, 3.0, 7.0, 4.0, 2.0, 2.0, 1.0, 1.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0], "bins": [-0.15517164766788483, -0.15032774209976196, -0.1454838365316391, -0.14063993096351624, -0.13579602539539337, -0.1309521198272705, -0.12610821425914764, -0.12126430869102478, -0.11642040312290192, -0.11157649755477905, -0.10673259198665619, -0.10188868641853333, -0.09704478085041046, -0.0922008752822876, -0.08735696971416473, -0.08251306414604187, -0.077669158577919, -0.07282525300979614, -0.06798134744167328, 
-0.06313744187355042, -0.05829353630542755, -0.05344963073730469, -0.048605725169181824, -0.04376181960105896, -0.038917914032936096, -0.03407400846481323, -0.02923010289669037, -0.024386197328567505, -0.01954229176044464, -0.014698386192321777, -0.009854480624198914, -0.00501057505607605, -0.00016666948795318604, 0.004677236080169678, 0.009521141648292542, 0.014365047216415405, 0.01920895278453827, 0.024052858352661133, 0.028896763920783997, 0.03374066948890686, 0.038584575057029724, 0.04342848062515259, 0.04827238619327545, 0.053116291761398315, 0.05796019732952118, 0.06280410289764404, 0.0676480084657669, 0.07249191403388977, 0.07733581960201263, 0.0821797251701355, 0.08702363073825836, 0.09186753630638123, 0.09671144187450409, 0.10155534744262695, 0.10639925301074982, 0.11124315857887268, 0.11608706414699554, 0.12093096971511841, 0.12577487528324127, 0.13061878085136414, 0.135462686419487, 0.14030659198760986, 0.14515049755573273, 0.1499944031238556, 0.15483830869197845]}, "gradients/encoder.feature_projection.layer_norm.bias": {"_type": "histogram", "values": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 1.0, 1.0, 4.0, 3.0, 2.0, 3.0, 7.0, 7.0, 4.0, 7.0, 11.0, 8.0, 11.0, 11.0, 12.0, 15.0, 53.0, 87.0, 99.0, 44.0, 14.0, 18.0, 12.0, 11.0, 11.0, 6.0, 8.0, 8.0, 4.0, 2.0, 2.0, 3.0, 4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], "bins": [-0.20371556282043457, -0.19731774926185608, -0.19091995060443878, -0.18452215194702148, -0.178124338388443, -0.1717265248298645, -0.1653287261724472, -0.1589309275150299, -0.15253311395645142, -0.14613530039787292, -0.13973750174045563, -0.13333970308303833, -0.12694188952445984, -0.12054408341646194, -0.11414627730846405, -0.10774847120046616, -0.10135066509246826, -0.09495285898447037, -0.08855505287647247, -0.08215724676847458, -0.07575944066047668, -0.06936163455247879, -0.0629638284444809, -0.056566022336483, -0.05016821622848511, -0.04377041012048721, -0.03737260401248932, -0.030974797904491425, -0.02457699179649353, -0.018179185688495636, -0.011781379580497742, -0.005383573472499847, 0.0010142326354980469, 0.007412038743495941, 0.013809844851493835, 0.02020765095949173, 0.026605457067489624, 0.03300326317548752, 0.03940106928348541, 0.04579887539148331, 0.0521966814994812, 0.058594487607479095, 0.06499229371547699, 0.07139009982347488, 0.07778790593147278, 0.08418571203947067, 0.09058351814746857, 0.09698132425546646, 0.10337913036346436, 0.10977693647146225, 0.11617474257946014, 0.12257254868745804, 0.12897035479545593, 0.13536816835403442, 0.14176596701145172, 0.14816376566886902, 0.1545615792274475, 0.160959392786026, 0.1673571914434433, 0.1737549901008606, 0.1801528036594391, 0.18655061721801758, 0.19294841587543488, 0.19934621453285217, 0.20574402809143066]}} \ No newline at end of file diff --git a/wandb/run-20220504_095140-cwhobv6l/logs/debug-internal.log b/wandb/run-20220504_095140-cwhobv6l/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c0884b778501c9cd52328fbbd47263435cd64176 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/logs/debug-internal.log @@ -0,0 +1,3381 @@ +2022-05-04 09:51:41,189 INFO MainThread:49247 [internal.py:wandb_internal():89] W&B internal server running at pid: 49247, started at: 2022-05-04 09:51:41.189232 +2022-05-04 09:51:41,191 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: check_version +2022-05-04 09:51:41,191 INFO WriterThread:49247 [datastore.py:open_for_write():77] 
open: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/run-cwhobv6l.wandb +2022-05-04 09:51:41,192 DEBUG SenderThread:49247 [sender.py:send():235] send: header +2022-05-04 09:51:41,193 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: check_version +2022-05-04 09:51:41,258 DEBUG SenderThread:49247 [sender.py:send():235] send: run +2022-05-04 09:51:41,327 INFO SenderThread:49247 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files +2022-05-04 09:51:41,327 INFO SenderThread:49247 [sender.py:_start_run_threads():809] run started: cwhobv6l with start time 1651657900 +2022-05-04 09:51:41,328 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:51:41,328 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:51:41,329 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: run_start +2022-05-04 09:51:42,329 INFO Thread-8 :49247 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:51:43,711 DEBUG HandlerThread:49247 [meta.py:__init__():36] meta init +2022-05-04 09:51:43,711 DEBUG HandlerThread:49247 [meta.py:__init__():50] meta init done +2022-05-04 09:51:43,711 DEBUG HandlerThread:49247 [meta.py:probe():210] probe +2022-05-04 09:51:43,717 DEBUG HandlerThread:49247 [meta.py:_setup_git():200] setup git +2022-05-04 09:51:43,749 DEBUG HandlerThread:49247 [meta.py:_setup_git():207] setup git done +2022-05-04 09:51:43,749 DEBUG HandlerThread:49247 [meta.py:_save_pip():54] save pip +2022-05-04 09:51:43,750 DEBUG HandlerThread:49247 [meta.py:_save_pip():68] save pip done +2022-05-04 09:51:43,750 DEBUG HandlerThread:49247 [meta.py:probe():248] probe done +2022-05-04 09:51:43,754 DEBUG SenderThread:49247 [sender.py:send():235] send: files +2022-05-04 09:51:43,754 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now +2022-05-04 09:51:43,760 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:51:43,760 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:51:43,791 DEBUG SenderThread:49247 [sender.py:send():235] send: config +2022-05-04 09:51:43,792 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:51:43,792 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:51:43,792 WARNING SenderThread:49247 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen) +2022-05-04 09:51:44,002 INFO Thread-11 :49247 [upload_job.py:push():137] Uploaded file /tmp/tmp67j_as9iwandb/ti2pviem-wandb-metadata.json +2022-05-04 09:51:44,333 INFO Thread-8 :49247 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt +2022-05-04 09:51:44,333 INFO Thread-8 :49247 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:44,333 INFO Thread-8 :49247 [dir_watcher.py:_on_file_created():217] file/dir created: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-metadata.json +2022-05-04 09:51:46,334 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:48,334 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:48,828 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:51:48,828 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:51:48,828 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:51:48,828 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:51:48,829 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:51:48,829 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:51:49,335 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:51:50,335 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:52,336 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:53,067 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:51:53,067 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:51:53,068 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:51:53,336 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:51:54,337 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:56,337 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:51:57,135 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:51:57,135 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:51:57,136 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:51:57,338 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:51:58,338 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log 
+2022-05-04 09:51:58,808 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:51:58,808 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:52:00,339 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:01,012 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:01,012 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:01,012 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:01,339 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:02,340 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:04,340 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:04,796 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:04,796 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:04,797 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:05,341 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:06,341 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:08,342 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:08,485 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:08,485 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:08,485 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:09,343 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:10,343 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:12,149 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:52:12,290 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:12,290 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:12,291 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 
09:52:12,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/config.yaml +2022-05-04 09:52:12,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:12,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:13,851 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:52:13,852 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:52:14,345 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:16,009 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:16,009 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:16,011 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:16,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:16,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:18,347 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:19,610 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:19,610 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:19,611 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:20,348 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:20,348 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:22,348 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:23,124 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:23,124 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:23,126 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:23,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:24,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:26,350 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:26,591 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:26,591 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:26,592 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:27,350 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:28,351 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:28,893 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:52:28,893 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:52:30,029 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:30,030 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:30,030 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:30,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:30,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:32,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:33,951 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,956 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,956 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,956 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:33,957 DEBUG SenderThread:49247 [sender.py:send():235] send: 
metric
[… several hundred further identical "send: metric" DEBUG entries from SenderThread:49247, timestamped 2022-05-04 09:52:33,957 through 09:52:34,102, elided …]
+2022-05-04 09:52:34,102 DEBUG 
SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,102 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,103 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 
[sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,104 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,105 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: 
metric +2022-05-04 09:52:34,106 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,107 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,108 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 
09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,109 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,110 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG 
SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,111 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,112 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 
[sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,113 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,114 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: 
metric +2022-05-04 09:52:34,115 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,116 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,117 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 
09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,118 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,119 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG 
SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,120 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,121 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 
[sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,122 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,123 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: 
metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,124 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,125 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 
09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,126 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,127 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,128 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG 
SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,129 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,130 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 
[sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,131 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,132 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,133 DEBUG SenderThread:49247 [sender.py:send():235] send: 
metric
+[repeated DEBUG entries: SenderThread:49247 [sender.py:send():235] send: metric, 2022-05-04 09:52:34,133 through 09:52:34,178]
+2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send:
metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,178 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,179 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 
09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,180 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,181 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG 
SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,182 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,183 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 
[sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,184 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,185 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: metric +2022-05-04 09:52:34,186 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:34,256 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:34,331 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:34,353 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:34,353 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 
09:52:36,354 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:37,266 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:37,324 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:37,391 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:38,390 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:38,390 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:40,391 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:40,578 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:52:40,617 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:52:40,684 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:52:41,391 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:52:42,392 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:43,957 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:52:44,392 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:46,393 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:48,394 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:49,395 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:51,420 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:52,420 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:52:53,420 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:52:54,421 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:03,958 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:53:11,428 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified:
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:13,429 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:14,429 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:15,430 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:15,921 ERROR SenderThread:49247 [retry.py:__call__():126] Retry attempt failed:
+Traceback (most recent call last):
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
+    six.raise_from(e, None)
+  File "<string>", line 3, in raise_from
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
+    httplib_response = conn.getresponse()
+  File "/usr/lib/python3.9/http/client.py", line 1349, in getresponse
+    response.begin()
+  File "/usr/lib/python3.9/http/client.py", line 316, in begin
+    version, status, reason = self._read_status()
+  File "/usr/lib/python3.9/http/client.py", line 277, in _read_status
+    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
+  File "/usr/lib/python3.9/socket.py", line 704, in readinto
+    return self._sock.recv_into(b)
+  File "/usr/lib/python3.9/ssl.py", line 1241, in recv_into
+    return self.read(nbytes, buffer)
+  File "/usr/lib/python3.9/ssl.py", line 1099, in read
+    return self._sslobj.read(len, buffer)
+socket.timeout: The read operation timed out
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/adapters.py", line 440, in send
+    resp = conn.urlopen(
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 785, in urlopen
+    retries = retries.increment(
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/util/retry.py", line 550, in increment
+    raise six.reraise(type(error), error, _stacktrace)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/packages/six.py", line 770, in reraise
+    raise value
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 703, in urlopen
+    httplib_response = self._make_request(
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 451, in _make_request
+    self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/urllib3/connectionpool.py", line 340, in _raise_timeout
+    raise ReadTimeoutError(
+urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='api.wandb.ai', port=443): Read timed out. (read timeout=10)
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/lib/retry.py", line 102, in __call__
+    result = self._call_fn(*args, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/internal_api.py", line 140, in execute
+    return self.client.execute(*args, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/vendor/gql-0.2.0/gql/client.py", line 52, in execute
+    result = self._get_result(document, *args, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/vendor/gql-0.2.0/gql/client.py", line 60, in _get_result
+    return self.transport.execute(document, *args, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/vendor/gql-0.2.0/gql/transport/requests.py", line 38, in execute
+    request = requests.post(self.url, **post_args)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/api.py", line 117, in post
+    return request('post', url, data=data, json=json, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/api.py", line 61, in request
+    return session.request(method=method, url=url, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/sessions.py", line 529, in request
+    resp = self.send(prep, **send_kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/sessions.py", line 645, in send
+    r = adapter.send(request, **kwargs)
+  File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/requests/adapters.py", line 532, in send
+    raise ReadTimeout(e, request=request)
+requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='api.wandb.ai', port=443): Read timed out.
(read timeout=10)
+2022-05-04 09:53:16,430 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:53:23,960 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:53:43,997 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:54:03,991 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:54:23,992 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:54:39,346 WARNING FileStreamThread:49247 [file_stream.py:request_with_retry():594] requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en/cwhobv6l/file_stream. func: >, args: ('https://api.wandb.ai/files/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en/cwhobv6l/file_stream',), kwargs: {'json': {'complete': False, 'failed': False, 'dropped': 0, 'uploaded': []}}
+2022-05-04 09:54:44,007 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:55:04,008 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:55:24,008 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:55:44,130 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:56:04,131 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status
+2022-05-04 09:56:12,768 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log
+2022-05-04 09:56:13,167 DEBUG SenderThread:49247 [sender.py:send():235] send: stats
+2022-05-04 09:56:13,167 DEBUG SenderThread:49247 [sender.py:send():235] send: history
+2022-05-04
09:56:13,167 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,247 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,253 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:13,346 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,346 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,411 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,412 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,412 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,476 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,476 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,477 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,542 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,543 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,543 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,606 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,607 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,607 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,672 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,672 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:13,673 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:13,737 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:13,737 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:13,763 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/config.yaml +2022-05-04 09:56:13,763 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:14,024 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,024 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,091 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,092 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,092 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,160 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,161 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,234 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,297 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with 
policy end +2022-05-04 09:56:14,297 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:14,298 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,298 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,364 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,364 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,365 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,428 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,429 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,429 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,493 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,493 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:14,627 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,628 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,693 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,694 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,694 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,760 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,761 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,761 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,823 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:14,823 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:14,825 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,827 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,827 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,895 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,896 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,896 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:14,967 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:14,968 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:14,968 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,035 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,036 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,037 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,105 INFO 
SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,106 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:15,107 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:15,260 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,261 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,327 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,328 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,328 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,392 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,393 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,458 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,523 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,524 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,524 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,588 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,589 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,589 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,653 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,654 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,654 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,718 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,719 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,719 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,785 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,786 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:15,786 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:15,848 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:15,848 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:15,850 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:15,850 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:16,265 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,265 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,330 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 
09:56:16,331 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,331 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,393 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,394 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,394 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,457 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,457 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,458 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,521 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,522 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:16,523 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,523 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,593 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,593 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,593 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,658 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,659 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:16,725 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:16,791 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:16,791 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:16,848 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:16,849 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:17,024 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,024 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,094 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,094 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,094 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,165 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,166 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,166 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,237 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,238 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,238 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,306 INFO SenderThread:49247 
[sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,306 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,306 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,372 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,374 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:17,593 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:17,594 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,594 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,661 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,662 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,662 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,729 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,729 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,729 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,797 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,798 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,798 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,863 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:17,863 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:17,864 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,865 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,865 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:17,936 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:17,937 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:17,937 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,008 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,009 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:18,536 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,537 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,604 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,605 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,674 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,742 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,742 DEBUG 
SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,743 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,810 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,810 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:18,811 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,811 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,882 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:18,882 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:18,884 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,884 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,884 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:18,968 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:18,974 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:18,974 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:19,069 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:19,070 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:19,882 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:20,882 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:21,883 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:22,883 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:23,884 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:24,183 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:56:24,884 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:26,885 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:27,900 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:28,901 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:29,901 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:30,821 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:30,821 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:30,892 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:30,894 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:30,894 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:30,965 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:30,965 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:30,967 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:30,967 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:30,968 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:31,034 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:31,035 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:31,035 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:31,104 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:31,105 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:31,105 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:31,174 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:31,175 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:31,175 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:31,241 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:31,242 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:31,966 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:32,092 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:32,092 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,092 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,159 INFO SenderThread:49247 [sender.py:_save_file():944] 
saving file wandb-summary.json with policy end +2022-05-04 09:56:32,160 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,229 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,300 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:32,302 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,302 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,369 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:32,370 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,370 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,436 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:32,437 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,437 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,506 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:32,507 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:32,507 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:32,573 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:32,574 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:32,966 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:32,966 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:33,574 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,574 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,642 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:33,642 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,643 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,713 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:33,714 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,714 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,779 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:33,780 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,780 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,846 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:33,847 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,847 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,914 INFO SenderThread:49247 [sender.py:_save_file():944] 
saving file wandb-summary.json with policy end +2022-05-04 09:56:33,916 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:33,917 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,917 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:33,982 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:33,984 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:33,986 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:33,986 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:34,059 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:34,060 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:34,362 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:34,363 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:34,430 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:34,431 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:34,499 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:34,566 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:34,567 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:34,567 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:34,636 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:34,637 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:34,637 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:34,706 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:34,983 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:34,983 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:35,975 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:36,018 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:36,034 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:36,087 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:37,019 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:37,019 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:38,019 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:38,664 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:38,705 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:38,773 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:39,020 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:40,020 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:41,020 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:41,288 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:41,328 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:41,398 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:42,021 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:43,021 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:43,884 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:43,926 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:43,994 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:44,022 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:44,176 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:56:44,176 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:56:45,022 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:46,442 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:46,483 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:46,557 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:47,023 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:47,023 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:47,795 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:56:48,944 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:48,984 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:49,050 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:49,051 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:50,050 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:50,051 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:51,051 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:51,404 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:51,445 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:51,512 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:52,051 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:52,051 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:53,841 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:53,882 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:53,956 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:54,052 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:54,052 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:55,052 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:56,053 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:56,183 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:56,231 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:56,297 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:57,053 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:57,053 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:56:58,053 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:58,964 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:56:59,137 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:56:59,149 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:56:59,218 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:56:59,425 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:56:59,426 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:57:00,128 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:01,128 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:03,129 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:03,493 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:03,533 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:03,601 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:04,129 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:05,130 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:07,130 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:07,639 DEBUG SenderThread:49247 
[sender.py:send():235] send: history +2022-05-04 09:57:07,680 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:07,745 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:08,131 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:09,131 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:10,132 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:11,132 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:11,551 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:11,591 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:11,655 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:12,132 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:13,133 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:14,133 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:14,517 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:57:14,517 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:57:15,134 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:15,389 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:15,429 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:15,493 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:16,134 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:17,134 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:18,135 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log 
+2022-05-04 09:57:18,449 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:57:19,135 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:19,148 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:19,215 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:19,280 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:20,136 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:21,136 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:22,136 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:23,019 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:23,059 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:23,127 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:23,137 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:23,137 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:24,137 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:25,138 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:26,699 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:26,740 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:26,806 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:27,138 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:27,139 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:28,139 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:29,139 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:29,570 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:57:29,571 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:57:30,275 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:30,342 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:30,411 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:31,140 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:31,140 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:32,141 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:33,141 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:33,805 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:33,845 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:33,911 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:34,141 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:35,142 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:36,142 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:37,142 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:37,318 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:37,359 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:37,426 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:38,143 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:39,143 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:40,144 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:40,867 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:40,908 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:40,977 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:41,144 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:41,144 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:42,145 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:43,145 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:44,177 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:44,217 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:44,281 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:44,704 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:57:44,705 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:57:45,146 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:45,146 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:46,146 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:47,147 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:48,063 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:48,205 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:48,271 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:48,934 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:57:49,206 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:49,206 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:50,206 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:51,206 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:51,372 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:51,413 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:51,483 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:52,207 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:53,207 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:54,208 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:54,671 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:54,712 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:54,780 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:55,208 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:55,208 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:56,208 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:57,209 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:57,954 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:57:57,994 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:57:58,064 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:57:58,209 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:57:59,209 INFO Thread-8 
:49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:57:59,767 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:57:59,767 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:58:00,210 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:01,210 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:01,229 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:01,272 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:01,340 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:02,211 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:02,211 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:03,211 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:04,422 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:04,464 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:04,531 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:05,212 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:05,212 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:06,212 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:07,213 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:07,603 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:07,645 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:07,714 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:08,213 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:09,214 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:10,214 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:10,768 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:10,809 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:10,875 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:11,214 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:11,215 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:12,215 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:13,215 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:13,921 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:13,963 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:14,030 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:14,216 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:14,820 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:58:14,821 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:58:15,216 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:16,216 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:17,213 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:17,255 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:17,256 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:17,324 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:18,256 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:18,256 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:19,257 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:19,429 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:58:20,367 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:20,408 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:20,476 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:21,257 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:21,258 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:22,258 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:23,258 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:23,409 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:23,451 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:23,520 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:24,259 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:24,259 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:25,259 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:26,952 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:27,090 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:27,156 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:27,260 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:27,260 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:28,260 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:29,261 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:29,870 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:29,912 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:58:29,912 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:29,980 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:29,981 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:58:30,261 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:31,261 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:32,262 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:32,822 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:32,863 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:32,933 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:33,262 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:33,262 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:34,263 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:35,263 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:35,776 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:35,815 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:35,885 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:36,263 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:37,264 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:38,264 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:38,706 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:38,747 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:38,819 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:39,265 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:39,265 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:40,265 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:41,266 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:41,644 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:41,684 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:41,755 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:42,266 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:43,266 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:44,267 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:44,511 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:44,551 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:44,624 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:45,042 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:58:45,043 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:58:45,267 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:45,267 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:46,267 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:47,268 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:47,366 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:47,405 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:47,496 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:48,268 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:48,268 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:49,268 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:49,907 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:58:50,231 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:50,273 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:50,344 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:51,343 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:51,343 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:52,343 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:53,013 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:53,053 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:53,123 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:53,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:53,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:54,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:55,344 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:55,857 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:55,898 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:55,967 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:56,345 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:58:57,345 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:58,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:58,669 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:58:58,709 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:58:58,778 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:58:59,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:58:59,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:00,108 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:59:00,108 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:59:00,346 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:01,347 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:01,436 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:01,482 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:01,554 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:02,347 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:02,348 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:03,348 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:04,659 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:04,805 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:05,014 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:05,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:05,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:06,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:07,349 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:07,376 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:07,416 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:07,487 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:08,350 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:08,350 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:09,350 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:10,094 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:10,135 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:10,204 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:10,351 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:11,351 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:12,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:12,782 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:12,823 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:12,890 INFO 
SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:13,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:13,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:14,352 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:15,169 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:59:15,170 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:59:15,353 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:15,429 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:15,470 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:15,536 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:16,353 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:16,354 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:17,354 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:18,028 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:18,069 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:18,137 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:18,354 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:19,355 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:20,355 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:20,526 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:59:20,592 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:20,633 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:20,703 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with 
policy end +2022-05-04 09:59:21,355 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:21,356 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:22,356 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:23,116 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:23,157 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:23,231 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:23,356 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:23,356 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:24,356 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:25,357 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:25,643 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:25,685 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:25,753 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:26,357 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:26,357 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:27,358 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:28,119 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:28,156 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:28,224 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:28,358 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:29,358 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:30,229 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:59:30,229 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:59:30,359 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:30,541 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:30,580 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:30,650 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:31,359 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:31,360 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:32,360 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:32,939 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:32,980 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:33,049 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:33,360 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:33,360 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:34,360 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:35,361 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:35,704 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:35,843 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:35,908 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:36,361 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:36,362 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:37,362 INFO Thread-8 :49247 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:40,221 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:40,262 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:40,329 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:40,363 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:40,363 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:42,364 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:44,355 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:44,395 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:44,410 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:44,463 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:45,401 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:45,416 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 09:59:45,417 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 09:59:46,401 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:48,260 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:48,302 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:48,402 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:48,402 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:48,404 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:50,402 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:50,999 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 09:59:52,176 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:52,218 DEBUG 
SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:52,330 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:52,403 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:52,403 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:54,404 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:55,963 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:56,004 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:56,071 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 09:59:56,404 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:56,405 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 09:59:58,405 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 09:59:59,765 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 09:59:59,806 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 09:59:59,875 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:00,406 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:00,406 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:00,542 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 10:00:00,543 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 10:00:02,407 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:03,412 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:03,455 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:03,603 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:04,450 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:04,450 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:06,450 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:07,059 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:07,102 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:07,237 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:07,451 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:08,451 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:10,452 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:10,669 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:10,712 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:10,856 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:11,452 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:12,453 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:14,235 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:14,278 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:14,349 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:14,454 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:14,454 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:15,613 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 10:00:15,614 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 10:00:16,455 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:17,711 DEBUG 
SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:17,754 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:17,879 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:18,456 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:18,456 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:20,457 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:21,159 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:21,202 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:21,309 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:21,457 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:21,511 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 10:00:22,457 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:24,458 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:25,169 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:25,317 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:25,438 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:25,459 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:26,459 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:28,460 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:28,597 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:28,641 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:28,712 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:29,460 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:30,461 INFO 
Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:30,760 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 10:00:30,761 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 10:00:32,227 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:32,269 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:32,342 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:32,461 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:32,462 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:34,462 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:35,845 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:35,888 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:36,023 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:36,463 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:36,463 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:38,463 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:39,326 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:39,369 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:39,446 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:39,464 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:40,464 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:42,465 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:42,796 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:42,843 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:42,929 INFO 
SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:43,465 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:44,466 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:45,882 DEBUG HandlerThread:49247 [handler.py:handle_request():131] handle_request: stop_status +2022-05-04 10:00:45,882 DEBUG SenderThread:49247 [sender.py:send_request():249] send_request: stop_status +2022-05-04 10:00:46,319 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:46,362 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:46,440 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:46,466 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:46,467 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:48,467 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:49,674 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:49,718 DEBUG SenderThread:49247 [sender.py:send():235] send: summary +2022-05-04 10:00:49,863 INFO SenderThread:49247 [sender.py:_save_file():944] saving file wandb-summary.json with policy end +2022-05-04 10:00:50,468 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:50,468 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:52,065 DEBUG SenderThread:49247 [sender.py:send():235] send: stats +2022-05-04 10:00:52,469 INFO Thread-8 :49247 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:52,976 DEBUG SenderThread:49247 [sender.py:send():235] send: history +2022-05-04 10:00:52,981 INFO SenderThread:49247 [sender.py:finish():1075] shutting down sender +2022-05-04 10:00:52,987 INFO SenderThread:49247 [dir_watcher.py:finish():283] shutting down directory watcher +2022-05-04 10:00:53,017 INFO HandlerThread:49247 [handler.py:finish():739] shutting down handler +2022-05-04 10:00:53,470 INFO SenderThread:49247 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files +2022-05-04 10:00:53,474 INFO SenderThread:49247 [dir_watcher.py:finish():327] scan save: 
/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-metadata.json wandb-metadata.json +2022-05-04 10:00:53,475 INFO SenderThread:49247 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log output.log +2022-05-04 10:00:53,475 INFO SenderThread:49247 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json wandb-summary.json +2022-05-04 10:00:53,479 INFO SenderThread:49247 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt requirements.txt +2022-05-04 10:00:53,483 INFO SenderThread:49247 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/config.yaml config.yaml +2022-05-04 10:00:53,486 INFO SenderThread:49247 [file_pusher.py:finish():177] shutting down file pusher +2022-05-04 10:00:53,491 INFO SenderThread:49247 [file_pusher.py:join():182] waiting for file pusher +2022-05-04 10:00:53,812 INFO Thread-12 :49247 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/output.log +2022-05-04 10:00:53,832 INFO Thread-14 :49247 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/requirements.txt +2022-05-04 10:00:53,849 INFO Thread-15 :49247 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/config.yaml +2022-05-04 10:00:53,930 INFO Thread-13 :49247 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/files/wandb-summary.json +2022-05-04 10:00:55,298 ERROR MainThread:49247 [internal.py:wandb_internal():164] Thread WriterThread: +Traceback (most recent call last): + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/internal_util.py", line 54, in run + self._run() + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/internal_util.py", line 105, in _run + self._process(record) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/internal.py", line 346, in _process + self._wm.write(record) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/writer.py", line 35, in write + self._ds.write(record) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/datastore.py", line 276, in write + ret = self._write_data(s) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/datastore.py", line 236, in _write_data + self._write_record(s) + File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/wandb/sdk/internal/datastore.py", line 213, in _write_record + self._fp.write(s) +OSError: [Errno 28] No space left on device +2022-05-04 10:00:57,310 INFO MainThread:49247 [internal.py:handle_exit():79] Internal process exited diff --git a/wandb/run-20220504_095140-cwhobv6l/logs/debug.log b/wandb/run-20220504_095140-cwhobv6l/logs/debug.log new file mode 100644 
index 0000000000000000000000000000000000000000..64ed54d6b60ee74abc27299e6f475ac399840255 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/logs/debug.log @@ -0,0 +1,30 @@ +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_setup.py:_flush():75] Loading settings from wandb/settings +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_setup.py:_flush():75] Loading settings from environment variables: {'entity': 'sanchit-gandhi', 'project': 'xtreme_s_xlsr_2_bart_covost2_fr_en', 'sweep_id': '39ci3gkf', 'root_dir': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2', 'run_id': 'cwhobv6l', 'sweep_param_path': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/sweep-39ci3gkf/config-cwhobv6l.yaml'} +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_xtreme_s.py', 'program': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/run_xtreme_s.py'} +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/logs/debug.log +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en_2/wandb/run-20220504_095140-cwhobv6l/logs/debug-internal.log +2022-05-04 09:51:40,116 INFO MainThread:49135 [wandb_init.py:init():420] calling init triggers +2022-05-04 09:51:40,117 INFO MainThread:49135 [wandb_init.py:init():425] wandb.init called with sweep_config: {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.043267782095468554, 'language': 'fr.en', 'learning_rate': 0.0009027256702272704, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 4, 'per_device_train_batch_size': 4, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500} +config: {} +2022-05-04 09:51:40,117 INFO MainThread:49135 [wandb_init.py:init():471] starting backend +2022-05-04 09:51:40,117 INFO MainThread:49135 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-05-04 09:51:40,196 INFO MainThread:49135 [backend.py:ensure_launched():219] starting backend process... 
+2022-05-04 09:51:40,274 INFO MainThread:49135 [backend.py:ensure_launched():224] started backend process with pid: 49247 +2022-05-04 09:51:40,276 INFO MainThread:49135 [wandb_init.py:init():480] backend started and connected +2022-05-04 09:51:40,279 INFO MainThread:49135 [wandb_run.py:_config_callback():966] config_cb None None {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.043267782095468554, 'language': 'fr.en', 'learning_rate': 0.0009027256702272704, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 4, 'per_device_train_batch_size': 4, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500} +2022-05-04 09:51:40,293 INFO MainThread:49135 [wandb_init.py:init():550] updated telemetry +2022-05-04 09:51:40,476 INFO MainThread:49135 [wandb_init.py:init():581] communicating current version +2022-05-04 09:51:41,256 INFO MainThread:49135 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.16 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-05-04 09:51:41,256 INFO MainThread:49135 [wandb_init.py:init():596] communicating run to backend with 30 second timeout +2022-05-04 09:51:41,328 INFO MainThread:49135 [wandb_init.py:init():624] starting run threads in backend +2022-05-04 09:51:43,760 INFO MainThread:49135 [wandb_run.py:_console_start():1827] atexit reg +2022-05-04 09:51:43,761 INFO MainThread:49135 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT +2022-05-04 09:51:43,761 INFO MainThread:49135 [wandb_run.py:_redirect():1706] Redirecting console. +2022-05-04 09:51:43,763 INFO MainThread:49135 [wandb_run.py:_redirect():1762] Redirects installed. 
+2022-05-04 09:51:43,763 INFO MainThread:49135 [wandb_init.py:init():651] run started, returning control to user process +2022-05-04 09:51:43,768 INFO MainThread:49135 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 
'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, '_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.043267782095468554, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 
'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May04_09-50-47_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 4, 'eval_batch_size': 4} +2022-05-04 09:51:43,771 INFO MainThread:49135 [wandb_watch.py:watch():43] Watching +2022-05-04 10:00:59,038 INFO MainThread:49135 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1 +2022-05-04 10:00:59,041 INFO MainThread:49135 [wandb_run.py:_restore():1769] restore diff --git a/wandb/run-20220504_095140-cwhobv6l/run-cwhobv6l.wandb b/wandb/run-20220504_095140-cwhobv6l/run-cwhobv6l.wandb new file mode 100644 index 0000000000000000000000000000000000000000..1451068b8700a1278f0cdf9c6cff431c60264725 --- /dev/null +++ b/wandb/run-20220504_095140-cwhobv6l/run-cwhobv6l.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b17524328f5ecfcbc10674f5443d3e22974d37a7e2a290d455bd5784a6cf9fe +size 16915806 diff --git a/wandb/run-20220504_142129-w4rlzz90/files/config.yaml b/wandb/run-20220504_142129-w4rlzz90/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a962ec14cca5bf98dff8b588494d87a3cc6fecc4 --- /dev/null +++ b/wandb/run-20220504_142129-w4rlzz90/files/config.yaml @@ -0,0 +1,9388 @@ +wandb_version: 1 + +_n_gpu: + desc: null + value: 1 +_name_or_path: + desc: null + value: ./ +_wandb: + desc: null + value: + cli_version: 0.12.10 + framework: huggingface + huggingface_version: 4.19.0.dev0 + is_jupyter_run: false + is_kaggle_kernel: false + m: + - 1: train/global_step + 6: + - 3 + - 1: train/loss + 5: 1 + 6: + - 1 + - 1: train/learning_rate + 5: 1 + 6: + - 1 + - 1: train/epoch + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.weight.bins + 5: 1 
+ 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.11\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.10\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.9\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.encoder_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: 
gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.9\.self_attn\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.weight._type + 5: 1 
+ 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc2\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.fc1\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/decoder\.model\.decoder\.layers\.8\.encoder_attn\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: 
+ (wandb run-config metric definitions, condensed: histogram entries with the keys `._type`, `.values` and `.bins`, each carrying the same `5: 1` / `6: [1, 1]` options, registered under the `gradients/` namespace for:
+   decoder\.model\.decoder\.layers\.8 through layers\.0 — self_attn and encoder_attn q_proj/k_proj/v_proj/out_proj weights and biases, self_attn_layer_norm, encoder_attn_layer_norm, fc1, fc2, final_layer_norm;
+   decoder\.model\.decoder\.layernorm_embedding, embed_positions, embed_tokens;
+   encoder\.adapter\.layers\.2 through 0 conv weights and biases;
+   encoder\.encoder\.layer_norm and, continuing, encoder\.encoder\.layers\.23\.feed_forward\.output_dense.)
gradients/encoder\.encoder\.layers\.23\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 
+ - 1: gradients/encoder\.encoder\.layers\.23\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.23\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.22\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.21\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.20\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 
+ - 1: gradients/encoder\.encoder\.layers\.20\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.20\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.19\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.18\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.17\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 
+ - 1: gradients/encoder\.encoder\.layers\.17\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.17\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.16\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.values + 5: 1 + 
6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.15\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: 
gradients/encoder\.encoder\.layers\.14\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.v_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.k_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.values + 5: 1 + 6: + - 1 
+ - 1: gradients/encoder\.encoder\.layers\.14\.attention\.q_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.14\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.output_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.feed_forward\.intermediate_dense\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.final_layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.out_proj\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.encoder\.layers\.13\.attention\.v_proj\.bias.values + 5: 1 + 
6: + - 1 + [W&B gradient-histogram metric definitions: _type, values, and bins entries for every tracked weight and bias of gradients/encoder.encoder.layers.13 (remaining attention k/q/v projections and layer_norm) down through gradients/encoder.encoder.layers.0 — attention q/k/v/out_proj, feed_forward intermediate_dense and output_dense, layer_norm, final_layer_norm — plus encoder.encoder.pos_conv_embed.conv (bias, weight_v, weight_g) and encoder.masked_spec_embed; the list continues with encoder.feature_projection on the next line.]
gradients/encoder\.feature_projection\.projection\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.projection\.bias.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.weight.bins + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias._type + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.values + 5: 1 + 6: + - 1 + - 1: gradients/encoder\.feature_projection\.layer_norm\.bias.bins + 5: 1 + 6: + - 1 + - 1: eval/loss + 5: 1 + 6: + - 1 + - 1: eval/bleu + 5: 1 + 6: + - 1 + - 1: eval/runtime + 5: 1 + 6: + - 1 + - 1: eval/samples_per_second + 5: 1 + 6: + - 1 + - 1: eval/steps_per_second + 5: 1 + 6: + - 1 + python_version: 3.9.5 + start_time: 1651674089 + t: + 1: + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + 3: + - 13 + 4: 3.9.5 + 5: 0.12.10 + 6: 4.19.0.dev0 + 8: + - 5 +adafactor: + desc: null + value: false +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.999 +adam_epsilon: + desc: null + value: 1.0e-08 +add_cross_attention: + desc: null + value: false +architectures: + desc: null + value: + - SpeechEncoderDecoderModel +bad_words_ids: + desc: null + value: null +bf16: + desc: null + value: false +bf16_full_eval: + desc: null + value: false +bos_token_id: + desc: null + value: null +chunk_size_feed_forward: + desc: null + value: 0 +cross_attention_hidden_size: + desc: null + value: null +data_seed: + desc: null + value: None +dataloader_drop_last: + desc: null + value: false +dataloader_num_workers: + desc: null + value: 0 +dataloader_pin_memory: + desc: null + value: true +ddp_bucket_cap_mb: + desc: null + value: None +ddp_find_unused_parameters: + desc: null + value: None +debug: + desc: null + value: '[]' +decoder: + desc: null + value: + _name_or_path: facebook/bart-large + activation_dropout: 0.1 + activation_function: gelu + add_bias_logits: false + add_cross_attention: true + add_final_layer_norm: false + architectures: + - BartModel + attention_dropout: 0.1 + bad_words_ids: null + bos_token_id: 0 + chunk_size_feed_forward: 0 + classif_dropout: 0.1 + classifier_dropout: 0.0 + cross_attention_hidden_size: null + d_model: 1024 + decoder_attention_heads: 16 + decoder_ffn_dim: 4096 + decoder_layerdrop: 0.0 + decoder_layers: 12 + decoder_start_token_id: 2 + diversity_penalty: 0.0 + do_sample: false + dropout: 0.1 + early_stopping: true + encoder_attention_heads: 16 + encoder_ffn_dim: 4096 + encoder_layerdrop: 0.0 + encoder_layers: 12 + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + exponential_decay_length_penalty: null + finetuning_task: null + forced_bos_token_id: 0 + forced_eos_token_id: 2 + gradient_checkpointing: false + id2label: + '0': LABEL_0 + '1': LABEL_1 + '2': LABEL_2 + init_std: 0.02 + is_decoder: true + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + LABEL_2: 2 + length_penalty: 1.0 + max_length: 20 + max_position_embeddings: 1024 + min_length: 0 + model_type: bart + no_repeat_ngram_size: 3 + 
normalize_before: false + num_beam_groups: 1 + num_beams: 4 + num_hidden_layers: 12 + num_return_sequences: 1 + output_attentions: false + output_hidden_states: false + output_scores: false + pad_token_id: 1 + prefix: null + problem_type: null + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + scale_embedding: false + sep_token_id: null + task_specific_params: + summarization: + length_penalty: 1.0 + max_length: 128 + min_length: 12 + num_beams: 4 + summarization_cnn: + length_penalty: 2.0 + max_length: 142 + min_length: 56 + num_beams: 4 + summarization_xsum: + length_penalty: 1.0 + max_length: 62 + min_length: 11 + num_beams: 6 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: null + torchscript: false + transformers_version: 4.19.0.dev0 + typical_p: 1.0 + use_bfloat16: false + use_cache: true + vocab_size: 50265 +decoder_start_token_id: + desc: null + value: 0 +deepspeed: + desc: null + value: None +disable_tqdm: + desc: null + value: false +diversity_penalty: + desc: null + value: 0.0 +do_eval: + desc: null + value: true +do_predict: + desc: null + value: false +do_sample: + desc: null + value: false +do_train: + desc: null + value: true +early_stopping: + desc: null + value: false +encoder: + desc: null + value: + _name_or_path: facebook/wav2vec2-xls-r-300m + activation_dropout: 0.0 + adapter_kernel_size: 3 + adapter_stride: 2 + add_adapter: true + add_cross_attention: false + apply_spec_augment: true + architectures: + - Wav2Vec2ForPreTraining + attention_dropout: 0.1 + bad_words_ids: null + bos_token_id: 1 + chunk_size_feed_forward: 0 + classifier_proj_size: 256 + codevector_dim: 768 + contrastive_logits_temperature: 0.1 + conv_bias: true + conv_dim: + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + - 512 + conv_kernel: + - 10 + - 3 + - 3 + - 3 + - 3 + - 2 + - 2 + conv_stride: + - 5 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + cross_attention_hidden_size: null + ctc_loss_reduction: sum + ctc_zero_infinity: false + decoder_start_token_id: null + diversity_loss_weight: 0.1 + diversity_penalty: 0.0 + do_sample: false + do_stable_layer_norm: true + early_stopping: false + encoder_no_repeat_ngram_size: 0 + eos_token_id: 2 + exponential_decay_length_penalty: null + feat_extract_activation: gelu + feat_extract_dropout: 0.0 + feat_extract_norm: layer + feat_proj_dropout: 0.0 + feat_quantizer_dropout: 0.0 + final_dropout: 0.0 + finetuning_task: null + forced_bos_token_id: null + forced_eos_token_id: null + gradient_checkpointing: false + hidden_act: gelu + hidden_dropout: 0.17305159310134854 + hidden_size: 1024 + id2label: + '0': LABEL_0 + '1': LABEL_1 + initializer_range: 0.02 + intermediate_size: 4096 + is_decoder: false + is_encoder_decoder: false + label2id: + LABEL_0: 0 + LABEL_1: 1 + layer_norm_eps: 1.0e-05 + layerdrop: 0.0 + length_penalty: 1.0 + mask_feature_length: 10 + mask_feature_min_masks: 0 + mask_feature_prob: 0.0 + mask_time_length: 10 + mask_time_min_masks: 2 + mask_time_prob: 0.1 + max_length: 20 + min_length: 0 + model_type: wav2vec2 + no_repeat_ngram_size: 0 + num_adapter_layers: 3 + num_attention_heads: 16 + num_beam_groups: 1 + num_beams: 1 + num_codevector_groups: 2 + num_codevectors_per_group: 320 + num_conv_pos_embedding_groups: 16 + num_conv_pos_embeddings: 128 + num_feat_extract_layers: 7 + num_hidden_layers: 24 + num_negatives: 100 + num_return_sequences: 1 + output_attentions: false + 
output_hidden_size: 1024 + output_hidden_states: false + output_scores: false + pad_token_id: 0 + prefix: null + problem_type: null + proj_codevector_dim: 768 + pruned_heads: {} + remove_invalid_values: false + repetition_penalty: 1.0 + return_dict: true + return_dict_in_generate: false + sep_token_id: null + task_specific_params: null + tdnn_dilation: + - 1 + - 2 + - 3 + - 1 + - 1 + tdnn_dim: + - 512 + - 512 + - 512 + - 512 + - 1500 + tdnn_kernel: + - 5 + - 3 + - 3 + - 1 + - 1 + temperature: 1.0 + tie_encoder_decoder: false + tie_word_embeddings: true + tokenizer_class: null + top_k: 50 + top_p: 1.0 + torch_dtype: float32 + torchscript: false + transformers_version: 4.19.0.dev0 + typical_p: 1.0 + use_bfloat16: false + use_weighted_layer_sum: false + vocab_size: 32 + xvector_output_dim: 512 +encoder_no_repeat_ngram_size: + desc: null + value: 0 +eos_token_id: + desc: null + value: 2 +eval_accumulation_steps: + desc: null + value: None +eval_batch_size: + desc: null + value: 8 +eval_delay: + desc: null + value: 0 +eval_split_name: + desc: null + value: test +eval_steps: + desc: null + value: 500 +evaluation_strategy: + desc: null + value: steps +exponential_decay_length_penalty: + desc: null + value: null +finetuning_task: + desc: null + value: null +forced_bos_token_id: + desc: null + value: null +forced_eos_token_id: + desc: null + value: null +fp16: + desc: null + value: true +fp16_backend: + desc: null + value: auto +fp16_full_eval: + desc: null + value: false +fp16_opt_level: + desc: null + value: O1 +generation_max_length: + desc: null + value: 40 +generation_num_beams: + desc: null + value: 1 +gradient_accumulation_steps: + desc: null + value: 8 +gradient_checkpointing: + desc: null + value: true +greater_is_better: + desc: null + value: true +group_by_length: + desc: null + value: true +half_precision_backend: + desc: null + value: amp +hidden_dropout: + desc: null + value: 0.17305159310134854 +hub_model_id: + desc: null + value: None +hub_private_repo: + desc: null + value: false +hub_strategy: + desc: null + value: every_save +hub_token: + desc: null + value: +id2label: + desc: null + value: + '0': LABEL_0 + '1': LABEL_1 +ignore_data_skip: + desc: null + value: false +include_inputs_for_metrics: + desc: null + value: false +is_decoder: + desc: null + value: false +is_encoder_decoder: + desc: null + value: true +label2id: + desc: null + value: + LABEL_0: 0 + LABEL_1: 1 +label_names: + desc: null + value: None +label_smoothing_factor: + desc: null + value: 0.0 +language: + desc: null + value: fr.en +learning_rate: + desc: null + value: 0.00012335092351490598 +length_column_name: + desc: null + value: length +length_penalty: + desc: null + value: 1.0 +load_best_model_at_end: + desc: null + value: true +local_rank: + desc: null + value: -1 +log_level: + desc: null + value: -1 +log_level_replica: + desc: null + value: -1 +log_on_each_node: + desc: null + value: true +logging_dir: + desc: null + value: ./runs/May04_13-30-49_sanchit--v100 +logging_first_step: + desc: null + value: false +logging_nan_inf_filter: + desc: null + value: true +logging_steps: + desc: null + value: 1 +logging_strategy: + desc: null + value: steps +lr_scheduler_type: + desc: null + value: linear +max_duration_in_seconds: + desc: null + value: 20 +max_grad_norm: + desc: null + value: 1.0 +max_length: + desc: null + value: 40 +max_steps: + desc: null + value: -1 +metric_for_best_model: + desc: null + value: bleu +min_length: + desc: null + value: 0 +model_name_or_path: + desc: null + value: ./ +model_type: + desc: 
null + value: speech-encoder-decoder +mp_parameters: + desc: null + value: '' +no_cuda: + desc: null + value: false +no_repeat_ngram_size: + desc: null + value: 0 +num_beam_groups: + desc: null + value: 1 +num_beams: + desc: null + value: 1 +num_return_sequences: + desc: null + value: 1 +num_train_epochs: + desc: null + value: 3 +optim: + desc: null + value: adamw_hf +output_attentions: + desc: null + value: false +output_dir: + desc: null + value: ./ +output_hidden_states: + desc: null + value: false +output_scores: + desc: null + value: false +overwrite_output_dir: + desc: null + value: true +pad_token_id: + desc: null + value: 1 +past_index: + desc: null + value: -1 +per_device_eval_batch_size: + desc: null + value: 8 +per_device_train_batch_size: + desc: null + value: 8 +per_gpu_eval_batch_size: + desc: null + value: None +per_gpu_train_batch_size: + desc: null + value: None +predict_with_generate: + desc: null + value: true +prediction_loss_only: + desc: null + value: false +prefix: + desc: null + value: null +problem_type: + desc: null + value: null +processor_class: + desc: null + value: Wav2Vec2Processor +pruned_heads: + desc: null + value: {} +push_to_hub: + desc: null + value: true +push_to_hub_model_id: + desc: null + value: None +push_to_hub_organization: + desc: null + value: None +push_to_hub_token: + desc: null + value: +remove_invalid_values: + desc: null + value: false +remove_unused_columns: + desc: null + value: true +repetition_penalty: + desc: null + value: 1.0 +report_to: + desc: null + value: '[''tensorboard'', ''wandb'', ''codecarbon'']' +resume_from_checkpoint: + desc: null + value: None +return_dict: + desc: null + value: true +return_dict_in_generate: + desc: null + value: false +run_name: + desc: null + value: ./ +save_on_each_node: + desc: null + value: false +save_steps: + desc: null + value: 500 +save_strategy: + desc: null + value: steps +save_total_limit: + desc: null + value: None +seed: + desc: null + value: 42 +sep_token_id: + desc: null + value: null +sharded_ddp: + desc: null + value: '[]' +skip_memory_metrics: + desc: null + value: true +sortish_sampler: + desc: null + value: false +task: + desc: null + value: covost2 +task_specific_params: + desc: null + value: null +temperature: + desc: null + value: 1.0 +tf32: + desc: null + value: None +tie_encoder_decoder: + desc: null + value: false +tie_word_embeddings: + desc: null + value: false +tokenizer_class: + desc: null + value: null +top_k: + desc: null + value: 50 +top_p: + desc: null + value: 1.0 +torch_dtype: + desc: null + value: torch.float32 +torchscript: + desc: null + value: false +tpu_metrics_debug: + desc: null + value: false +tpu_num_cores: + desc: null + value: None +train_batch_size: + desc: null + value: 8 +transformers_version: + desc: null + value: null +typical_p: + desc: null + value: 1.0 +use_bfloat16: + desc: null + value: false +use_cache: + desc: null + value: false +use_legacy_prediction_loop: + desc: null + value: false +warmup_ratio: + desc: null + value: 0.0 +warmup_steps: + desc: null + value: 500 +weight_decay: + desc: null + value: 0.0 +xpu_backend: + desc: null + value: None diff --git a/wandb/run-20220504_142129-w4rlzz90/files/output.log b/wandb/run-20220504_142129-w4rlzz90/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..77030c35f8859d7719586e23a80df6f418540da6 --- /dev/null +++ b/wandb/run-20220504_142129-w4rlzz90/files/output.log @@ -0,0 +1,3367 @@ +wandb: WARNING Config item 'output_dir' was locked by 'sweep' (ignored update). 
+wandb: WARNING Config item 'evaluation_strategy' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'per_device_train_batch_size' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'per_device_eval_batch_size' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'gradient_accumulation_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'learning_rate' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'num_train_epochs' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'warmup_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'logging_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'save_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'eval_steps' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'metric_for_best_model' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'greater_is_better' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'generation_max_length' was locked by 'sweep' (ignored update). +wandb: WARNING Config item 'generation_num_beams' was locked by 'sweep' (ignored update). + 0%| | 0/9720 [00:00', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8} +2022-05-04 14:21:35,290 INFO MainThread:50430 [wandb_watch.py:watch():43] Watching diff --git a/wandb/run-20220504_142129-w4rlzz90/run-w4rlzz90.wandb b/wandb/run-20220504_142129-w4rlzz90/run-w4rlzz90.wandb new file mode 100644 index 0000000000000000000000000000000000000000..09bcfd7ee11667c96e037c858f44e8e304f9862d --- /dev/null +++ b/wandb/run-20220504_142129-w4rlzz90/run-w4rlzz90.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3616310918127bd61378fd052e8d32c119cbe17a00a48ba056dfbd70f31c5607 +size 52804765 diff --git a/wandb/sweep-39ci3gkf/config-02efb61k.yaml b/wandb/sweep-39ci3gkf/config-02efb61k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-036adkae.yaml b/wandb/sweep-39ci3gkf/config-036adkae.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-0jsw6y52.yaml b/wandb/sweep-39ci3gkf/config-0jsw6y52.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-0tjnxp9c.yaml b/wandb/sweep-39ci3gkf/config-0tjnxp9c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80ffa6b1e7380a2a708f5790108942062a1f10e9 --- /dev/null +++ b/wandb/sweep-39ci3gkf/config-0tjnxp9c.yaml @@ -0,0 +1,44 @@ +wandb_version: 1 + +eval_split_name: + value: test +eval_steps: + value: 500 +evaluation_strategy: + value: steps +generation_max_length: + value: 40 +generation_num_beams: + value: 1 +gradient_accumulation_steps: + value: 8 +greater_is_better: + value: true +hidden_dropout: + value: 0.04710763887058459 +language: + value: fr.en +learning_rate: + value: 0.0004931764407845492 +logging_steps: + value: 1 +max_duration_in_seconds: + value: 20 +metric_for_best_model: + value: 
bleu +model_name_or_path: + value: ./ +num_train_epochs: + value: 3 +output_dir: + value: ./ +per_device_eval_batch_size: + value: 4 +per_device_train_batch_size: + value: 4 +save_steps: + value: 500 +task: + value: covost2 +warmup_steps: + value: 500 diff --git a/wandb/sweep-39ci3gkf/config-133w4qs4.yaml b/wandb/sweep-39ci3gkf/config-133w4qs4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-13mscymm.yaml b/wandb/sweep-39ci3gkf/config-13mscymm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-189blfh0.yaml b/wandb/sweep-39ci3gkf/config-189blfh0.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-1c5eo75h.yaml b/wandb/sweep-39ci3gkf/config-1c5eo75h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-1me8izq9.yaml b/wandb/sweep-39ci3gkf/config-1me8izq9.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-1nhl7zrl.yaml b/wandb/sweep-39ci3gkf/config-1nhl7zrl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-1njcrmd6.yaml b/wandb/sweep-39ci3gkf/config-1njcrmd6.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-2drv4evk.yaml b/wandb/sweep-39ci3gkf/config-2drv4evk.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-2fvd85by.yaml b/wandb/sweep-39ci3gkf/config-2fvd85by.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-2hw9e51o.yaml b/wandb/sweep-39ci3gkf/config-2hw9e51o.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-2tl78ghc.yaml b/wandb/sweep-39ci3gkf/config-2tl78ghc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-30riinnb.yaml b/wandb/sweep-39ci3gkf/config-30riinnb.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-31gui8xs.yaml b/wandb/sweep-39ci3gkf/config-31gui8xs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-3baljyl7.yaml b/wandb/sweep-39ci3gkf/config-3baljyl7.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-3co4nuvv.yaml b/wandb/sweep-39ci3gkf/config-3co4nuvv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-3jj2xpak.yaml b/wandb/sweep-39ci3gkf/config-3jj2xpak.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-3st78hww.yaml b/wandb/sweep-39ci3gkf/config-3st78hww.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-3v04mqas.yaml b/wandb/sweep-39ci3gkf/config-3v04mqas.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-437lflmq.yaml b/wandb/sweep-39ci3gkf/config-437lflmq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fc1d12b41dccd319d2cae82d2c8dbb983c06930 --- /dev/null +++ b/wandb/sweep-39ci3gkf/config-437lflmq.yaml @@ -0,0 +1,44 @@ +wandb_version: 1 + +eval_split_name: + value: test +eval_steps: + value: 500 +evaluation_strategy: + value: steps +generation_max_length: + value: 40 +generation_num_beams: + value: 1 +gradient_accumulation_steps: + value: 8 +greater_is_better: + value: true +hidden_dropout: + value: 0.0426720669293319 +language: + value: fr.en +learning_rate: + value: 0.00036405375863644303 +logging_steps: + value: 1 +max_duration_in_seconds: + value: 20 +metric_for_best_model: + value: bleu +model_name_or_path: + value: ./ +num_train_epochs: + value: 3 +output_dir: + value: ./ +per_device_eval_batch_size: + value: 4 +per_device_train_batch_size: + value: 4 +save_steps: + value: 500 +task: + value: covost2 +warmup_steps: + value: 500 diff --git a/wandb/sweep-39ci3gkf/config-452y1wxm.yaml b/wandb/sweep-39ci3gkf/config-452y1wxm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-49n7u239.yaml b/wandb/sweep-39ci3gkf/config-49n7u239.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-4f5zlftm.yaml b/wandb/sweep-39ci3gkf/config-4f5zlftm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-4mmi5hx5.yaml b/wandb/sweep-39ci3gkf/config-4mmi5hx5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5647deek.yaml b/wandb/sweep-39ci3gkf/config-5647deek.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5hyr1y5a.yaml b/wandb/sweep-39ci3gkf/config-5hyr1y5a.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5kkiy2u5.yaml b/wandb/sweep-39ci3gkf/config-5kkiy2u5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5pfd4uix.yaml b/wandb/sweep-39ci3gkf/config-5pfd4uix.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5q2cdm5r.yaml b/wandb/sweep-39ci3gkf/config-5q2cdm5r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5qd5ddb0.yaml b/wandb/sweep-39ci3gkf/config-5qd5ddb0.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-5vjkay2d.yaml b/wandb/sweep-39ci3gkf/config-5vjkay2d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-6f557slg.yaml b/wandb/sweep-39ci3gkf/config-6f557slg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-6fu9x1b5.yaml b/wandb/sweep-39ci3gkf/config-6fu9x1b5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-7lvxldon.yaml b/wandb/sweep-39ci3gkf/config-7lvxldon.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-7pmn4z8b.yaml b/wandb/sweep-39ci3gkf/config-7pmn4z8b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-7vb8oyjq.yaml b/wandb/sweep-39ci3gkf/config-7vb8oyjq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-8bxh2txl.yaml b/wandb/sweep-39ci3gkf/config-8bxh2txl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-8imh7535.yaml b/wandb/sweep-39ci3gkf/config-8imh7535.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-8sk2da4c.yaml b/wandb/sweep-39ci3gkf/config-8sk2da4c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-8v35e2h3.yaml b/wandb/sweep-39ci3gkf/config-8v35e2h3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-8y1zwzwd.yaml b/wandb/sweep-39ci3gkf/config-8y1zwzwd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-91w21ocr.yaml b/wandb/sweep-39ci3gkf/config-91w21ocr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-9j8kytdd.yaml b/wandb/sweep-39ci3gkf/config-9j8kytdd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ab6gpo3h.yaml b/wandb/sweep-39ci3gkf/config-ab6gpo3h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ai8yufre.yaml b/wandb/sweep-39ci3gkf/config-ai8yufre.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-asp4iq4p.yaml b/wandb/sweep-39ci3gkf/config-asp4iq4p.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-b2l825wt.yaml b/wandb/sweep-39ci3gkf/config-b2l825wt.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-b4ex84oq.yaml b/wandb/sweep-39ci3gkf/config-b4ex84oq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-box7ugpr.yaml b/wandb/sweep-39ci3gkf/config-box7ugpr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-c5dtaxg7.yaml b/wandb/sweep-39ci3gkf/config-c5dtaxg7.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-c6wgqyba.yaml b/wandb/sweep-39ci3gkf/config-c6wgqyba.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-cae9f436.yaml b/wandb/sweep-39ci3gkf/config-cae9f436.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-cd24km5d.yaml b/wandb/sweep-39ci3gkf/config-cd24km5d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-cd6cmhdu.yaml b/wandb/sweep-39ci3gkf/config-cd6cmhdu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ct5ki3nl.yaml b/wandb/sweep-39ci3gkf/config-ct5ki3nl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-cwhobv6l.yaml b/wandb/sweep-39ci3gkf/config-cwhobv6l.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51df51a56c8a59eee23ab9d2db759bb0290ef228 --- /dev/null +++ b/wandb/sweep-39ci3gkf/config-cwhobv6l.yaml @@ -0,0 +1,44 @@ +wandb_version: 1 + +eval_split_name: + value: test +eval_steps: + value: 500 +evaluation_strategy: + value: steps +generation_max_length: + value: 40 +generation_num_beams: + value: 1 +gradient_accumulation_steps: + value: 8 +greater_is_better: + value: true +hidden_dropout: + value: 0.043267782095468554 +language: + value: fr.en +learning_rate: + value: 0.0009027256702272704 +logging_steps: + value: 1 +max_duration_in_seconds: + value: 20 +metric_for_best_model: + value: bleu +model_name_or_path: + value: ./ +num_train_epochs: + value: 3 +output_dir: + value: ./ +per_device_eval_batch_size: + value: 4 +per_device_train_batch_size: + value: 4 +save_steps: + value: 500 +task: + value: covost2 +warmup_steps: + value: 500 diff --git a/wandb/sweep-39ci3gkf/config-cxniex7c.yaml b/wandb/sweep-39ci3gkf/config-cxniex7c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-d86vblpz.yaml b/wandb/sweep-39ci3gkf/config-d86vblpz.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-dbr6hvzf.yaml b/wandb/sweep-39ci3gkf/config-dbr6hvzf.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-dlole7om.yaml b/wandb/sweep-39ci3gkf/config-dlole7om.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-dt0a7a5g.yaml b/wandb/sweep-39ci3gkf/config-dt0a7a5g.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-dxcjr49y.yaml b/wandb/sweep-39ci3gkf/config-dxcjr49y.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-en5jd5ws.yaml b/wandb/sweep-39ci3gkf/config-en5jd5ws.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-feb7ccin.yaml b/wandb/sweep-39ci3gkf/config-feb7ccin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ftlbqcvh.yaml b/wandb/sweep-39ci3gkf/config-ftlbqcvh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-fulme1ky.yaml b/wandb/sweep-39ci3gkf/config-fulme1ky.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-gg7t8oyj.yaml b/wandb/sweep-39ci3gkf/config-gg7t8oyj.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-gv47m2wj.yaml b/wandb/sweep-39ci3gkf/config-gv47m2wj.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-gxg6ypwr.yaml b/wandb/sweep-39ci3gkf/config-gxg6ypwr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-gzpu6h4y.yaml b/wandb/sweep-39ci3gkf/config-gzpu6h4y.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-h58qh9e5.yaml b/wandb/sweep-39ci3gkf/config-h58qh9e5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-hg9yajo1.yaml b/wandb/sweep-39ci3gkf/config-hg9yajo1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-i1t41ud3.yaml b/wandb/sweep-39ci3gkf/config-i1t41ud3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-i2l0mo8u.yaml b/wandb/sweep-39ci3gkf/config-i2l0mo8u.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-i5695zp6.yaml b/wandb/sweep-39ci3gkf/config-i5695zp6.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-i98ajb30.yaml b/wandb/sweep-39ci3gkf/config-i98ajb30.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-iiaoedtq.yaml b/wandb/sweep-39ci3gkf/config-iiaoedtq.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ikre2wst.yaml b/wandb/sweep-39ci3gkf/config-ikre2wst.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-itmb5mo6.yaml b/wandb/sweep-39ci3gkf/config-itmb5mo6.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-j1zcp89y.yaml b/wandb/sweep-39ci3gkf/config-j1zcp89y.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-j5n5jrs8.yaml b/wandb/sweep-39ci3gkf/config-j5n5jrs8.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-j6x8ehwj.yaml b/wandb/sweep-39ci3gkf/config-j6x8ehwj.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-jh0kzmzn.yaml b/wandb/sweep-39ci3gkf/config-jh0kzmzn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-jhqap352.yaml b/wandb/sweep-39ci3gkf/config-jhqap352.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-jmiinif8.yaml b/wandb/sweep-39ci3gkf/config-jmiinif8.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-jz5ubc9o.yaml b/wandb/sweep-39ci3gkf/config-jz5ubc9o.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-jzdyrl99.yaml b/wandb/sweep-39ci3gkf/config-jzdyrl99.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-k3rsk6hj.yaml b/wandb/sweep-39ci3gkf/config-k3rsk6hj.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-kh571ag8.yaml b/wandb/sweep-39ci3gkf/config-kh571ag8.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-kihto6w5.yaml b/wandb/sweep-39ci3gkf/config-kihto6w5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-kstpp505.yaml b/wandb/sweep-39ci3gkf/config-kstpp505.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lf5dwjqi.yaml b/wandb/sweep-39ci3gkf/config-lf5dwjqi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lfb2nohc.yaml b/wandb/sweep-39ci3gkf/config-lfb2nohc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lim2java.yaml b/wandb/sweep-39ci3gkf/config-lim2java.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lkb4hw82.yaml b/wandb/sweep-39ci3gkf/config-lkb4hw82.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lunpgy62.yaml b/wandb/sweep-39ci3gkf/config-lunpgy62.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ly209ho3.yaml b/wandb/sweep-39ci3gkf/config-ly209ho3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-lya2nvhn.yaml b/wandb/sweep-39ci3gkf/config-lya2nvhn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-m0xepxjk.yaml b/wandb/sweep-39ci3gkf/config-m0xepxjk.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-mkqmicar.yaml b/wandb/sweep-39ci3gkf/config-mkqmicar.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-mp1o9uph.yaml b/wandb/sweep-39ci3gkf/config-mp1o9uph.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-n8g9e9r6.yaml b/wandb/sweep-39ci3gkf/config-n8g9e9r6.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-nt9h2vnq.yaml b/wandb/sweep-39ci3gkf/config-nt9h2vnq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ntyz8v75.yaml b/wandb/sweep-39ci3gkf/config-ntyz8v75.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-nw8moz4m.yaml b/wandb/sweep-39ci3gkf/config-nw8moz4m.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-osrfy83j.yaml b/wandb/sweep-39ci3gkf/config-osrfy83j.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-p74sw8in.yaml b/wandb/sweep-39ci3gkf/config-p74sw8in.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-p835ehx4.yaml b/wandb/sweep-39ci3gkf/config-p835ehx4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-p9ce37wy.yaml b/wandb/sweep-39ci3gkf/config-p9ce37wy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-pilhrgo3.yaml b/wandb/sweep-39ci3gkf/config-pilhrgo3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-q6twnwyu.yaml b/wandb/sweep-39ci3gkf/config-q6twnwyu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-qljts08y.yaml b/wandb/sweep-39ci3gkf/config-qljts08y.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-qup1mfrh.yaml b/wandb/sweep-39ci3gkf/config-qup1mfrh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-qxryhq8t.yaml b/wandb/sweep-39ci3gkf/config-qxryhq8t.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-rb4f3k1g.yaml b/wandb/sweep-39ci3gkf/config-rb4f3k1g.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-rcdig5d8.yaml b/wandb/sweep-39ci3gkf/config-rcdig5d8.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ryaf84w1.yaml b/wandb/sweep-39ci3gkf/config-ryaf84w1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-sfj13ewe.yaml b/wandb/sweep-39ci3gkf/config-sfj13ewe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-szth3v3g.yaml b/wandb/sweep-39ci3gkf/config-szth3v3g.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-t4qsp0ve.yaml b/wandb/sweep-39ci3gkf/config-t4qsp0ve.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-t4qvhqr3.yaml b/wandb/sweep-39ci3gkf/config-t4qvhqr3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-tlbuh3kz.yaml b/wandb/sweep-39ci3gkf/config-tlbuh3kz.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-tmfj4jpq.yaml b/wandb/sweep-39ci3gkf/config-tmfj4jpq.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-tojhdszj.yaml b/wandb/sweep-39ci3gkf/config-tojhdszj.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-tpbxk6ny.yaml b/wandb/sweep-39ci3gkf/config-tpbxk6ny.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-u1ga9wqd.yaml b/wandb/sweep-39ci3gkf/config-u1ga9wqd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-u39m1vrt.yaml b/wandb/sweep-39ci3gkf/config-u39m1vrt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ufnbozze.yaml b/wandb/sweep-39ci3gkf/config-ufnbozze.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-uhrny9vf.yaml b/wandb/sweep-39ci3gkf/config-uhrny9vf.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-upa19yyk.yaml b/wandb/sweep-39ci3gkf/config-upa19yyk.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-v1lzjewd.yaml b/wandb/sweep-39ci3gkf/config-v1lzjewd.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-vdsi97g4.yaml b/wandb/sweep-39ci3gkf/config-vdsi97g4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-vpkow8hh.yaml b/wandb/sweep-39ci3gkf/config-vpkow8hh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-vt59bvt4.yaml b/wandb/sweep-39ci3gkf/config-vt59bvt4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-vxmpw7g1.yaml b/wandb/sweep-39ci3gkf/config-vxmpw7g1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-w0ughkez.yaml b/wandb/sweep-39ci3gkf/config-w0ughkez.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-w7qxvq9e.yaml b/wandb/sweep-39ci3gkf/config-w7qxvq9e.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-wa1zgb3b.yaml b/wandb/sweep-39ci3gkf/config-wa1zgb3b.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-xb7ml2uh.yaml b/wandb/sweep-39ci3gkf/config-xb7ml2uh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-xkcbtlad.yaml b/wandb/sweep-39ci3gkf/config-xkcbtlad.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-xsrh468t.yaml b/wandb/sweep-39ci3gkf/config-xsrh468t.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-y46ij0ab.yaml b/wandb/sweep-39ci3gkf/config-y46ij0ab.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-y80blmqi.yaml b/wandb/sweep-39ci3gkf/config-y80blmqi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ynp6pqx0.yaml b/wandb/sweep-39ci3gkf/config-ynp6pqx0.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-yrvng6zd.yaml b/wandb/sweep-39ci3gkf/config-yrvng6zd.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-ytqj5ak9.yaml b/wandb/sweep-39ci3gkf/config-ytqj5ak9.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-yx7z2kh3.yaml b/wandb/sweep-39ci3gkf/config-yx7z2kh3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-zjpq0zrp.yaml b/wandb/sweep-39ci3gkf/config-zjpq0zrp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-zo5ad1jz.yaml b/wandb/sweep-39ci3gkf/config-zo5ad1jz.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-zvlw3x5x.yaml b/wandb/sweep-39ci3gkf/config-zvlw3x5x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-39ci3gkf/config-zwuvydq9.yaml b/wandb/sweep-39ci3gkf/config-zwuvydq9.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/sweep-pvyx3mpp/config-w4rlzz90.yaml b/wandb/sweep-pvyx3mpp/config-w4rlzz90.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdc0d1342395c85882c622c734afdb6887688fc3 --- /dev/null +++ b/wandb/sweep-pvyx3mpp/config-w4rlzz90.yaml @@ -0,0 +1,44 @@ +wandb_version: 1 + +eval_split_name: + value: test +eval_steps: + value: 500 +evaluation_strategy: + value: steps +generation_max_length: + value: 40 +generation_num_beams: + value: 1 +gradient_accumulation_steps: + value: 8 +greater_is_better: + value: true +hidden_dropout: + value: 0.17305159310134854 +language: + value: fr.en +learning_rate: + value: 0.00012335092351490598 +logging_steps: + value: 1 +max_duration_in_seconds: + value: 20 +metric_for_best_model: + value: bleu +model_name_or_path: + value: ./ +num_train_epochs: + value: 3 +output_dir: + value: ./ +per_device_eval_batch_size: + value: 8 +per_device_train_batch_size: + value: 8 +save_steps: + value: 500 +task: + value: covost2 +warmup_steps: + value: 500
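
Note: the per-run files committed above (e.g. config-0tjnxp9c.yaml, config-437lflmq.yaml, config-cwhobv6l.yaml under wandb/sweep-39ci3gkf/, and config-w4rlzz90.yaml under wandb/sweep-pvyx3mpp/) all pin the same covost2 fr.en fine-tuning setup and differ only in hidden_dropout, learning_rate, and the per-device batch sizes, which is consistent with wandb sweep agents driving the training script and locking those values (see the "locked by 'sweep'" warnings in output.log). The sweep definition itself is not part of this diff, so the sketch below is a hypothetical reconstruction of what such a sweep could look like when registered from Python; the search method, parameter bounds, metric name, and project name are assumptions, not values recorded in this repository.

```python
import wandb

# Hypothetical reconstruction of the sweep behind the config-*.yaml files above.
# Only the fields that vary between runs are searched; everything that appears
# with a single value in every per-run config is pinned.
sweep_config = {
    "method": "random",  # assumption: the actual search strategy is not in the diff
    # matches metric_for_best_model: bleu / greater_is_better: true in the run config,
    # but the exact sweep metric name is an assumption
    "metric": {"name": "eval/bleu", "goal": "maximize"},
    "parameters": {
        # illustrative bounds chosen to cover the sampled values seen in the
        # committed configs (hidden_dropout ~0.043-0.173, learning_rate ~1.2e-4 to 9e-4);
        # the real bounds and distributions are unknown
        "hidden_dropout": {"distribution": "uniform", "min": 0.0, "max": 0.3},
        "learning_rate": {"distribution": "uniform", "min": 1e-5, "max": 1e-3},
        # sweep-39ci3gkf runs use 4, sweep-pvyx3mpp (run w4rlzz90) uses 8;
        # folded into one illustrative definition here
        "per_device_train_batch_size": {"values": [4, 8]},
        "per_device_eval_batch_size": {"values": [4, 8]},
        # pinned in every per-run config
        "gradient_accumulation_steps": {"value": 8},
        "num_train_epochs": {"value": 3},
        "warmup_steps": {"value": 500},
        "generation_max_length": {"value": 40},
        "generation_num_beams": {"value": 1},
        "task": {"value": "covost2"},
        "language": {"value": "fr.en"},
    },
}

# Registering the sweep returns an id like 39ci3gkf or pvyx3mpp; the project
# name here is an assumption.
sweep_id = wandb.sweep(sweep_config, project="xtreme_s")

# Runs such as cwhobv6l and w4rlzz90 would then be launched by sweep agents
# (e.g. `wandb agent <entity>/<project>/<sweep_id>` from the CLI), with the
# sampled values passed to the training script and locked against CLI overrides.
```

This is a sketch under the stated assumptions, not the sweep actually used to produce these runs; it is included only to make explicit how the many otherwise-empty config-*.yaml placeholders and the two populated per-run configs relate to a single hyperparameter search.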