| { | |
| "_name_or_path": "distributed/optimized-gpt2-1b", | |
| "activation_function": "gelu_new", | |
| "all_reduce_scores": { | |
| "0": "NON_PARTICIPATING", | |
| "1": "NON_PARTICIPATING", | |
| "10": "NON_PARTICIPATING", | |
| "100": "NON_PARTICIPATING", | |
| "101": "NON_PARTICIPATING", | |
| "102": "NON_PARTICIPATING", | |
| "103": "NON_PARTICIPATING", | |
| "104": "SUCCESS", | |
| "105": "NON_PARTICIPATING", | |
| "106": "NON_PARTICIPATING", | |
| "107": "NON_PARTICIPATING", | |
| "108": "NON_PARTICIPATING", | |
| "109": "NON_PARTICIPATING", | |
| "11": "SUCCESS", | |
| "110": "NON_PARTICIPATING", | |
| "111": "SUCCESS", | |
| "112": "NON_PARTICIPATING", | |
| "113": "NON_PARTICIPATING", | |
| "114": "NON_PARTICIPATING", | |
| "115": "NON_PARTICIPATING", | |
| "116": "NON_PARTICIPATING", | |
| "117": "SUCCESS", | |
| "118": "NON_PARTICIPATING", | |
| "119": "NON_PARTICIPATING", | |
| "12": "SUCCESS", | |
| "120": "SUCCESS", | |
| "121": "NON_PARTICIPATING", | |
| "122": "SUCCESS", | |
| "123": "NON_PARTICIPATING", | |
| "124": "NON_PARTICIPATING", | |
| "125": "SUCCESS", | |
| "126": "NON_PARTICIPATING", | |
| "127": "NON_PARTICIPATING", | |
| "128": "NON_PARTICIPATING", | |
| "129": "NON_PARTICIPATING", | |
| "13": "SUCCESS", | |
| "130": "NON_PARTICIPATING", | |
| "131": "NON_PARTICIPATING", | |
| "132": "NON_PARTICIPATING", | |
| "133": "NON_PARTICIPATING", | |
| "134": "NON_PARTICIPATING", | |
| "135": "NON_PARTICIPATING", | |
| "136": "NON_PARTICIPATING", | |
| "137": "NON_PARTICIPATING", | |
| "138": "NON_PARTICIPATING", | |
| "139": "NON_PARTICIPATING", | |
| "14": "NON_PARTICIPATING", | |
| "140": "NON_PARTICIPATING", | |
| "141": "SUCCESS", | |
| "142": "NON_PARTICIPATING", | |
| "143": "NON_PARTICIPATING", | |
| "144": "NON_PARTICIPATING", | |
| "145": "NON_PARTICIPATING", | |
| "146": "NON_PARTICIPATING", | |
| "147": "NON_PARTICIPATING", | |
| "148": "NON_PARTICIPATING", | |
| "149": "NON_PARTICIPATING", | |
| "15": "NON_PARTICIPATING", | |
| "150": "NON_PARTICIPATING", | |
| "151": "NON_PARTICIPATING", | |
| "152": "NON_PARTICIPATING", | |
| "153": "NON_PARTICIPATING", | |
| "154": "NON_PARTICIPATING", | |
| "155": "NON_PARTICIPATING", | |
| "156": "NON_PARTICIPATING", | |
| "157": "NON_PARTICIPATING", | |
| "158": "NON_PARTICIPATING", | |
| "159": "NON_PARTICIPATING", | |
| "16": "NON_PARTICIPATING", | |
| "160": "NON_PARTICIPATING", | |
| "161": "NON_PARTICIPATING", | |
| "162": "NON_PARTICIPATING", | |
| "163": "NON_PARTICIPATING", | |
| "164": "NON_PARTICIPATING", | |
| "165": "NON_PARTICIPATING", | |
| "166": "NON_PARTICIPATING", | |
| "167": "NON_PARTICIPATING", | |
| "168": "NON_PARTICIPATING", | |
| "169": "NON_PARTICIPATING", | |
| "17": "NON_PARTICIPATING", | |
| "170": "NON_PARTICIPATING", | |
| "171": "NON_PARTICIPATING", | |
| "172": "NON_PARTICIPATING", | |
| "173": "NON_PARTICIPATING", | |
| "174": "NON_PARTICIPATING", | |
| "175": "NON_PARTICIPATING", | |
| "176": "NON_PARTICIPATING", | |
| "177": "NON_PARTICIPATING", | |
| "178": "SUCCESS", | |
| "179": "NON_PARTICIPATING", | |
| "18": "SUCCESS", | |
| "180": "NON_PARTICIPATING", | |
| "181": "NON_PARTICIPATING", | |
| "182": "NON_PARTICIPATING", | |
| "183": "NON_PARTICIPATING", | |
| "184": "NON_PARTICIPATING", | |
| "185": "NON_PARTICIPATING", | |
| "186": "NON_PARTICIPATING", | |
| "187": "NON_PARTICIPATING", | |
| "188": "NON_PARTICIPATING", | |
| "189": "NON_PARTICIPATING", | |
| "19": "SUCCESS", | |
| "190": "NON_PARTICIPATING", | |
| "191": "NON_PARTICIPATING", | |
| "192": "NON_PARTICIPATING", | |
| "193": "NON_PARTICIPATING", | |
| "194": "NON_PARTICIPATING", | |
| "195": "NON_PARTICIPATING", | |
| "196": "NON_PARTICIPATING", | |
| "197": "NON_PARTICIPATING", | |
| "198": "NON_PARTICIPATING", | |
| "199": "NON_PARTICIPATING", | |
| "2": "SUCCESS", | |
| "20": "SUCCESS", | |
| "200": "NON_PARTICIPATING", | |
| "201": "NON_PARTICIPATING", | |
| "202": "NON_PARTICIPATING", | |
| "203": "NON_PARTICIPATING", | |
| "204": "NON_PARTICIPATING", | |
| "205": "NON_PARTICIPATING", | |
| "206": "NON_PARTICIPATING", | |
| "207": "NON_PARTICIPATING", | |
| "208": "NON_PARTICIPATING", | |
| "209": "NON_PARTICIPATING", | |
| "21": "NON_PARTICIPATING", | |
| "210": "NON_PARTICIPATING", | |
| "211": "NON_PARTICIPATING", | |
| "212": "NON_PARTICIPATING", | |
| "213": "NON_PARTICIPATING", | |
| "214": "NON_PARTICIPATING", | |
| "215": "NON_PARTICIPATING", | |
| "216": "NON_PARTICIPATING", | |
| "217": "NON_PARTICIPATING", | |
| "218": "NON_PARTICIPATING", | |
| "219": "NON_PARTICIPATING", | |
| "22": "NON_PARTICIPATING", | |
| "220": "NON_PARTICIPATING", | |
| "221": "NON_PARTICIPATING", | |
| "222": "NON_PARTICIPATING", | |
| "223": "SUCCESS", | |
| "224": "NON_PARTICIPATING", | |
| "225": "SUCCESS", | |
| "226": "NON_PARTICIPATING", | |
| "227": "NON_PARTICIPATING", | |
| "228": "NON_PARTICIPATING", | |
| "229": "NON_PARTICIPATING", | |
| "23": "SUCCESS", | |
| "230": "NON_PARTICIPATING", | |
| "231": "NON_PARTICIPATING", | |
| "232": "NON_PARTICIPATING", | |
| "233": "NON_PARTICIPATING", | |
| "234": "NON_PARTICIPATING", | |
| "235": "NON_PARTICIPATING", | |
| "236": "SUCCESS", | |
| "237": "NON_PARTICIPATING", | |
| "238": "NON_PARTICIPATING", | |
| "239": "NON_PARTICIPATING", | |
| "24": "SUCCESS", | |
| "240": "NON_PARTICIPATING", | |
| "241": "NON_PARTICIPATING", | |
| "242": "NON_PARTICIPATING", | |
| "243": "NON_PARTICIPATING", | |
| "244": "NON_PARTICIPATING", | |
| "245": "NON_PARTICIPATING", | |
| "246": "NON_PARTICIPATING", | |
| "247": "NON_PARTICIPATING", | |
| "248": "NON_PARTICIPATING", | |
| "249": "NON_PARTICIPATING", | |
| "25": "SUCCESS", | |
| "250": "SUCCESS", | |
| "251": "NON_PARTICIPATING", | |
| "252": "NON_PARTICIPATING", | |
| "253": "NON_PARTICIPATING", | |
| "254": "NON_PARTICIPATING", | |
| "255": "NON_PARTICIPATING", | |
| "26": "SUCCESS", | |
| "27": "SUCCESS", | |
| "28": "SUCCESS", | |
| "29": "NON_PARTICIPATING", | |
| "3": "NON_PARTICIPATING", | |
| "30": "NON_PARTICIPATING", | |
| "31": "NON_PARTICIPATING", | |
| "32": "SUCCESS", | |
| "33": "NON_PARTICIPATING", | |
| "34": "NON_PARTICIPATING", | |
| "35": "SUCCESS", | |
| "36": "SUCCESS", | |
| "37": "SUCCESS", | |
| "38": "SUCCESS", | |
| "39": "SUCCESS", | |
| "4": "SUCCESS", | |
| "40": "NON_PARTICIPATING", | |
| "41": "NON_PARTICIPATING", | |
| "42": "SUCCESS", | |
| "43": "NON_PARTICIPATING", | |
| "44": "NON_PARTICIPATING", | |
| "45": "NON_PARTICIPATING", | |
| "46": "NON_PARTICIPATING", | |
| "47": "SUCCESS", | |
| "48": "NON_PARTICIPATING", | |
| "49": "SUCCESS", | |
| "5": "SUCCESS", | |
| "50": "SUCCESS", | |
| "51": "SUCCESS", | |
| "52": "NON_PARTICIPATING", | |
| "53": "SUCCESS", | |
| "54": "NON_PARTICIPATING", | |
| "55": "SUCCESS", | |
| "56": "SUCCESS", | |
| "57": "NON_PARTICIPATING", | |
| "58": "NON_PARTICIPATING", | |
| "59": "NON_PARTICIPATING", | |
| "6": "NON_PARTICIPATING", | |
| "60": "NON_PARTICIPATING", | |
| "61": "NON_PARTICIPATING", | |
| "62": "NON_PARTICIPATING", | |
| "63": "SUCCESS", | |
| "64": "SUCCESS", | |
| "65": "NON_PARTICIPATING", | |
| "66": "SUCCESS", | |
| "67": "NON_PARTICIPATING", | |
| "68": "SUCCESS", | |
| "69": "SUCCESS", | |
| "7": "SUCCESS", | |
| "70": "SUCCESS", | |
| "71": "NON_PARTICIPATING", | |
| "72": "SUCCESS", | |
| "73": "NON_PARTICIPATING", | |
| "74": "SUCCESS", | |
| "75": "SUCCESS", | |
| "76": "NON_PARTICIPATING", | |
| "77": "NON_PARTICIPATING", | |
| "78": "SUCCESS", | |
| "79": "NON_PARTICIPATING", | |
| "8": "NON_PARTICIPATING", | |
| "80": "SUCCESS", | |
| "81": "SUCCESS", | |
| "82": "SUCCESS", | |
| "83": "SUCCESS", | |
| "84": "NON_PARTICIPATING", | |
| "85": "SUCCESS", | |
| "86": "NON_PARTICIPATING", | |
| "87": "NON_PARTICIPATING", | |
| "88": "SUCCESS", | |
| "89": "NON_PARTICIPATING", | |
| "9": "SUCCESS", | |
| "90": "SUCCESS", | |
| "91": "NON_PARTICIPATING", | |
| "92": "SUCCESS", | |
| "93": "NON_PARTICIPATING", | |
| "94": "NON_PARTICIPATING", | |
| "95": "NON_PARTICIPATING", | |
| "96": "SUCCESS", | |
| "97": "SUCCESS", | |
| "98": "NON_PARTICIPATING", | |
| "99": "SUCCESS" | |
| }, | |
| "architectures": [ | |
| "GPTOptim" | |
| ], | |
| "attn_pdrop": 0.1, | |
| "auto_map": { | |
| "AutoConfig": "distributed/optimized-gpt2-500m--configuration_gpt_optimized.GPTOptimConfig", | |
| "AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim" | |
| }, | |
| "block_list": [ | |
| 5630453, | |
| 5630481 | |
| ], | |
| "block_size": 1024, | |
| "bos_token_id": 50256, | |
| "embd_pdrop": 0.1, | |
| "eos_token_id": 50256, | |
| "initializer_range": 0.02, | |
| "inner_step": 304, | |
| "inner_steps": 0, | |
| "last_allreduce_block": 5628399, | |
| "layer_norm_epsilon": 1e-05, | |
| "model_type": "gpt_optimized", | |
| "n_embd": 1280, | |
| "n_head": 32, | |
| "n_inner": null, | |
| "n_layer": 48, | |
| "n_positions": 1024, | |
| "reorder_and_upcast_attn": false, | |
| "resid_pdrop": 0.1, | |
| "scale_attn_by_inverse_layer_idx": false, | |
| "scale_attn_weights": true, | |
| "summary_activation": null, | |
| "summary_first_dropout": 0.1, | |
| "summary_proj_to_labels": true, | |
| "summary_type": "cls_index", | |
| "summary_use_proj": true, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.39.3", | |
| "use_cache": true, | |
| "vocab_size": 50257 | |
| } | |