babylm-baseline-100m-gpt-bert-masked-focus / configuration_gpt_bert.py

Uploading the modeling file

c28c53e verified 4 days ago

1.85 kB

	from __future__ import annotations

	import json
	import pathlib
	import copy

	from typing import Any
	from transformers.configuration_utils import PretrainedConfig


	class ModelConfig(PretrainedConfig):
	    """Model hyperparameters, taken from built-in defaults or from a JSON file.

	    With no ``config_file`` the instance receives the default values listed
	    in ``__init__`` (each one overridable by a same-named keyword argument).
	    With a ``config_file`` every top-level key of that JSON file becomes an
	    attribute instead. Serialization always dumps the complete instance
	    ``__dict__``.
	    """

	    def __init__(self: ModelConfig, config_file: pathlib.Path | str | None = None, **kwargs):
	        """Build the configuration.

	        Args:
	            config_file: Path to a JSON file whose top-level value is an
	                object; each of its key/value pairs is set as an attribute.
	                When ``None``, the built-in defaults below are used.
	            **kwargs: Forwarded unchanged to the parent initializer. When
	                ``config_file`` is ``None``, a keyword whose name matches
	                one of the built-in defaults also replaces that default.

	        Raises:
	            OSError: If ``config_file`` cannot be opened.
	            json.JSONDecodeError: If ``config_file`` is not valid JSON.
	        """
	        super().__init__(**kwargs)

	        if config_file is None:
	            defaults: dict[str, Any] = {
	                "attention_probs_dropout_prob": 0.1,
	                "hidden_dropout_prob": 0.1,
	                "hidden_size": 768,
	                "intermediate_size": 2560,
	                "max_sequence_length": 512,
	                "position_bucket_size": 32,
	                "num_attention_heads": 12,
	                "num_layers": 12,
	                "vocab_size": 8192,
	                "layer_norm_eps": 1e-5,
	            }
	            # Honour an explicit keyword override instead of unconditionally
	            # overwriting it with the default after the parent initializer
	            # has already seen it.
	            for name, default in defaults.items():
	                setattr(self, name, kwargs.get(name, default))
	            return

	        # pathlib.Path() accepts both str and Path, so no type test is needed
	        # (the previous check compared the value against the literal "str").
	        config_path = pathlib.Path(config_file)

	        # Context manager guarantees the handle is closed even if parsing fails.
	        with config_path.open("r", encoding="utf-8") as reader:
	            config: dict[str, Any] = json.load(reader)

	        for key, value in config.items():
	            setattr(self, key, value)

	    def __repr__(self) -> str:
	        """Return the JSON serialization of this instance."""
	        return self.to_json_string()

	    def to_dict(self) -> dict[str, Any]:
	        """Serialize this instance to a Python dictionary.

	        Returns:
	            A deep copy of the instance ``__dict__``, so mutating the result
	            does not affect this object.
	        """
	        return copy.deepcopy(self.__dict__)

	    def to_json_string(self, use_diff: bool = True) -> str:
	        """Serialize this instance to a JSON string.

	        Args:
	            use_diff: Ignored; the full dictionary from ``to_dict`` is always
	                serialized. Accepted only so callers that pass this flag do
	                not fail. NOTE(review): the parent class's serializers are
	                presumed to take this flag - confirm against the installed
	                transformers version.

	        Returns:
	            The ``to_dict`` output as JSON with two-space indentation and
	            sorted keys, terminated by a newline.
	        """
	        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

	    def to_json_file(self, json_file_path: pathlib.Path | str, use_diff: bool = True) -> None:
	        """Write the JSON serialization of this instance to a file.

	        Args:
	            json_file_path: Destination path; an existing file is overwritten.
	            use_diff: Ignored, see ``to_json_string``. NOTE(review): presumed
	                to be passed by the parent class when saving - confirm.

	        Raises:
	            OSError: If the destination cannot be opened for writing.
	        """
	        target = pathlib.Path(json_file_path)
	        with target.open("w", encoding="utf-8") as writer:
	            writer.write(self.to_json_string())