---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
- text: Hello!
  example_title: Hello world
  group: Python
---

This tiny model is for debugging. It is randomly initialized with the config adapted from [openbmb/MiniCPM4-8B](https://huggingface.co/openbmb/MiniCPM4-8B).

### Example usage:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yujiepan/minicpm4-tiny-random"
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map=device,
    trust_remote_code=True,
)

# Users can call the chat interface directly:
# response, history = model.chat(tokenizer, "Write an article about Artificial Intelligence.", temperature=0.7, top_p=0.7)
# print(response)

# Users can also use the generate interface:
messages = [
    {"role": "user", "content": "Write an article about Artificial Intelligence."},
]
prompt_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)

model_outputs = model.generate(
    **model_inputs,
    max_new_tokens=32,
    top_p=0.7,
    temperature=0.7,
)

# Slice off the prompt tokens so only the newly generated text is decoded.
output_token_ids = [
    model_outputs[i][len(model_inputs['input_ids'][i]):]
    for i in range(len(model_inputs['input_ids']))
]
response = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]
print(response)
```

### Code to create this repo:

```python
import json
from pathlib import Path

import accelerate
import torch
from huggingface_hub import hf_hub_download
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    set_seed,
)

source_model_id = "openbmb/MiniCPM4-8B"
save_folder = "/tmp/yujiepan/minicpm4-tiny-random"

# Reuse the source tokenizer unchanged.
processor = AutoTokenizer.from_pretrained(source_model_id)
processor.save_pretrained(save_folder)

# Shrink the source config to a tiny variant.
with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
    config_json = json.load(f)
config_json['hidden_size'] = 64
config_json['intermediate_size'] = 128
config_json['num_attention_heads'] = 2
config_json['num_key_value_heads'] = 1
config_json['dim_model_base'] = 32
config_json['num_hidden_layers'] = 2
config_json['tie_word_embeddings'] = True

# Point auto_map at the source repo so the custom modeling code is fetched from there.
for k, v in config_json['auto_map'].items():
    config_json['auto_map'][k] = f'{source_model_id}--{v}'
automap = config_json['auto_map']

# Truncate the LongRoPE factors to match the smaller head dimension (head_dim // 2 = 16).
factor = config_json['rope_scaling']['long_factor']
config_json['rope_scaling']['long_factor'] = factor[:16]
config_json['rope_scaling']['short_factor'] = factor[:16]

with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
    json.dump(config_json, f, indent=2)

config = AutoConfig.from_pretrained(
    save_folder,
    trust_remote_code=True,
)
print(config)

# Build the model in bfloat16, then restore the default dtype.
torch.set_default_dtype(torch.bfloat16)
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
torch.set_default_dtype(torch.float32)
model.generation_config = GenerationConfig.from_pretrained(
    source_model_id, trust_remote_code=True,
)

# Randomly initialize all weights with a fixed seed for reproducibility.
set_seed(42)
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        torch.nn.init.normal_(p, 0, 0.2)
        print(name, p.shape)
model.save_pretrained(save_folder)

# Restore the auto_map entries pointing at the source repo (save_pretrained may rewrite them).
with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
    config_json = json.load(f)
config_json['auto_map'] = automap
with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
    json.dump(config_json, f, indent=2)

# Delete the copied modeling .py files; the custom code is resolved from the source repo via auto_map.
for python_file in Path(save_folder).glob('*.py'):
    python_file.unlink()
```
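
### Sanity check (optional):

The snippet below is a minimal sketch, not part of the original scripts, showing one way to confirm the checkpoint really is tiny: it loads the config and counts parameters. The expected values (`hidden_size=64`, `num_hidden_layers=2`) come from the creation code above.

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

model_id = "yujiepan/minicpm4-tiny-random"

# trust_remote_code is required because MiniCPM4 ships custom modeling code.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
print(config.hidden_size, config.num_hidden_layers)  # expected: 64 2, as set by the creation script

# The model is tiny, so instantiating it on CPU is cheap; count its parameters.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, trust_remote_code=True)
print(f"{sum(p.numel() for p in model.parameters()):,} parameters")
```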