hazyresearch
/

Weaver_Distilled_ModernBERT_Large_for_MATH500

Text Classification

Model card Files Files and versions

jonsaadfalcon committed on Jun 12

Commit

0e40d5d

·

verified ·

1 Parent(s): 8ed81f4

Update README.md

Files changed (1) hide show

README.md +45 -12

README.md CHANGED Viewed

@@ -15,35 +15,68 @@ This is a distilled cross-encoder model based on ModernBERT-large, trained to pr
 - **Training Objective**: Binary classification (correct/incorrect answer prediction)
 ## Usage
 ```python
 from custom_crossencoder import CustomCrossEncoder, TrainingConfig
-# Initialize model
 config = TrainingConfig(
-    model_name="answerdotai/ModernBERT-large",
     max_length=4096,
-    mlp_hidden_dims=[1024, 512, 256]
 )
 model = CustomCrossEncoder(config)
-# Load checkpoint
-model.load_state_dict(torch.load("hazyresearch/Weaver_Distilled_ModernBERT_Large_for_MATH500"))
-model.eval()
-# Get prediction
-instruction = "Your instruction here"
-answer = "Your answer here"
 encoded = model.tokenizer(
     text=instruction,
-    text_pair=answer,
     truncation=True,
-    max_length=4096,
     padding="max_length",
     return_tensors="pt"
 )
 with torch.no_grad():
-    prediction = model(encoded["input_ids"], encoded["attention_mask"])
 ```
 ## Running Evaluation

 - **Training Objective**: Binary classification (correct/incorrect answer prediction)
 ## Usage
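+TODO: Add a pointer to the `custom_crossencoder.py` script, which provides the `CustomCrossEncoder` and `TrainingConfig` names imported in the example below.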
 ```python
+import torch
+import logging
 from custom_crossencoder import CustomCrossEncoder, TrainingConfig
+# Setup logging
+logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Model configuration
 config = TrainingConfig(
+    model_name="answerdotai/ModernBERT-large",  # Base model to use
     max_length=4096,
+    mlp_hidden_dims=[1024, 512, 256],  # Default for ModernBERT
+    dropout_rate=0.1,
+    dataset_path="hazyresearch/MATH500_with_Llama_3.1_70B_Instruct_v1",
 )
+# Model path - using HuggingFace model repository
+checkpoint_path = "hazyresearch/Weaver_Distilled_ModernBERT_Large_for_MATH500"
+# Load model
+logger.info(f"Loading model from checkpoint: {checkpoint_path}")
 model = CustomCrossEncoder(config)
+model.load_finetuned_checkpoint(checkpoint_path)
+model.eval()  # Set to evaluation mode
+# Dummy example
+instruction = "Solve the following math problem: What is 2 + 2?"
+response = "The answer is 4. This is because when we add 2 and 2 together, we get 4."
+# Tokenize input
 encoded = model.tokenizer(
     text=instruction,
+    text_pair=response,
     truncation=True,
+    max_length=config.max_length,
     padding="max_length",
     return_tensors="pt"
 )
+# Get prediction
+logger.info("\nMaking prediction on dummy example:")
+logger.info(f"Instruction: {instruction}")
+logger.info(f"Response: {response}")
+# Move tensors to the same device as model
+device = next(model.parameters()).device
+input_ids = encoded["input_ids"].to(device)
+attention_mask = encoded["attention_mask"].to(device)
+# Get raw score
 with torch.no_grad():
+    score = model(input_ids, attention_mask).item()
+logger.info(f"\nRaw prediction score: {score:.4f}")
+# Get binary prediction (using 0.5 threshold)
+binary_prediction = "Correct" if score >= 0.5 else "Incorrect"
+logger.info(f"Binary prediction (threshold 0.5): {binary_prediction}")
 ```
 ## Running Evaluation