Upload RT-DETRv2 voucher classifier

Browse files

Files changed (16) hide show

README.md +30 -2
checkpoint-11/config.json +129 -0
checkpoint-11/model.safetensors +3 -0
checkpoint-11/optimizer.pt +3 -0
checkpoint-11/preprocessor_config.json +26 -0
checkpoint-11/rng_state.pth +3 -0
checkpoint-11/scheduler.pt +3 -0
checkpoint-11/trainer_state.json +48 -0
checkpoint-11/training_args.bin +3 -0
checkpoint-22/model.safetensors +1 -1
checkpoint-22/optimizer.pt +1 -1
checkpoint-22/trainer_state.json +33 -4
checkpoint-22/training_args.bin +1 -1
model.safetensors +1 -1
runs/Aug14_00-31-34_9db0f8c974d2/events.out.tfevents.1755131495.9db0f8c974d2.97819.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -74,6 +74,33 @@ This model is a fine-tuned version of [PekingU/rtdetr_v2_r101vd](https://hugging
 **Final Evaluation Results:**
 **Model Configuration:**
 - **Base model**: PekingU/rtdetr_v2_r101vd
 - **Architecture**: rtdetr_v2_r101vd
@@ -90,12 +117,13 @@ This model is a fine-tuned version of [PekingU/rtdetr_v2_r101vd](https://hugging
 **Training Time**: 0.6 minutes
 **Training Summary:**
-- **Final training loss**: 1361.9480
 ### MLflow Tracking
-- **MLflow Run ID**: 65eb62e7fd564f99981143809773def8
 - **MLflow Experiment**: RT-DETRv2_Voucher_Classification

 **Final Evaluation Results:**
+**Overall Detection Performance:**
+- **mAP**: 0.0000
+- **mAP@50**: 0.0000
+- **mAP@75**: 0.0000
+**Per-Class Average Precision:**
+- **Digital invoices**: 0.0000 (needs improvement)
+- **Fisico receipts**: 0.0000 (needs improvement)
+- **Tesoreria receipts**: 0.0000 (needs improvement)
+**Model Confidence:**
+- **Digital invoices mean confidence**: 0.7041 (moderate)
+- **Fisico receipts mean confidence**: 0.5998 (low)
+- **Tesoreria receipts mean confidence**: 0.5715 (low)
+**Performance by Object Size:**
+- **Small objects**: 0.0000
+- **Medium objects**: -1.0000 (not computed: -1 is the COCO-evaluation sentinel for no ground-truth objects in this size range)
+- **Large objects**: 0.0000
+**Evaluation Dataset:**
+- **Digital invoices**: 157 samples (28.5%)
+- **Fisico receipts**: 261 samples (47.4%)
+- **Tesoreria receipts**: 133 samples (24.1%)
+- **Total evaluation samples**: 551
 **Model Configuration:**
 - **Base model**: PekingU/rtdetr_v2_r101vd
 - **Architecture**: rtdetr_v2_r101vd
 **Training Time**: 0.6 minutes
 **Training Summary:**
+- **Final training loss**: 1361.6241
+- **Final learning rate**: 1.43e-07
 ### MLflow Tracking
+- **MLflow Run ID**: 0bf1954e36da45088455964384408885
 - **MLflow Experiment**: RT-DETRv2_Voucher_Classification

checkpoint-11/config.json ADDED Viewed

	@@ -0,0 +1,129 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "silu",
+  "anchor_image_size": null,
+  "architectures": [
+    "RTDetrV2ForObjectDetection"
+  ],
+  "attention_dropout": 0.0,
+  "auxiliary_loss": true,
+  "backbone": null,
+  "backbone_config": {
+    "depths": [
+      3,
+      4,
+      23,
+      3
+    ],
+    "downsample_in_bottleneck": false,
+    "downsample_in_first_stage": false,
+    "embedding_size": 64,
+    "hidden_act": "relu",
+    "hidden_sizes": [
+      256,
+      512,
+      1024,
+      2048
+    ],
+    "layer_type": "bottleneck",
+    "model_type": "rt_detr_resnet",
+    "num_channels": 3,
+    "out_features": [
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "out_indices": [
+      2,
+      3,
+      4
+    ],
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "torch_dtype": "float32"
+  },
+  "backbone_kwargs": null,
+  "batch_norm_eps": 1e-05,
+  "box_noise_scale": 1.0,
+  "d_model": 256,
+  "decoder_activation_function": "relu",
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 1024,
+  "decoder_in_channels": [
+    384,
+    384,
+    384
+  ],
+  "decoder_layers": 6,
+  "decoder_method": "default",
+  "decoder_n_levels": 3,
+  "decoder_n_points": 4,
+  "decoder_offset_scale": 0.5,
+  "disable_custom_kernels": true,
+  "dropout": 0.0,
+  "encode_proj_layers": [
+    2
+  ],
+  "encoder_activation_function": "gelu",
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_hidden_dim": 384,
+  "encoder_in_channels": [
+    512,
+    1024,
+    2048
+  ],
+  "encoder_layers": 1,
+  "eos_coefficient": 0.0001,
+  "eval_size": null,
+  "feat_strides": [
+    8,
+    16,
+    32
+  ],
+  "focal_loss_alpha": 0.75,
+  "focal_loss_gamma": 2.0,
+  "freeze_backbone_batch_norms": true,
+  "hidden_expansion": 1.0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_bias_prior_prob": null,
+  "initializer_range": 0.01,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "label_noise_ratio": 0.5,
+  "layer_norm_eps": 1e-05,
+  "learn_initial_query": false,
+  "matcher_alpha": 0.25,
+  "matcher_bbox_cost": 5.0,
+  "matcher_class_cost": 2.0,
+  "matcher_gamma": 2.0,
+  "matcher_giou_cost": 2.0,
+  "model_type": "rt_detr_v2",
+  "normalize_before": false,
+  "num_denoising": 100,
+  "num_feature_levels": 3,
+  "num_queries": 300,
+  "positional_encoding_temperature": 10000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.0",
+  "use_focal_loss": true,
+  "use_pretrained_backbone": false,
+  "use_timm_backbone": false,
+  "weight_loss_bbox": 5.0,
+  "weight_loss_giou": 2.0,
+  "weight_loss_vfl": 1.0,
+  "with_box_refine": true
+}

checkpoint-11/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3e068bda225fc31bb49ed7183a789b37bd93976d6a96831e17aa015749abd1
+size 306699044

checkpoint-11/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87dabdb27f4f90cb9270ced17931fcd21fa54ed6e3e1e9639cdf5c810f120e43
+size 611580433

checkpoint-11/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "do_convert_annotations": true,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "format": "coco_detection",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "RTDetrImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "pad_size": null,
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 640,
+    "width": 640
+  }
+}

checkpoint-11/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:485c3e554af1b9085b3c02357f2e127e19bd275a84ebb30b1fb13fd4d5d20bb3
+size 14244

checkpoint-11/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99e8799425df17af4b2976f77dd0c9b3661c7dfc83e78d7f432f2f7e04f3e76a
+size 1064

checkpoint-11/trainer_state.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 11,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 5716.10205078125,
+      "learning_rate": 3.0000000000000004e-08,
+      "loss": 1383.1007,
+      "step": 5
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 12343.9111328125,
+      "learning_rate": 6.75e-08,
+      "loss": 1349.0135,
+      "step": 10
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 22,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.816299339776e+17,
+  "train_batch_size": 24,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-11/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1180767f5de13a6a025f0c99a50dc82acaeafd2f978f6044f91019e1561dc92
+size 5368

checkpoint-22/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41bcea9c8faca4547536af694dd5ad4a304153763de5c5d40387fb9d71bedb70
 size 306699044

 version https://git-lfs.github.com/spec/v1
+oid sha256:88ca34ef98d20cb695317e3577b14ccd8027ea4cfe525424549bd643cd07258d
 size 306699044

checkpoint-22/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:443919119e128db40e45a79d6a18a299f9ef64236143bc3b0a00dc2b02f890c4
 size 611580433

 version https://git-lfs.github.com/spec/v1
+oid sha256:53bdea1aaa6c5b3169bdf07cc3b8a586e5a2917de523de9504f1bf309d1e6fef
 size 611580433

checkpoint-22/trainer_state.json CHANGED Viewed

@@ -3,17 +3,46 @@
   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 2.0,
-  "eval_steps": 100,
   "global_step": 22,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
-  "log_history": [],
-  "logging_steps": 50,
   "max_steps": 22,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
-  "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {

   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 2.0,
+  "eval_steps": 500,
   "global_step": 22,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 5716.10205078125,
+      "learning_rate": 3.0000000000000004e-08,
+      "loss": 1383.1007,
+      "step": 5
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 12343.9111328125,
+      "learning_rate": 6.75e-08,
+      "loss": 1349.0135,
+      "step": 10
+    },
+    {
+      "epoch": 1.380952380952381,
+      "grad_norm": 10210.48046875,
+      "learning_rate": 1.05e-07,
+      "loss": 1340.3351,
+      "step": 15
+    },
+    {
+      "epoch": 1.8571428571428572,
+      "grad_norm": 7633.39599609375,
+      "learning_rate": 1.425e-07,
+      "loss": 1369.3844,
+      "step": 20
+    }
+  ],
+  "logging_steps": 5,
   "max_steps": 22,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
+  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {

checkpoint-22/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee47b52e482bd2dc4e70d1d92d8db4b33fc8cc8bb349a91709c9f357b574e2d1
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1180767f5de13a6a025f0c99a50dc82acaeafd2f978f6044f91019e1561dc92
 size 5368

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41bcea9c8faca4547536af694dd5ad4a304153763de5c5d40387fb9d71bedb70
 size 306699044

 version https://git-lfs.github.com/spec/v1
+oid sha256:88ca34ef98d20cb695317e3577b14ccd8027ea4cfe525424549bd643cd07258d
 size 306699044

runs/Aug14_00-31-34_9db0f8c974d2/events.out.tfevents.1755131495.9db0f8c974d2.97819.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9da04950d3bca5c43a7a018693a7d06aa8974c8ef4f8712c9e9a73ffe62f1669
+size 8224

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee47b52e482bd2dc4e70d1d92d8db4b33fc8cc8bb349a91709c9f357b574e2d1
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1180767f5de13a6a025f0c99a50dc82acaeafd2f978f6044f91019e1561dc92
 size 5368