danhtran2mind commited on
Commit
ce500ca
·
verified ·
1 Parent(s): ae05bfd

Upload 38 files

Browse files
Files changed (39) hide show
  1. .gitattributes +2 -0
  2. .python-version +1 -0
  3. LICENSE +21 -0
  4. apps/gradio_app.py +68 -0
  5. apps/gradio_app/config.py +15 -0
  6. apps/gradio_app/old-processor.py +109 -0
  7. apps/gradio_app/processor.py +110 -0
  8. apps/gradio_app/static/scripts.js +0 -0
  9. apps/gradio_app/static/styles.css +100 -0
  10. apps/gradio_app/utils.py +14 -0
  11. apps/old-gradio_app.py +55 -0
  12. apps/old2-gradio_app.py +183 -0
  13. assets/lp_image.jpg +0 -0
  14. assets/lp_video - Trim.mp4 +3 -0
  15. assets/lp_video.mp4 +3 -0
  16. ckpts/README.md +3 -0
  17. configs/datasets_config.yaml +27 -0
  18. data/__init__.py +0 -0
  19. docs/inference/inference_doc.md +127 -0
  20. docs/scripts/scripts_doc.md +126 -0
  21. docs/training/training_doc.md +18 -0
  22. requirements/requirements.txt +8 -0
  23. requirements/requirements_compatible.txt +8 -0
  24. scripts/download_and_process_datasets.py +53 -0
  25. scripts/download_ckpts.py +62 -0
  26. scripts/old-download_and_process_datasets.py +61 -0
  27. src/license_plate_detector_ocr/__init__.py +0 -0
  28. src/license_plate_detector_ocr/data/dataset_processing/__init__.py +0 -0
  29. src/license_plate_detector_ocr/data/dataset_processing/config_loader.py +5 -0
  30. src/license_plate_detector_ocr/data/dataset_processing/converter.py +190 -0
  31. src/license_plate_detector_ocr/data/dataset_processing/downloader.py +67 -0
  32. src/license_plate_detector_ocr/data/dataset_processing/processor.py +41 -0
  33. src/license_plate_detector_ocr/infer.py +173 -0
  34. src/license_plate_detector_ocr/inference/__init__.py +0 -0
  35. src/license_plate_detector_ocr/inference/paddleocr_infer.py +76 -0
  36. src/license_plate_detector_ocr/old-infer.py +124 -0
  37. src/license_plate_detector_ocr/train.py +0 -0
  38. src/license_plate_detector_ocr/training/__init__.py +0 -0
  39. src/license_plate_detector_ocr/training/train_yolo.py +51 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/lp_video[[:space:]]-[[:space:]]Trim.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ assets/lp_video.mp4 filter=lfs diff=lfs merge=lfs -text
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.13
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Danh Tran
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
apps/gradio_app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import os
from gradio_app.config import setup_logging, setup_sys_path
from gradio_app.processor import gradio_process, update_preview, update_visibility

# Initialize logging and sys.path
setup_logging()
setup_sys_path()

# Load custom CSS with a context manager so the handle is closed promptly
# (the previous bare open(...).read() leaked the file handle).
_css_path = os.path.join(os.path.dirname(__file__), "gradio_app", "static", "styles.css")
with open(_css_path, "r") as _css_file:
    custom_css = _css_file.read()

# Gradio Interface
with gr.Blocks(css=custom_css) as iface:
    gr.Markdown(
        """
        # License Plate Detection and OCR
        Upload an image or video to detect and read license plates. Outputs are saved in `apps/gradio_app/temp_data/`.
        Debug logs are saved in `apps/gradio_app/debug.log`.
        """,
        elem_classes="markdown-title"
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload Image or Video", elem_classes="custom-file-input")
            input_type = gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image", elem_classes="custom-radio")
            with gr.Blocks():
                input_preview_image = gr.Image(label="Input Preview", visible=True, elem_classes="custom-image")
                input_preview_video = gr.Video(label="Input Preview", visible=False, elem_classes="custom-video")
            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary", elem_classes="custom-button secondary")
                submit_button = gr.Button("Submit", variant="primary", elem_classes="custom-button primary")
        with gr.Column(scale=2):
            with gr.Blocks():
                output_image = gr.Image(label="Processed Output (Image)", type="numpy", visible=True, elem_classes="custom-image")
                output_video = gr.Video(label="Processed Output (Video)", visible=False, elem_classes="custom-video")
            output_text = gr.Textbox(label="Detected License Plates", lines=10, elem_classes="custom-textbox")

    # Update preview and output visibility when input type changes
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[input_preview_image, input_preview_video, output_image, output_video]
    )

    # Update preview when file is uploaded
    input_file.change(
        fn=update_preview,
        inputs=[input_file, input_type],
        outputs=[input_preview_image, input_preview_video]
    )

    # Bind the processing function
    submit_button.click(
        fn=gradio_process,
        inputs=[input_file, input_type],
        outputs=[output_image, output_video, output_text, input_preview_image, input_preview_video]
    )

    # Clear button functionality. BUG FIX: the previous outputs list bound
    # output_image/output_video twice (8 outputs for 6 components); each
    # component is now reset exactly once.
    clear_button.click(
        fn=lambda: (None, None, None, "Image", None, None),
        outputs=[input_file, output_image, output_video, input_type, input_preview_image, input_preview_video]
    )

if __name__ == "__main__":
    iface.launch(share=True)
apps/gradio_app/config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import sys
4
+
5
+ def setup_logging():
6
+ """Set up logging to a file for debugging."""
7
+ logging.basicConfig(
8
+ filename="apps/gradio_app/debug.log",
9
+ level=logging.DEBUG,
10
+ format="%(asctime)s - %(levelname)s - %(message)s"
11
+ )
12
+
13
+ def setup_sys_path():
14
+ """Adjust sys.path to include the src directory."""
15
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
apps/gradio_app/old-processor.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import traceback
5
+ import logging
6
+ import gradio as gr
7
+ from gradio_app.utils import convert_to_supported_format
8
+ # Adjust sys.path to include the src directory
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
10
+ '..', '..', 'src', 'license_plate_detector_ocr')))
11
+ from infer import infer, is_image_file
12
+
13
+
14
+
15
+ def gradio_process(input_file, input_type):
16
+ """Process the input file (image or video) for license plate detection and OCR."""
17
+ try:
18
+ logging.debug(f"Input file path: {input_file.name}")
19
+ print(f"Input file path: {input_file.name}")
20
+
21
+ # Copy input file to temp_data directory to ensure stability
22
+ temp_input_dir = "apps/gradio_app/temp_data"
23
+ os.makedirs(temp_input_dir, exist_ok=True)
24
+ temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
25
+ shutil.copy(input_file.name, temp_input_path)
26
+ logging.debug(f"Copied input file to: {temp_input_path}")
27
+
28
+ # Verify input file exists
29
+ if not os.path.exists(temp_input_path):
30
+ error_msg = f"Error: Input file {temp_input_path} does not exist."
31
+ logging.error(error_msg)
32
+ return None, None, error_msg, None, None
33
+
34
+ # Set output path
35
+ output_dir = "apps/gradio_app/temp_data"
36
+ os.makedirs(output_dir, exist_ok=True)
37
+ output_filename = os.path.splitext(os.path.basename(temp_input_path))[0] + ('_output.jpg' if is_image_file(temp_input_path) else '_output.mp4')
38
+ output_path = os.path.join(output_dir, output_filename)
39
+ logging.debug(f"Output path: {output_path}")
40
+
41
+ # Call the infer function
42
+ result_array, plate_texts = infer(temp_input_path, output_path)
43
+
44
+ if result_array is None and is_image_file(temp_input_path):
45
+ error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
46
+ logging.error(error_msg)
47
+ return None, None, error_msg, None, None
48
+
49
+ # Validate output file for videos
50
+ if not is_image_file(temp_input_path):
51
+ if not os.path.exists(output_path):
52
+ error_msg = f"Error: Output video file {output_path} was not created."
53
+ logging.error(error_msg)
54
+ return None, None, error_msg, None, None
55
+ # Convert output video to supported format
56
+ converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
57
+ converted_path = convert_to_supported_format(output_path, converted_output_path)
58
+ if converted_path is None:
59
+ error_msg = f"Error: Failed to convert output video {output_path} to supported format."
60
+ logging.error(error_msg)
61
+ return None, None, error_msg, None, None
62
+ output_path = converted_path
63
+
64
+ # Format plate texts
65
+ if is_image_file(temp_input_path):
66
+ formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
67
+ logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
68
+ return result_array, None, formatted_texts, temp_input_path, None
69
+ else:
70
+ formatted_texts = []
71
+ for i, texts in enumerate(plate_texts):
72
+ if texts:
73
+ formatted_texts.append(f"Frame {i+1}: {', '.join(texts)}")
74
+ formatted_texts = "\n".join(formatted_texts) if formatted_texts else "No plates detected"
75
+ logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
76
+ return None, output_path, formatted_texts, None, temp_input_path
77
+ except Exception as e:
78
+ error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
79
+ logging.error(error_message)
80
+ print(error_message)
81
+ return None, None, error_message, None, None
82
+
83
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
98
+
99
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
apps/gradio_app/processor.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import traceback
5
+ import logging
6
+ import gradio as gr
7
+ import uuid # Import uuid module
8
+ from gradio_app.utils import convert_to_supported_format
9
+
10
+ # Adjust sys.path to include the src directory
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src', 'license_plate_detector_ocr')))
12
+ from infer import infer, is_image_file
13
+
14
def gradio_process(input_file, input_type):
    """Process the input file (image or video) for license plate detection and OCR.

    Returns a 5-tuple matching the Gradio output bindings:
    (image_result, video_path, plate_text, image_preview, video_preview).
    On any failure the third slot carries the error message and the rest
    are None.
    """
    try:
        logging.debug(f"Input file path: {input_file.name}")
        print(f"Input file path: {input_file.name}")

        # Copy input file to temp_data directory to ensure stability
        temp_input_dir = "apps/gradio_app/temp_data"
        os.makedirs(temp_input_dir, exist_ok=True)
        temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
        shutil.copy(input_file.name, temp_input_path)
        logging.debug(f"Copied input file to: {temp_input_path}")

        # Verify input file exists
        if not os.path.exists(temp_input_path):
            error_msg = f"Error: Input file {temp_input_path} does not exist."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        # Build a collision-free output name: <stem>_<8charuuid>_output.<ext>.
        # BUG FIX: the previous format string appended "_output" twice,
        # producing names like "img_ab12cd34_output_output.jpg".
        output_dir = "apps/gradio_app/temp_data"
        os.makedirs(output_dir, exist_ok=True)
        unique_id = str(uuid.uuid4())[:8]  # short id keeps filenames readable
        stem = os.path.splitext(os.path.basename(temp_input_path))[0]
        extension = '.jpg' if is_image_file(temp_input_path) else '.mp4'
        output_filename = f"{stem}_{unique_id}_output{extension}"
        output_path = os.path.join(output_dir, output_filename)
        logging.debug(f"Output path: {output_path}")

        # infer() yields (annotated array or None, plate texts).
        result_array, plate_texts = infer(temp_input_path, output_path)

        if result_array is None and is_image_file(temp_input_path):
            error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        # Validate output file for videos
        if not is_image_file(temp_input_path):
            if not os.path.exists(output_path):
                error_msg = f"Error: Output video file {output_path} was not created."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            # Re-encode so browsers can play the result.
            converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
            converted_path = convert_to_supported_format(output_path, converted_output_path)
            if converted_path is None:
                error_msg = f"Error: Failed to convert output video {output_path} to supported format."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            output_path = converted_path

        # Format plate texts
        if is_image_file(temp_input_path):
            formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
            logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
            return result_array, None, formatted_texts, temp_input_path, None

        # For videos plate_texts is a per-frame list of lists.
        frame_lines = [
            f"Frame {i+1}: {', '.join(texts)}"
            for i, texts in enumerate(plate_texts)
            if texts
        ]
        formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
        logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
        return None, output_path, formatted_texts, None, temp_input_path
    except Exception as e:
        error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
        logging.error(error_message)
        print(error_message)
        return None, None, error_message, None, None
83
+
84
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
99
+
100
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
+ )
apps/gradio_app/static/scripts.js ADDED
File without changes
apps/gradio_app/static/styles.css ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* General body styling */
2
+ .gradio-container {
3
+ background: linear-gradient(180deg, #f9fafb, #f1efef);
4
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
5
+ color: #6b46c1; /* Purple-800 for text (neutral hue) */
6
+ font-size: 16px; /* Medium text size */
7
+ font-weight: 400;
8
+ }
9
+
10
+ /* Dark mode background */
11
+ @media (prefers-color-scheme: dark) {
12
+ .gradio-container {
13
+ background: linear-gradient(180deg, #1f2937, #111827);
14
+ color: #d6bcfa; /* Lighter purple for dark mode */
15
+ }
16
+ }
17
+
18
+ /* Block styling (containers for components) */
19
+ .block {
20
+ border: 1px solid #e9d8fd; /* Purple-200 for borders */
21
+ border-radius: 8px; /* Medium radius */
22
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); /* Small shadow */
23
+ padding: 16px; /* Medium spacing */
24
+ background: #f1efef;
25
+ }
26
+
27
+ /* Input fields */
28
+ input[type="text"], textarea {
29
+ background: #faf5ff; /* Purple-50 for input background */
30
+ border: 1px solid #e9d8fd; /* Purple-200 for borders */
31
+ border-radius: 8px;
32
+ padding: 8px;
33
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
34
+ font-size: 16px;
35
+ color: #6b46c1;
36
+ box-shadow: none;
37
+ }
38
+ input[type="text"]:focus, textarea:focus {
39
+ outline: none;
40
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); /* Small shadow on focus */
41
+ border-color: #48bb78; /* Green-400 for focus */
42
+ }
43
+
44
+ /* Primary button */
45
+ button.primary {
46
+ background: #48bb78; /* Green-400 */
47
+ color: #f1efef;
48
+ border: none;
49
+ border-radius: 8px;
50
+ padding: 8px 16px;
51
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
52
+ font-size: 16px;
53
+ font-weight: 500;
54
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
55
+ cursor: pointer;
56
+ }
57
+ button.primary:hover {
58
+ background: #ed8936; /* Orange-400 for hover */
59
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); /* Medium shadow on hover */
60
+ }
61
+
62
+ /* Secondary button */
63
+ button.secondary {
64
+ color: #48bb78; /* Green-400 for text */
65
+ border: 1px solid #48bb78; /* Green-400 for border */
66
+ border-radius: 8px;
67
+ padding: 8px 16px;
68
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
69
+ font-size: 16px;
70
+ font-weight: 500;
71
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
72
+ cursor: pointer;
73
+ }
74
+ button.secondary:hover {
75
+ background: #ed8936; /* Orange-400 for hover */
76
+ color: #48bb78;
77
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
78
+ }
79
+
80
+ /* Slider styling */
81
+ input[type="range"] {
82
+ accent-color: #ed8936; /* Orange-400 for slider */
83
+ }
84
+ @media (prefers-color-scheme: dark) {
85
+ input[type="range"] {
86
+ accent-color: #f6ad55; /* Orange-600 for dark mode */
87
+ }
88
+ }
89
+
90
+ /* Markdown headers */
91
+ h2 {
92
+ font-weight: 500;
93
+ color: #6b46c1; /* Purple-800 */
94
+ margin-bottom: 16px;
95
+ }
96
+
97
+ /* Code or monospace elements */
98
+ code, pre {
99
+ font-family: 'IBM Plex Mono', ui-monospace, monospace;
100
+ }
apps/gradio_app/utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ffmpeg
2
+ import logging
3
+
4
def convert_to_supported_format(input_path, output_path):
    """Convert video to a browser-compatible format (MP4 with H.264 codec).

    Returns *output_path* on success, or None when ffmpeg fails for any
    reason (callers treat None as "conversion failed").
    """
    try:
        pipeline = ffmpeg.output(
            ffmpeg.input(input_path),
            output_path,
            vcodec='h264',
            acodec='aac',
            format='mp4',
            loglevel='quiet',
        )
        ffmpeg.run(pipeline)
        logging.debug(f"Converted video to {output_path}")
        return output_path
    except Exception as e:
        logging.error(f"Error converting video {input_path}: {str(e)}")
        return None
apps/old-gradio_app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+
5
+ # Adjust sys.path to include the src directory
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
7
+ from infer import infer, is_image_file
8
+
9
def gradio_process(input_file, input_type):
    """Process the input file (image or video) for license plate detection and OCR.

    Returns (display_output, text): for images the annotated numpy array and
    newline-joined plate strings; for videos the written output path and
    per-frame plate strings.
    """
    # Debugging: Print input file path
    print(f"Input file path: {input_file.name}")

    # Set default output path in apps/gradio_app/temp_data/
    output_dir = "apps/gradio_app/temp_data"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.splitext(os.path.basename(input_file.name))[0] + ('_output.jpg' if is_image_file(input_file.name) else '_output.mp4')
    output_path = os.path.join(output_dir, output_filename)

    # Call the infer function from infer.py
    result_array, plate_texts = infer(input_file.name, output_path)

    # BUG FIX: only treat a None array as failure for IMAGE inputs. The later
    # revisions of this handler (processor.py) guard with is_image_file,
    # indicating infer() returns no array for videos (it writes output_path
    # instead) — the previous unconditional check reported every video as
    # a processing failure.
    if result_array is None and is_image_file(input_file.name):
        return None, f"Error: Processing failed for {input_file.name}"

    # Format plate texts for output
    if is_image_file(input_file.name):
        formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
        return result_array, formatted_texts

    # For videos, plate_texts is a list of lists (per frame)
    frame_lines = [
        f"Frame {i+1}: {', '.join(texts)}"
        for i, texts in enumerate(plate_texts)
        if texts
    ]
    formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
    return output_path, formatted_texts
+
39
+ # Gradio Interface
40
+ iface = gr.Interface(
41
+ fn=gradio_process,
42
+ inputs=[
43
+ gr.File(label="Upload Image or Video"),
44
+ gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image")
45
+ ],
46
+ outputs=[
47
+ gr.Image(label="Processed Output", type="numpy"),
48
+ gr.Textbox(label="Detected License Plates")
49
+ ],
50
+ title="License Plate Detection and OCR",
51
+ description="Upload an image or video to detect and read license plates. Outputs are saved in apps/gradio_app/temp_data/."
52
+ )
53
+
54
+ if __name__ == "__main__":
55
+ iface.launch(share=True)
apps/old2-gradio_app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+ import traceback
5
+ import logging
6
+ import shutil
7
+ import ffmpeg
8
+
9
+ # Set up logging to a file for debugging
10
+ logging.basicConfig(
11
+ filename="apps/gradio_app/debug.log",
12
+ level=logging.DEBUG,
13
+ format="%(asctime)s - %(levelname)s - %(message)s"
14
+ )
15
+
16
+ # Adjust sys.path to include the src directory
17
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
18
+ from infer import infer, is_image_file
19
+
20
def convert_to_supported_format(input_path, output_path):
    """Convert video to a browser-compatible format (MP4 with H.264 codec).

    Returns *output_path* on success, or None when ffmpeg fails for any
    reason (callers treat None as "conversion failed").
    """
    try:
        pipeline = ffmpeg.output(
            ffmpeg.input(input_path),
            output_path,
            vcodec='h264',
            acodec='aac',
            format='mp4',
            loglevel='quiet',
        )
        ffmpeg.run(pipeline)
        logging.debug(f"Converted video to {output_path}")
        return output_path
    except Exception as e:
        logging.error(f"Error converting video {input_path}: {str(e)}")
        return None
31
+
32
def gradio_process(input_file, input_type):
    """Run license plate detection and OCR on an uploaded image or video.

    Returns a 5-tuple matching the Gradio output bindings:
    (image_result, video_path, plate_text, image_preview, video_preview).
    On any failure the third slot carries the error message and the rest
    are None.
    """
    try:
        logging.debug(f"Input file path: {input_file.name}")
        print(f"Input file path: {input_file.name}")

        # Stabilize the upload by copying it into our own temp directory.
        temp_input_dir = "apps/gradio_app/temp_data"
        os.makedirs(temp_input_dir, exist_ok=True)
        temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
        shutil.copy(input_file.name, temp_input_path)
        logging.debug(f"Copied input file to: {temp_input_path}")

        if not os.path.exists(temp_input_path):
            error_msg = f"Error: Input file {temp_input_path} does not exist."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        treat_as_image = is_image_file(temp_input_path)

        # Derive the output path from the input's basename.
        output_dir = "apps/gradio_app/temp_data"
        os.makedirs(output_dir, exist_ok=True)
        stem = os.path.splitext(os.path.basename(temp_input_path))[0]
        output_path = os.path.join(output_dir, stem + ('_output.jpg' if treat_as_image else '_output.mp4'))
        logging.debug(f"Output path: {output_path}")

        # infer() yields (annotated array or None, plate texts).
        result_array, plate_texts = infer(temp_input_path, output_path)

        if treat_as_image and result_array is None:
            error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        if not treat_as_image:
            if not os.path.exists(output_path):
                error_msg = f"Error: Output video file {output_path} was not created."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            # Re-encode so browsers can play the result.
            converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
            converted_path = convert_to_supported_format(output_path, converted_output_path)
            if converted_path is None:
                error_msg = f"Error: Failed to convert output video {output_path} to supported format."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            output_path = converted_path

        if treat_as_image:
            formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
            logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
            return result_array, None, formatted_texts, temp_input_path, None

        # For videos plate_texts is a per-frame list of lists.
        frame_lines = [
            f"Frame {i+1}: {', '.join(texts)}"
            for i, texts in enumerate(plate_texts)
            if texts
        ]
        formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
        logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
        return None, output_path, formatted_texts, None, temp_input_path
    except Exception as e:
        error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
        logging.error(error_message)
        print(error_message)
        return None, None, error_message, None, None
99
+
100
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
115
+
116
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
127
+
128
# Gradio Interface: two-column Blocks layout wired to the handlers above.
with gr.Blocks() as iface:
    gr.Markdown(
        """
        # License Plate Detection and OCR
        Upload an image or video to detect and read license plates. Outputs are saved in `apps/gradio_app/temp_data/`.
        Debug logs are saved in `apps/gradio_app/debug.log`.
        """,
        elem_classes="markdown-title"
    )

    with gr.Row():
        # Left column: upload, type selector, preview, action buttons.
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload Image or Video")
            input_type = gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image")
            with gr.Blocks():
                input_preview_image = gr.Image(label="Input Preview", visible=True)
                input_preview_video = gr.Video(label="Input Preview", visible=False)
            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary")
                submit_button = gr.Button("Submit", variant="primary")
        # Right column: processed media plus detected plate text.
        with gr.Column(scale=2):
            with gr.Blocks():
                output_image = gr.Image(label="Processed Output (Image)", type="numpy", visible=True)
                output_video = gr.Video(label="Processed Output (Video)", visible=False)
            output_text = gr.Textbox(label="Detected License Plates", lines=10)

    # Swap which preview/output components are shown when the type changes.
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[input_preview_image, input_preview_video, output_image, output_video]
    )

    # Refresh the preview whenever a new file is uploaded.
    input_file.change(
        fn=update_preview,
        inputs=[input_file, input_type],
        outputs=[input_preview_image, input_preview_video]
    )

    # Run detection + OCR on submit.
    submit_button.click(
        fn=gradio_process,
        inputs=[input_file, input_type],
        outputs=[output_image, output_video, output_text, input_preview_image, input_preview_video]
    )

    # Reset the form (output components are intentionally bound as in the
    # original wiring, including the repeated output_image/output_video).
    clear_button.click(
        fn=lambda: (None, None, None, "Image", None, None, None, None),
        outputs=[input_file, output_image, output_video, input_type,
                 input_preview_image, input_preview_video, output_image, output_video]
    )

if __name__ == "__main__":
    iface.launch(share=True)
assets/lp_image.jpg ADDED
assets/lp_video - Trim.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6625c922f561331f928cb6465646a299bcd8ba99f329bfb62a379c29ed33ea19
3
+ size 405454
assets/lp_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72dececeb4cc1ce1da5264211578c9331a3fb31d36bf21ac2f40471d70e2121d
3
+ size 4984385
ckpts/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Checkpoints Folder 🚀
2
+
3
+ This folder holds model checkpoints from training runs 🏋️‍♂️. Each file saves the model's weights & state at specific epochs ⏰. Named by epoch or timestamp, they’re ready for resuming or eval! 🧠 Keep some disk space free for these hefty files! 💾
configs/datasets_config.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ - id: fareselmenshawii/large-license-plate-dataset
3
+ type: kaggle
4
+ - id: duydieunguyen/licenseplates
5
+ type: kaggle
6
+ - id: ronakgohil/license-plate-dataset
7
+ type: kaggle
8
+ - id: bomaich/vnlicenseplate
9
+ type: kaggle
10
+ - id: congtuu/vietnamese-license-plate-obb
11
+ type: kaggle
12
+ - id: haitonthat/vietnam-license-plate-bounding-box
13
+ type: kaggle
14
+ - id: university-of-southeastern-philippines-cnl9c/license-plate-detection-merged-projects
15
+ type: roboflow
16
+ format: yolov11
17
+ version: 3
18
+ - id: ev-dshfb/license-plate-w8chc
19
+ type: roboflow
20
+ format: yolov11
21
+ version: 1
22
+ - id: kanwal-masroor-gv4jr/yolov7-license-plate-detection
23
+ type: roboflow
24
+ format: yolov11
25
+ version: 3
26
+ - id: keremberke/license-plate-object-detection
27
+ type: huggingface
data/__init__.py ADDED
File without changes
docs/inference/inference_doc.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # License Plate Detection and OCR Inference Documentation
2
+
3
+ This document describes the inference process for the license plate detection and OCR system implemented in the provided Python scripts. The system uses a YOLO model for license plate detection and PaddleOCR for text recognition. Below are the details of the inference process and the arguments required to run the scripts.
4
+
5
+ ## Overview
6
+
7
+ The system consists of two main scripts:
8
+ 1. **`paddleocr_infer.py`**: Handles OCR processing for license plate text extraction.
9
+ 2. **`infer.py`**: Manages the main inference pipeline, including license plate detection and OCR, for both images and videos.
10
+
11
+ The scripts process input images or videos, detect license plates using a YOLO model, crop the detected regions, and extract text using PaddleOCR. The output includes processed images or videos with bounding boxes and text annotations, along with the extracted license plate texts.
12
+
13
+ ## Inference Process
14
+
15
+ ### 1. `paddleocr_infer.py`
16
+
17
+ This script defines the OCR functionality using PaddleOCR, optimized for English license plate recognition.
18
+
19
+ #### Key Function: `process_ocr`
20
+ - **Purpose**: Extracts text from a single image, a list of images, or a NumPy array representing an image.
21
+ - **Input**:
22
+ - `image_input`: Can be one of the following:
23
+ - `str`: Path to a single image file.
24
+ - `List[str]`: List of paths to multiple image files.
25
+ - `np.ndarray`: A 3D NumPy array (height, width, channels) representing an image.
26
+ - **Output**:
27
+ - For a single image or array: A list of extracted text strings (`List[str]`).
28
+ - For multiple images: A list of lists, each containing extracted text strings for an image (`List[List[str]]`).
29
+ - **Behavior**:
30
+ - Initializes PaddleOCR with English language settings and slim models for detection and recognition.
31
+ - Processes input(s) and extracts text from detected regions.
32
+ - Handles single or multiple inputs uniformly by converting single inputs to a list for processing.
33
+
34
+ #### Example Usage
35
+ ```python
36
+ # Single image
37
+ result = process_ocr('<path_to_plate_image_1>') # Returns List[str]
38
+
39
+ # Multiple images
40
+ results = process_ocr(['<path_to_plate_image_1>', '<path_to_plate_image_2>', '<path_to_plate_image_3>']) # Returns List[List[str]]
41
+
42
+ # Image array
43
+ import cv2
44
+ image_array = cv2.imread('<path_to_plate_image>')
45
+ result = process_ocr(image_array) # Returns List[str]
46
+ ```
47
+
48
+ ### 2. `infer.py`
49
+
50
+ This script integrates YOLO-based license plate detection with OCR to process images or videos.
51
+
52
+ #### Main Function: `infer`
53
+ - **Purpose**: Processes an input image or video to detect license plates and extract text.
54
+ - **Input Arguments**:
55
+ - `input_path` (`str`, required): Path to the input image or video file.
56
+ - `output_path` (`str`, optional): Path to save the processed output file. If not provided, defaults to the input path with `_output` appended (e.g., `input.jpg` → `input_output.jpg`).
57
+ - **Output**:
58
+ - `result_array`: A NumPy array representing the processed image (3D for images, 4D for videos) or `None` if processing fails.
59
+ - `plate_texts`: A list of extracted license plate texts (`List[str]` for images, `List[List[str]]` for videos) or `None` if processing fails.
60
+ - **Behavior**:
61
+ - Loads a YOLO model from `ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt`.
62
+ - Checks if the input is an image or video based on file extension.
63
+ - Calls `process_image` for images or `process_video` for videos.
64
+ - Saves the output with bounding boxes and text annotations.
65
+
66
+ #### Helper Functions
67
+ - **`is_image_file(file_path)`**:
68
+ - Checks if a file has a valid image extension (`.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`).
69
+ - Returns `True` for images, `False` otherwise.
70
+ - **`process_image(model, image_path, output_path)`**:
71
+ - Processes a single image for license plate detection and OCR.
72
+ - Draws bounding boxes and text with confidence scores on the image.
73
+ - Saves the processed image to `output_path`.
74
+ - Returns the processed image array and extracted texts.
75
+ - **`process_video(model, video_path, output_path)`**:
76
+ - Processes a video frame by frame for license plate detection and OCR.
77
+ - Draws bounding boxes and text with confidence scores on each frame.
78
+ - Saves the processed video to `output_path`.
79
+ - Returns a 4D NumPy array of frames and a list of per-frame extracted texts.
80
+
81
+ #### Command-Line Arguments
82
+ The script supports command-line execution with the following arguments:
83
+ - `--input_path` (`str`, required): Path to the input image or video file.
84
+ - Example: `--input_path <path_to_plate_image_1>` or `--input_path video.mp4`
85
+ - `--output_path` (`str`, optional): Path to save the processed output file.
86
+ - Example: `--output_path output/plate_output.jpg`
87
+ - If not specified, defaults to the input path with `_output` appended.
88
+
89
+ #### Example Command-Line Usage
90
+ ```bash
91
+ # Process an image
92
+ python infer.py --input_path <path_to_plate_image_1> --output_path output/plate_output.jpg
93
+
94
+ # Process a video
95
+ python infer.py --input_path video.mp4 --output_path output/video_output.mp4
96
+ ```
97
+
98
+ ## Requirements
99
+ - **Python Libraries**:
100
+ - `paddleocr`: For OCR processing.
101
+ - `ultralytics`: For YOLO model inference.
102
+ - `opencv-python` (`cv2`): For image and video processing.
103
+ - `numpy`: For array operations.
104
+ - **Model File**:
105
+ - YOLO model weights at `ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt`.
106
+ - **Input Files**:
107
+ - Images: `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`.
108
+ - Videos: Any format supported by OpenCV (e.g., `.mp4`).
109
+
110
+ ## Output Format
111
+ - **Images**:
112
+ - Processed image saved with bounding boxes and text annotations.
113
+ - Returns a 3D NumPy array (height, width, channels) and a list of extracted texts (`List[str]`).
114
+ - **Videos**:
115
+ - Processed video saved with bounding boxes and text annotations on each frame.
116
+ - Returns a 4D NumPy array (frames, height, width, channels) and a list of per-frame extracted texts (`List[List[str]]`).
117
+
118
+ ## Error Handling
119
+ - Checks for the existence of the model file and input file.
120
+ - Validates image array dimensions (must be 3D).
121
+ - Handles failures in loading images/videos or during model inference, returning `None` for both outputs in case of errors.
122
+
123
+ ## Notes
124
+ - The YOLO model and PaddleOCR are configured for English license plates. Modify `lang` or model names in `paddleocr_infer.py` for other languages.
125
+ - Ensure the model weights file exists at the specified path.
126
+ - Output directories are created automatically if they do not exist.
127
+ - For videos, frames without detected plates are included in the output to maintain continuity.
docs/scripts/scripts_doc.md ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # YOLOv12 Object Detection Training Guide
3
+
4
+ This guide provides instructions for training an object detection model using YOLOv12. The example below demonstrates how to fine-tune the YOLOv12n model. Pre-trained checkpoints are available for download from the Ultralytics Releases page. You can see more at this URL:
5
+ [Ultralytics Releases](https://github.com/ultralytics/assets/releases)
6
+
7
+ ## Prerequisites
8
+
9
+ - Ensure you have the Ultralytics YOLO package installed.
10
+
11
+ - Download the desired YOLOv12 model checkpoint (e.g., yolo12n.pt) using the provided script.
12
+
13
+
14
+ ## 1 Downloading Pre-trained Models
15
+
16
+ To download YOLOv12 model checkpoints, run the following command:
17
+
18
+ ```bash
19
+ python scripts/download_ckpts.py \
20
+ --url <yolo_model_released_url> \
21
+ --output-dir <saved_yolo_model_path>
22
+ ```
23
+
24
+ This will save the pre-trained weights to the ./ckpts/raw/ directory.
25
+
26
+ ## 2 Process Dataset
27
+ Here is the CLI command to download and process datasets.
28
+ ```bash
29
+ python scripts/download_and_process_datasets.py \
30
+ --output-dir <combined_dataset_path> \
31
+ --dataset-base-dir <directory_containing_all_datasets> \
32
+ --config <datasets_config_path> \
33
+ --platforms <list_of_platforms_to_download_from> \ # e.g., ["kaggle", "roboflow", "huggingface"]
34
+ --roboflow-api-key <roboflow_api_key> # Optional: required if "roboflow" is included in --platforms
35
+ ```
36
+ For example:
37
+ ```bash
38
+ python scripts/download_and_process_datasets.py \
39
+ --output-dir ./datasets/yolo_standard_dataset \
40
+ --dataset-base-dir ./datasets/all_datasets \
41
+ --config ./config/dataset_config.yaml \
42
+ --roboflow-api-key YOUR_ROBOFLOW_APIKEY \
43
+ --platforms "kaggle" "roboflow" "huggingface" # e.g., ["kaggle", "roboflow", "huggingface"]
44
+ ```
45
+ For help:
46
+ ```bash
47
+ python scripts/download_and_process_datasets.py -h
48
+ ```
49
+ ## 3 Fine-Tuning the Model
50
+ <!--
51
+ To fine-tune a YOLOv12 model for object detection, use the provided training script with customizable parameters. Run the following command and adjust the arguments based on your requirements:
52
+
53
+ ```bash
54
+ python scripts/train_yolo.py \
55
+ --epochs <number_of_epochs> \
56
+ --batch <batch_size> \
57
+ --device <cuda_device_id_or_list|cpu> \
58
+ --project <path_to_save_results> \
59
+ --name <project_name> \
60
+ --resume # Optional: resume training from the last checkpoint
61
+ ```
62
+
63
+ ### Example Configuration
64
+
65
+ For reference, the equivalent configuration using the yolo CLI command is shown below:
66
+
67
+ ```bash
68
+ python scripts/train_yolo.py\
69
+ --epochs 100 \
70
+ --batch 32 \
71
+ --device 0 \
72
+ --project "./ckpts/finetune/runs" \
73
+ --name "license_plate_detector"
74
+ ```
75
+
76
+ ### More Configurations
77
+ Run this CLI command to show `Help`.
78
+ ```bash
79
+ python scripts/train_yolo.py -h
80
+ ```
81
+ -->
82
+ To fine-tune a YOLOv12 model for object detection, use the provided training script with customizable parameters. Run the following command and adjust the arguments based on your requirements:
83
+
84
+ ```bash
85
+ yolo detect train \
86
+ model=<yolo_model_path or yolo_version_name> \
87
+ data=<dataset_config_path> \
88
+ epochs=<number_of_epochs> \
89
+ batch=<batch_size> \
90
+ patience=<early_stopping_patience> \
91
+ imgsz=<image_size> \
92
+ lr0=<initial_learning_rate> \
93
+ lrf=<final_learning_rate> \
94
+ device=<device_id or list_of_cuda or "cpu"> \
95
+ project=<output_directory> \
96
+ name=<experiment_name> \
97
+ save=<true or false> \
98
+ resume=<true or false>
99
+ ```
100
+
101
+ ### Example Configuration
102
+
103
+ For reference, the equivalent configuration using the yolo CLI command is shown below:
104
+ ```bash
105
+ yolo detect train \
106
+ model="./ckpts/raw/yolo12n.pt" \
107
+ data="./datasets/yolo_standard_dataset/data.yaml" \
108
+ epochs=100 \
109
+ batch=32 \
110
+ patience=20 \
111
+ imgsz=640 \
112
+ lr0=0.01 \
113
+ lrf=0.001 \
114
+ device=0 \
115
+ project="./ckpts/finetune/runs" \
116
+ name="license_plate_detector" \
117
+ save=true \
118
+ resume=false
119
+ ```
120
+ ### More Configurations
121
+ Run this CLI command to show `Help`.
122
+ ```bash
123
+ yolo --help
124
+ ```
125
+
126
+ ## Using PaddleOCR
docs/training/training_doc.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training Arguments for YOLOv12 License Plate Detection
2
+
3
+ Below are the command-line arguments used to configure the training of a YOLOv12 model for license plate detection.
4
+
5
+ | Argument | Type | Default Value | Description |
6
+ |----------------|------------|--------------------------------------------|-----------------------------------------------------------------------------|
7
+ | `--model` | `str` | `./ckpts/raw/yolo12n.pt` | Path to the model file or model name (e.g., "yolo12n.pt"). |
8
+ | `--data` | `str` | `./datasets/yolo_standard_dataset/data.yaml`| Path to the dataset YAML file specifying the dataset configuration. |
9
+ | `--epochs` | `int` | `100` | Number of training epochs. |
10
+ | `--batch` | `int` | `64` | Batch size for training. |
11
+ | `--resume` | `boolean` | `False` | Resume training from the last checkpoint if set. |
12
+ | `--patience` | `int` | `20` | Number of epochs to wait for improvement before early stopping. |
13
+ | `--lr0` | `float` | `0.01` | Initial learning rate for training. |
14
+ | `--lrf` | `float` | `0.001` | Final learning rate for training. |
15
+ | `--device` | `str` | `0` | Device to train on (e.g., `0` for GPU, `[0,1]` for multiple GPUs, or `cpu`). |
16
+ | `--project` | `str` | `./ckpts/finetune/runs` | Directory to save training results. |
17
+ | `--name` | `str` | `license_plate_detector` | Name of the training run. |
18
+ | `--save` | `boolean` | `True` | Save training results if set. |
requirements/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ opencv-python
2
+ ultralytics
3
+ roboflow
4
+ wget
5
+ ffmpeg-python
6
+ paddleocr
7
+ paddlepaddle-gpu
8
+ paddlepaddle
requirements/requirements_compatible.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ opencv-python==4.11.0.86
2
+ ultralytics==8.3.162
3
+ roboflow==1.1.66
4
+ wget==3.2
5
+ ffmpeg-python==0.2.0
6
+ paddleocr==2.9.0
7
+ paddlepaddle-gpu==2.6.2
8
+ paddlepaddle==2.6.2
scripts/download_and_process_datasets.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ from pathlib import Path
4
+ import sys
5
+ import os
6
+
7
+ # Append the current directory to sys.path
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
9
+ "src", "license_plate_detector_ocr", "data")))
10
+
11
+ from dataset_processing import config_loader, downloader, processor, converter
12
+
13
def main(args):
    """Download the configured license-plate datasets and merge them into one YOLO dataset.

    Reads the dataset list from the YAML config, downloads each entry from the
    requested platform(s) into numbered sub-directories, converts every source
    layout to the YOLO format, and finally post-processes the combined output.
    """
    logging.basicConfig(filename='dataset_conversion.log', level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    datasets = config_loader.load_config(args.config)['datasets']
    for directory in (args.dataset_base_dir, args.output_dir):
        os.makedirs(directory, exist_ok=True)

    base_dir = Path(args.dataset_base_dir)

    # Each dataset lands in its own dataset_<idx> folder so the converters
    # below can locate it by index.
    for idx, ds in enumerate(datasets):
        target = base_dir / f"dataset_{idx}"
        if ds['type'] == 'kaggle' and 'kaggle' in args.platforms:
            downloader.download_kaggle_dataset(ds['id'], target)
        elif ds['type'] == 'roboflow' and 'roboflow' in args.platforms:
            downloader.download_roboflow_dataset(ds['id'], ds['format'], ds['version'],
                                                 target, args.roboflow_api_key)
        elif ds['type'] == 'huggingface' and 'huggingface' in args.platforms:
            downloader.download_huggingface_dataset(ds['id'], target)

    # Kaggle-style folders are converted first; Roboflow and Hugging Face
    # downloads are then merged into the same combined output directory.
    converter.coco_kaggle_to_yolo(args.dataset_base_dir, args.output_dir)
    for idx, ds in enumerate(datasets):
        if ds['type'] == 'roboflow' and 'roboflow' in args.platforms:
            converter.copy_dataset_to_combined_folder(base_dir / f"dataset_{idx}",
                                                      args.output_dir)
    for idx, ds in enumerate(datasets):
        if ds['type'] == 'huggingface' and 'huggingface' in args.platforms:
            converter.convert_coco_huggingface_to_yolo(
                dataset_base_path=base_dir / f"dataset_{idx}/license-plate-object-detection/data",
                output_dir=args.output_dir)

    processor.process_folders(args.output_dir)
42
+
43
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download and process license plate datasets.")
    parser.add_argument("--output-dir", default="./data/yolo_standard_dataset",
                        help="Output directory for YOLOv11 dataset")
    parser.add_argument("--dataset-base-dir", default="./data/all_datasets",
                        help="Base directory for downloaded datasets")
    # BUGFIX: the key used to be declared with required='roboflow' in sys.argv,
    # which only fired when the literal word "roboflow" was typed on the
    # command line.  With the default --platforms (which includes roboflow)
    # the script would run with a None API key and fail later.  The key is
    # now validated after parsing against the actual platform list.
    parser.add_argument("--roboflow-api-key", default=None,
                        help="Roboflow API key (required when 'roboflow' is among --platforms)")
    parser.add_argument("--config", default="./configs/datasets_config.yaml",
                        help="Path to dataset config YAML")
    parser.add_argument("--platforms", nargs="*",
                        default=["kaggle", "roboflow", "huggingface"],
                        choices=["kaggle", "roboflow", "huggingface"],
                        help="Platforms to download (default: all)")

    args = parser.parse_args()
    if "roboflow" in args.platforms and not args.roboflow_api_key:
        parser.error("--roboflow-api-key is required when 'roboflow' is in --platforms")

    main(args)
scripts/download_ckpts.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wget
2
+ import os
3
+ import argparse
4
+ from huggingface_hub import hf_hub_download
5
+
6
def download_model_ckpts(args):
    """Download model checkpoints.

    Fetches the raw YOLO checkpoint from ``args.url`` into ``<output_dir>/raw/``
    via wget, then downloads the fine-tuned ``best.pt`` from the Hugging Face
    repository ``args.hf_repo`` into ``args.output_dir`` (the file keeps its
    repo-relative path below that directory).
    """
    # --- Raw YOLO checkpoint via wget -------------------------------------
    model_url = args.url
    output_dir = args.output_dir
    raw_output_dir = os.path.join(output_dir, 'raw')  # subdirectory for the wget download

    # The file name is the last URL path segment (e.g. "yolo12n.pt").
    filename = model_url.split("/")[-1]
    output_path = os.path.join(raw_output_dir, filename)
    os.makedirs(raw_output_dir, exist_ok=True)

    wget.download(
        model_url,
        out=output_path,
        bar=wget.bar_adaptive  # show a progress bar in the terminal
    )
    print(f"\nDownloaded model from {model_url} to {output_path}")

    # --- Fine-tuned weights from Hugging Face -----------------------------
    hf_repo = args.hf_repo
    model_file = "yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # hf_hub_download recreates the repo-relative path under local_dir, so the
    # file lands at <output_dir>/yolo/finetune/.../best.pt.  The previous
    # version also prepared an unused "<output_dir>/best.pt" path (dead code:
    # nothing was ever written there) and passed the deprecated
    # local_dir_use_symlinks flag; both have been removed.
    downloaded_path = hf_hub_download(
        repo_id=hf_repo,
        filename=model_file,
        local_dir=output_dir,
    )
    print(f"\nDownloaded model file from {hf_repo} to {downloaded_path}")
48
+
49
if __name__ == "__main__":
    # Command-line entry point: build the argument parser and start the
    # checkpoint downloads.
    parser = argparse.ArgumentParser(
        description="Download yolo12n.pt from URL and best.pt from Hugging Face repository.")
    parser.add_argument(
        '--url', type=str,
        default='https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12n.pt',
        help='URL of the yolo12n.pt model to download via wget')
    parser.add_argument(
        '--output-dir', type=str, default='./ckpts',
        help='Base output directory for downloaded model files')
    parser.add_argument(
        '--hf-repo', type=str, default='danhtran2mind/license-plate-detector-ocr',
        help='Hugging Face repository ID to download model file from')

    download_model_ckpts(parser.parse_args())
scripts/old-download_and_process_datasets.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ from pathlib import Path
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to sys.path
8
+ # parent_dir = str(Path(__file__).resolve().parents[1])
9
+ # sys.path.insert(0, parent_dir)
10
+
11
+ # # Append datasets folder to sys.path
12
+ # datasets_dir = os.path.join(parent_dir, "datasets")
13
+ # sys.path.insert(0, datasets_dir)
14
+
15
+ # Append the current directory to sys.path
16
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
17
+ "src", "license_plate_detector_ocr", "data")))
18
+
19
+ from dataset_processing import config_loader, downloader, processor, converter
20
+
21
def main(args):
    """Fetch every configured dataset and assemble a single YOLO dataset tree."""
    logging.basicConfig(filename='dataset_conversion.log', level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    cfg = config_loader.load_config(args.config)
    dataset_entries = cfg['datasets']
    os.makedirs(args.dataset_base_dir, exist_ok=True)
    os.makedirs(args.output_dir, exist_ok=True)

    base = Path(args.dataset_base_dir)

    # Phase 1: download every requested dataset into dataset_<idx> folders.
    for idx, entry in enumerate(dataset_entries):
        dest = base / f"dataset_{idx}"
        kind = entry['type']
        if kind == 'kaggle' and 'kaggle' in args.platforms:
            downloader.download_kaggle_dataset(entry['id'], dest)
        elif kind == 'roboflow' and 'roboflow' in args.platforms:
            downloader.download_roboflow_dataset(entry['id'], entry['format'],
                                                 entry['version'], dest,
                                                 args.roboflow_api_key)
        elif kind == 'huggingface' and 'huggingface' in args.platforms:
            downloader.download_huggingface_dataset(entry['id'], dest)

    # Phase 2: convert/merge everything into the combined output directory.
    converter.coco_kaggle_to_yolo(args.dataset_base_dir, args.output_dir)
    for idx, entry in enumerate(dataset_entries):
        if entry['type'] == 'roboflow' and 'roboflow' in args.platforms:
            converter.copy_dataset_to_combined_folder(base / f"dataset_{idx}",
                                                      args.output_dir)
    for idx, entry in enumerate(dataset_entries):
        if entry['type'] == 'huggingface' and 'huggingface' in args.platforms:
            converter.convert_coco_huggingface_to_yolo(
                dataset_base_path=base / f"dataset_{idx}/license-plate-object-detection/data",
                output_dir=args.output_dir)

    # Phase 3: final clean-up/normalisation of the combined folders.
    processor.process_folders(args.output_dir)
50
+
51
if __name__ == "__main__":
    # CLI entry point (legacy variant: the Roboflow key is always mandatory here).
    parser = argparse.ArgumentParser(description="Download and process license plate datasets.")
    parser.add_argument("--output-dir", default="./data/yolo_standard_dataset",
                        help="Output directory for YOLOv11 dataset")
    parser.add_argument("--dataset-base-dir", default="./data/all_datasets",
                        help="Base directory for downloaded datasets")
    parser.add_argument("--roboflow-api-key", required=True,
                        help="Roboflow API key for downloading datasets")
    parser.add_argument("--config", default="./configs/datasets_config.yaml",
                        help="Path to dataset config YAML")
    parser.add_argument("--platforms", nargs="*",
                        default=["kaggle", "roboflow", "huggingface"],
                        choices=["kaggle", "roboflow", "huggingface"],
                        help="Platforms to download (default: all)")

    main(parser.parse_args())
src/license_plate_detector_ocr/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/data/dataset_processing/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/data/dataset_processing/config_loader.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import yaml
2
+
3
def load_config(config_path):
    """Read *config_path* and return its parsed YAML content."""
    with open(config_path, 'r') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
src/license_plate_detector_ocr/data/dataset_processing/converter.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import uuid
5
+ import yaml
6
+ import logging
7
+ from pathlib import Path
8
+
9
def convert_coco_huggingface_to_yolo(dataset_base_path, output_dir):
    """Convert a COCO-annotated Hugging Face dataset to the YOLO layout.

    For each split (train/valid/test) that contains ``_annotations.coco.json``,
    writes normalized ``<cls> <xc> <yc> <w> <h>`` label files and copies the
    corresponding images into ``<output_dir>/<split>/{labels,images}``.
    A ``data.yaml`` is created once if it does not already exist.
    """
    for dataset_type in ["train", "valid", "test"]:
        coco_path = Path(dataset_base_path) / dataset_type / "_annotations.coco.json"
        if not coco_path.exists():
            logging.info(f"Skipping {dataset_type}: {coco_path} not found")
            continue
        yolo_dir = Path(output_dir) / dataset_type
        images_dir = yolo_dir / "images"
        labels_dir = yolo_dir / "labels"
        for dir_path in [yolo_dir, images_dir, labels_dir]:
            dir_path.mkdir(parents=True, exist_ok=True)
        with open(coco_path) as f:
            coco_data = json.load(f)
        img_id_to_file = {img['id']: img['file_name'] for img in coco_data['images']}
        img_id_to_dims = {img['id']: (img['width'], img['height']) for img in coco_data['images']}
        # BUGFIX: use one stable renamed stem per *image* instead of a fresh
        # UUID per *annotation*.  The previous code copied each image once per
        # bounding box and scattered an image's boxes across one-line label
        # files (which also made the append-mode open pointless).  Now all
        # boxes of an image share a single label file and the image is copied
        # exactly once.
        img_id_to_new_name = {}
        for ann in coco_data['annotations']:
            img_id = ann['image_id']
            cat_id = ann['category_id']
            x_min, y_min, bbox_w, bbox_h = ann['bbox']
            width, height = img_id_to_dims[img_id]
            # COCO boxes are absolute [x_min, y_min, w, h]; YOLO wants center
            # coordinates and sizes normalized to the image dimensions.
            x_center = (x_min + bbox_w / 2) / width
            y_center = (y_min + bbox_h / 2) / height
            norm_w = bbox_w / width
            norm_h = bbox_h / height
            if img_id not in img_id_to_new_name:
                original_filename = Path(img_id_to_file[img_id]).stem
                img_id_to_new_name[img_id] = f"{original_filename}_{uuid.uuid4()}"
            new_filename = img_id_to_new_name[img_id]
            label_file = labels_dir / f"{new_filename}.txt"
            with open(label_file, 'a') as f:
                f.write(f"{cat_id} {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}\n")
            src_img = Path(coco_path).parent / img_id_to_file[img_id]
            dst_img = images_dir / f"{new_filename}{Path(img_id_to_file[img_id]).suffix}"
            if src_img.exists() and not dst_img.exists():
                shutil.copy(src_img, dst_img)
                logging.info(f"Copied {src_img} to {dst_img}")
    # NOTE(review): the diff-flattened source does not show whether this block
    # was indented inside the split loop; writing it once after all splits is
    # equivalent except when every split is missing — confirm against the repo.
    yaml_content = {
        "path": str(Path(output_dir).absolute()),
        "train": "train/images",
        "valid": "valid/images",
        "test": "test/images",
        "names": {0: "license_plate"}
    }
    yaml_path = Path(output_dir) / "data.yaml"
    if not yaml_path.exists():
        with open(yaml_path, 'w') as f:
            yaml.dump(yaml_content, f, default_flow_style=False)
        logging.info(f"Created {yaml_path}")
56
+
57
def create_yolo_structure(output_dir):
    """Create the empty train/valid/test images+labels tree for a YOLO dataset."""
    for split in ('train', 'valid', 'test'):
        for subdir in ('images', 'labels'):
            os.makedirs(os.path.join(output_dir, split, subdir), exist_ok=True)
    logging.info(f"Created YOLOv11 directory structure at {output_dir}")
62
+
63
def copy_matched_files(src_image_dir, src_label_dir, dest_image_dir, dest_label_dir, split):
    """Copy image/label pairs whose file stems match (case-insensitively).

    Each matched pair is renamed to ``<stem>_<split>_<uuid>`` so files coming
    from different source datasets cannot collide in the combined output.
    Unmatched images and labels are logged as warnings.

    Returns a tuple ``(copied_image_names, n_images_without_labels,
    n_labels_without_images)``.
    """
    src_images = Path(src_image_dir)
    src_labels = Path(src_label_dir)
    dst_images = Path(dest_image_dir)
    dst_labels = Path(dest_label_dir)

    # Index images and labels by lower-cased stem so "Car.JPG" matches "car.txt".
    image_index = {}
    for ext in ('jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG'):
        for path in src_images.glob(f'*.{ext}'):
            image_index[path.stem.lower()] = path
    label_index = {path.stem.lower(): path for path in src_labels.glob('*.txt')}

    copied = set()
    matched = image_index.keys() & label_index.keys()
    for stem in matched:
        image_path = image_index[stem]
        label_path = label_index[stem]
        renamed = f"{stem}_{split}_{uuid.uuid4()}"
        image_target = dst_images / f"{renamed}{image_path.suffix}"
        label_target = dst_labels / f"{renamed}{label_path.suffix}"
        shutil.copy(image_path, image_target)
        shutil.copy(label_path, label_target)
        copied.add(image_target.name)
        logging.info(f"Copied {image_path} to {image_target}")
        logging.info(f"Copied {label_path} to {label_target}")

    orphan_images = image_index.keys() - matched
    orphan_labels = label_index.keys() - matched
    for stem in orphan_images:
        logging.warning(f"Image without label in {src_image_dir}: {image_index[stem]}")
    for stem in orphan_labels:
        logging.warning(f"Label without image in {src_label_dir}: {label_index[stem]}")
    return copied, len(orphan_images), len(orphan_labels)
94
+
95
def create_data_yaml(output_dir):
    """Write the YOLO ``data.yaml`` (single class: license_plate) into *output_dir*."""
    dataset_spec = {
        'train': '../train/images',
        'val': '../valid/images',
        'test': '../test/images',
        'nc': 1,
        'names': ['license_plate']
    }
    yaml_file = os.path.join(output_dir, 'data.yaml')
    with open(yaml_file, 'w') as handle:
        yaml.dump(dataset_spec, handle, default_flow_style=False)
    logging.info("Created data.yaml")
106
+
107
def coco_kaggle_to_yolo(all_datasets_path, output_dir="yolo_standard_dataset"):
    """Merge the six known Kaggle downloads into one YOLO-layout dataset.

    The mapping below records where each Kaggle dataset keeps its images and
    labels for every split; matched pairs are copied into *output_dir* and a
    ``data.yaml`` is written at the end.  Mismatched files are counted and
    logged to ``dataset_conversion.log``.
    """
    # Per-dataset source layout, relative to all_datasets_path/<dataset_n>.
    # An empty split dict means that dataset has no such split.
    datasets = {
        'dataset_0': {
            'train': {'images': 'images/train', 'labels': 'labels/train'},
            'valid': {'images': 'images/val', 'labels': 'labels/val'},
            'test': {'images': 'images/test', 'labels': 'labels/test'}
        },
        'dataset_1': {
            'train': {'images': 'images/train', 'labels': 'labels/train'},
            'valid': {'images': 'images/val', 'labels': 'labels/val'},
            'test': {}
        },
        'dataset_2': {
            'train': {'images': 'archive/images/train', 'labels': 'archive/labels/train'},
            'valid': {'images': 'archive/images/val', 'labels': 'archive/labels/val'},
            'test': {}
        },
        'dataset_3': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {'images': 'test/images', 'labels': 'test/labels'}
        },
        'dataset_4': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {}
        },
        'dataset_5': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {}
        }
    }
    create_yolo_structure(output_dir)
    total_mismatches = 0
    from tqdm import tqdm  # imported lazily; only needed for the progress bar
    for dataset_name, splits in tqdm(datasets.items(), desc="Processing Kaggle Datasets"):
        for split in ('train', 'valid', 'test'):
            layout = splits.get(split)
            if not layout:
                continue
            src_images = os.path.join(all_datasets_path, dataset_name, layout['images'])
            src_labels = os.path.join(all_datasets_path, dataset_name, layout['labels'])
            dest_images = os.path.join(output_dir, split, 'images')
            dest_labels = os.path.join(output_dir, split, 'labels')
            _, img_mismatches, lbl_mismatches = copy_matched_files(
                src_images, src_labels, dest_images, dest_labels, split
            )
            total_mismatches += img_mismatches + lbl_mismatches
            if img_mismatches or lbl_mismatches:
                logging.warning(f"Mismatches in {dataset_name} {split} split: "
                                f"{img_mismatches} images without labels, {lbl_mismatches} labels without images")
    create_data_yaml(output_dir)
    logging.info(f"Dataset conversion completed. Total mismatches: {total_mismatches}")
    if total_mismatches > 0:
        logging.info("Check dataset_conversion.log for details on mismatched files")
162
+
163
def copy_dataset_to_combined_folder(dataset_path, combined_dataset_folder):
    """Merge one already-YOLO dataset (train/valid/test tree) into the combined folder.

    Files are renamed with the split name plus one UUID generated per split,
    so repeated merges from different source datasets cannot overwrite each
    other.  Missing source folders are logged and skipped; individual copy
    failures are logged without aborting the merge.
    """
    for split in ('train', 'valid', 'test'):
        split_src = dataset_path / split
        split_dst = Path(combined_dataset_folder) / split
        if not split_src.exists():
            logging.warning(f"Source folder does not exist: {split_src}")
            continue
        suffix_id = str(uuid.uuid4())  # one id per split, shared by its files
        for kind in ('images', 'labels'):
            kind_src = split_src / kind
            kind_dst = split_dst / kind
            if not kind_src.exists():
                logging.warning(f"Source directory does not exist: {kind_src}")
                continue
            kind_dst.mkdir(parents=True, exist_ok=True)
            for source_file in kind_src.glob('*'):
                try:
                    target_file = kind_dst / f"{source_file.stem}_{split}_{suffix_id}{source_file.suffix}"
                    shutil.copy(source_file, target_file)
                    logging.info(f"Copied {source_file} to {target_file}")
                except Exception as copy_err:
                    logging.error(f"Failed to copy {source_file} to {target_file}: {str(copy_err)}")
src/license_plate_detector_ocr/data/dataset_processing/downloader.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ import zipfile
3
+ import subprocess
4
+ from pathlib import Path
5
+ import logging
6
+ import os
7
+
8
def download_kaggle_dataset(dataset_id, output_dir):
    """Download a Kaggle dataset archive and extract it into ``output_dir``.

    Args:
        dataset_id: Kaggle dataset identifier, e.g. "user/dataset-name".
        output_dir: Destination directory (str or Path); created if missing.

    Errors are logged rather than raised, matching the other downloaders.
    """
    try:
        # BUG FIX: accept both str and Path (the old code crashed on str with
        # `unsupported operand /`); consistent with download_roboflow_dataset.
        output_dir = Path(output_dir)
        dataset_name = dataset_id.split("/")[-1]
        zip_path = output_dir / f"{dataset_name}.zip"
        output_dir.mkdir(parents=True, exist_ok=True)
        urllib.request.urlretrieve(
            f"https://www.kaggle.com/api/v1/datasets/download/{dataset_id}",
            str(zip_path)
        )
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(output_dir)
        # Remove the archive once extracted so stale zips don't accumulate.
        try:
            zip_path.unlink()
        except OSError:
            logging.warning(f"Could not remove archive {zip_path}")
        logging.info(f"Downloaded Kaggle dataset: {dataset_id}")
    except Exception as e:
        logging.error(f"Failed to download Kaggle dataset {dataset_id}: {str(e)}")
22
+
23
def download_roboflow_dataset(dataset_id, format_type, version, output_dir, api_key):
    """Download one Roboflow dataset version and mirror its contents into ``output_dir``.

    Errors (including a failed cleanup of the temporary download dir) are logged
    instead of raised.
    """
    try:
        from roboflow import Roboflow
        import shutil

        username, dataset_name = dataset_id.split("/")
        client = Roboflow(api_key=api_key)
        downloaded = (
            client.workspace(username)
            .project(dataset_name)
            .version(version)
            .download(format_type)
        )

        dataset_path = Path(downloaded.location)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Mirror every entry from the Roboflow download location into output_dir,
        # replacing any directory that already exists at the destination.
        for entry in dataset_path.iterdir():
            target = output_dir / entry.name
            if entry.is_dir():
                if target.exists():
                    shutil.rmtree(target)
                shutil.copytree(entry, target)
            else:
                shutil.copy2(entry, target)
        logging.info(f"Downloaded Roboflow dataset: {dataset_id} to {output_dir}")

        # Best-effort removal of the original download directory.
        try:
            shutil.rmtree(dataset_path)
        except Exception as cleanup_e:
            logging.warning(f"Could not remove original Roboflow download dir {dataset_path}: {cleanup_e}")
    except Exception as e:
        logging.error(f"Failed to download Roboflow dataset {dataset_id}: {str(e)}")
52
+
53
def download_huggingface_dataset(dataset_id, output_dir):
    """Clone a HuggingFace dataset repo and extract its per-split archives.

    Expects the repo to hold ``data/train.zip``, ``data/valid.zip`` and
    ``data/test.zip``; each archive is extracted into a matching
    ``data/<split>/`` directory.

    Args:
        dataset_id: HuggingFace dataset id, e.g. "user/dataset-name".
        output_dir: Directory to clone into (str or Path); created if missing.
    """
    try:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        # check=True so a failed clone raises and is logged below instead of
        # being silently ignored.
        subprocess.run(
            ["git", "clone", f"https://huggingface.co/datasets/{dataset_id}"],
            cwd=output_dir,
            check=True,
        )
        data_dir = output_dir / dataset_id.split("/")[-1] / "data"
        for split in ["train", "valid", "test"]:
            (data_dir / split).mkdir(exist_ok=True)
            # BUG FIX: pair each archive with its own split directory; the old
            # zip() mismatch extracted test.zip into valid/ and valid.zip into test/.
            zip_path = data_dir / f"{split}.zip"
            if zip_path.exists():
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    zf.extractall(data_dir / split)
        logging.info(f"Downloaded HuggingFace dataset: {dataset_id}")
    except Exception as e:
        logging.error(f"Failed to download HuggingFace dataset {dataset_id}: {str(e)}")
src/license_plate_detector_ocr/data/dataset_processing/processor.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import logging
4
+
5
def check_and_remove_invalid_pairs(base_path, folder):
    """Delete image/label pairs whose YOLO label file is empty or malformed.

    A label file is invalid when it is empty, unreadable, or contains any line
    that does not have exactly 5 whitespace-separated fields
    (class id + 4 normalized bbox values).
    """
    labels_dir = os.path.join(base_path, folder, 'labels')
    images_dir = os.path.join(base_path, folder, 'images')
    for label_path in glob.glob(os.path.join(labels_dir, '*.txt')):
        try:
            with open(label_path, 'r') as handle:
                contents = handle.read().strip()
        except Exception as e:
            logging.error(f"Error reading {label_path}: {e}")
            remove_pair(label_path, images_dir)
            continue
        if not contents:
            remove_pair(label_path, images_dir)
            continue
        # A single bad line invalidates the whole pair.
        if any(len(line.strip().split()) != 5 for line in contents.splitlines()):
            remove_pair(label_path, images_dir)
25
+
26
def remove_pair(label_path, image_dir):
    """Remove a label file and its sibling ``.jpg`` image, logging any missing file."""
    stem = os.path.splitext(os.path.basename(label_path))[0]
    image_path = os.path.join(image_dir, f"{stem}.jpg")
    # Delete both halves of the pair; a missing file is only worth a warning.
    for path, kind in ((label_path, "Label"), (image_path, "Image")):
        try:
            os.remove(path)
        except FileNotFoundError:
            logging.warning(f"{kind} file not found: {path}")
37
+
38
def process_folders(base_path):
    """Validate and clean the train, valid and test splits under ``base_path``."""
    for split in ('train', 'valid', 'test'):
        logging.info(f"Processing {split} folder...")
        check_and_remove_invalid_pairs(base_path, split)
src/license_plate_detector_ocr/infer.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# BUG FIX: extend sys.path BEFORE importing from the local `inference`
# package; the old order appended the path only after the import, which
# fails whenever the script is launched from another working directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))

import cv2
import numpy as np
from ultralytics import YOLO
from inference.paddleocr_infer import process_ocr
10
+
11
def is_image_file(file_path):
    """Return True when ``file_path`` has a known image extension (case-insensitive)."""
    _, ext = os.path.splitext(file_path)
    return ext.lower() in {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
15
+
16
def process_image(model, image_path, output_path=None):
    """Detect license plates in one image, OCR each crop, and save an annotated copy.

    Args:
        model: Loaded YOLO detector.
        image_path: Path of the image to process.
        output_path: Where to write the annotated image; defaults to
            ``<input>_output.jpg`` next to the input.

    Returns:
        (annotated_image, plate_texts), or (None, None) on load/inference failure.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return None, None

    try:
        detections = model(image_path)
    except Exception as e:
        print(f"Error during image inference: {e}")
        return None, None

    plate_texts = []
    for detection in detections:
        for box in detection.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0]

            # OCR the cropped plate region; fall back to a placeholder string.
            crop = image[y1:y2, x1:x2]
            texts = process_ocr(crop)
            plate_text = texts[0] if texts else "No text detected"
            plate_texts.append(plate_text)

            # Annotate the original image with the box and recognized text.
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{plate_text} ({confidence:.2f})"
            cv2.putText(image, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Derive an output path next to the input when none is given.
    if output_path is None:
        output_path = os.path.splitext(image_path)[0] + '_output.jpg'

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    cv2.imwrite(output_path, image)
    print(f"Saved processed image to {output_path}")

    return image, plate_texts
58
+
59
def process_video(model, video_path, output_path=None):
    """Detect license plates frame-by-frame in a video, OCR them, and save an annotated copy.

    Args:
        model: Loaded YOLO detector.
        video_path: Path of the video to process.
        output_path: Where to write the annotated video; defaults to
            ``<input>_output.mp4`` next to the input.

    Returns:
        (frames_array, per_frame_plate_texts) where frames_array is a 4D
        NumPy array of annotated frames, or (None, None) on failure.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video at {video_path}")
        return None, None

    # Source properties drive the writer so the output matches the input.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    if output_path is None:
        output_path = os.path.splitext(video_path)[0] + '_output.mp4'
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                             fps, (width, height))

    frames = []
    all_plate_texts = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading frame.")
            break

        try:
            detections = model(frame)
        except Exception as e:
            print(f"Error during video inference: {e}")
            break

        frame_plate_texts = []
        for detection in detections:
            for box in detection.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = box.conf[0]

                # OCR the cropped plate region for this detection.
                crop = frame[y1:y2, x1:x2]
                texts = process_ocr(crop)
                plate_text = texts[0] if texts else "No text detected"
                frame_plate_texts.append(plate_text)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{plate_text} ({confidence:.2f})"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Every frame is kept (annotated or not) so the output video stays
        # continuous; frames with no detections contribute an empty text list.
        frames.append(frame)
        all_plate_texts.append(frame_plate_texts)
        writer.write(frame)

    cap.release()
    writer.release()
    print(f"Saved processed video to {output_path}")

    if not frames:
        print("No frames processed.")
        return None, None

    # Stack to shape (num_frames, height, width, channels).
    return np.stack(frames, axis=0), all_plate_texts
141
+
142
def infer(input_path, output_path=None):
    """Run license plate detection + OCR on an image or video file.

    Args:
        input_path: Path to the input image or video.
        output_path: Optional path for the annotated output file.

    Returns:
        (result_array, plate_texts); (None, None) on any failure.
    """
    model_path = "ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # Fail fast with clear messages before loading anything heavy.
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return None, None
    if not os.path.exists(input_path):
        print(f"Error: Input file not found at {input_path}")
        return None, None

    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

    # Dispatch on file type: still image vs. anything else (treated as video).
    handler = process_image if is_image_file(input_path) else process_video
    return handler(model, input_path, output_path)
166
+
167
if __name__ == "__main__":
    import argparse

    # CLI entry point for one-off image/video inference.
    cli = argparse.ArgumentParser(description="Detect and read license plates in an image or video.")
    cli.add_argument("--input_path", type=str, required=True, help="Path to the input image or video file")
    cli.add_argument("--output_path", type=str, default=None, help="Path to save the output file (optional)")
    cli_args = cli.parse_args()
    result_array, plate_texts = infer(cli_args.input_path, cli_args.output_path)
src/license_plate_detector_ocr/inference/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/inference/paddleocr_infer.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from paddleocr import PaddleOCR
from typing import Union, List
import numpy as np

# Initialize PaddleOCR once with optimized settings for English license plate recognition.
# Document orientation classification, unwarping, and per-textline orientation are
# all disabled — presumably plates are assumed roughly upright and undistorted;
# TODO confirm for skewed camera angles. The "slim" PP-OCRv3 det/rec models trade
# some accuracy for smaller size and faster inference.
OCR = PaddleOCR(
    lang='en',
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=False,
    text_detection_model_name='en_PP-OCRv3_det_slim',
    text_recognition_model_name='en_PP-OCRv3_rec_slim'
)
+
15
def process_ocr(image_input: Union[str, List[str], np.ndarray]) -> Union[List[str], List[List[str]]]:
    """
    Process OCR on a single image path, a list of image paths, or a 3D image array.

    Args:
        image_input: A single image path (str), a list of image paths (List[str]), or a 3D NumPy array (np.ndarray)

    Returns:
        For a single image or array: List of extracted text strings
        For multiple images: List of lists, each containing extracted text strings for an image

    Raises:
        ValueError: If a NumPy array input is not 3-dimensional (H, W, C).
    """
    # Convert single inputs to a list for unified processing
    if isinstance(image_input, str):
        image_inputs = [image_input]
    elif isinstance(image_input, np.ndarray):
        if image_input.ndim != 3:
            raise ValueError("Image array must be 3-dimensional (height, width, channels)")
        image_inputs = [image_input]
    else:
        image_inputs = image_input

    # Process each image or array and extract text
    results = []
    for input_item in image_inputs:
        # NOTE(review): the `cls` kwarg was dropped in PaddleOCR 3.x — confirm the
        # pinned PaddleOCR version still accepts it.
        ocr_results = OCR.ocr(input_item, cls=False)  # cls=False since angle classification is disabled
        # Presumably each `line` is one detected text region and word_info[-1][0]
        # is the recognized string in PaddleOCR's [box, (text, score)] result
        # layout — TODO verify against the installed PaddleOCR version.
        plate_list = [' '.join(word_info[-1][0] for word_info in line) for line in ocr_results if line]
        results.append(plate_list)

    # Return a single list for a single image/array, or list of lists for multiple images
    return results[0] if isinstance(image_input, (str, np.ndarray)) else results
45
+
46
if __name__ == '__main__':
    # Manual smoke tests for process_ocr; expects the sample images referenced
    # below to exist in the working directory.
    # Example with a single image path
    single_image = 'plate-1.png'
    single_result = process_ocr(single_image)
    print("Single image path results:")
    print(single_result)
    for plate in single_result:
        print(plate)

    # Example with multiple image paths
    image_list = ['plate-1.png', 'plate-2.png', 'plate-3.jpg']
    multi_results = process_ocr(image_list)
    print("\nMultiple image path results:")
    print(multi_results)
    for idx, plates in enumerate(multi_results):
        print(f"Image {idx + 1} ({image_list[idx]}):")
        for plate in plates:
            print(plate)

    # Example with a 3D image array (simulated)
    # Note: Replace this with actual image data in practice
    import cv2
    image_array = cv2.imread('lp_image.jpg')  # Load an image as a NumPy array
    if image_array is not None:
        array_result = process_ocr(image_array)
        print("\nSingle image array results:")
        print(array_result)
        for plate in array_result:
            print(plate)
    else:
        print("\nFailed to load image array for testing")
src/license_plate_detector_ocr/old-infer.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Append the current directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))

from inference import paddleocr_infer

# NOTE(review): everything below runs at import time (module-level side
# effects, including OCR on hard-coded sample files); this module appears
# to be a superseded draft of infer.py kept for reference.
# Example with multiple images
image_list = ['plate-1.png', 'plate-2.png', 'plate-3.jpg']
multi_results = paddleocr_infer.process_ocr(image_list)
print("\nMultiple image results:")
print(multi_results)
for idx, plates in enumerate(multi_results):
    print(f"Image {idx + 1} ({image_list[idx]}):")
    for plate in plates:
        print(plate)


####yolo####
import cv2
from ultralytics import YOLO
import os
import argparse
import numpy as np
26
+
27
def is_image_file(file_path):
    """Return True when ``file_path`` ends in a recognized image extension."""
    extension = os.path.splitext(file_path)[1]
    return extension.lower() in {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
31
+
32
def process_image(model, image_path):
    """Run plate detection on one image and return it annotated, or None on failure."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return None

    try:
        detections = model(image_path)
    except Exception as e:
        print(f"Error during image inference: {e}")
        return None

    # Draw every detected box with its confidence score.
    for detection in detections:
        for box in detection.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0]
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"License Plate: {confidence:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return image
54
+
55
def process_video(model, video_path):
    """Run plate detection on every frame and return annotated frames as a 4D array.

    Returns None when the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video at {video_path}")
        return None

    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading frame.")
            break

        try:
            detections = model(frame)
        except Exception as e:
            print(f"Error during video inference: {e}")
            break

        # Annotate each detection with its bounding box and confidence score.
        for detection in detections:
            for box in detection.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = box.conf[0]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"License Plate: {confidence:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        frames.append(frame)

    cap.release()
    if not frames:
        print("No frames processed.")
        return None

    # Stack to shape (num_frames, height, width, channels).
    return np.stack(frames, axis=0)
93
+
94
def main(input_path):
    """Dispatch input to image or video processing after validating model and paths.

    Returns the processed array from the chosen handler, or None on failure.
    """
    model_path = "ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # Validate everything up front so failures produce a clear message.
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return None
    if not os.path.exists(input_path):
        print(f"Error: Input file not found at {input_path}")
        return None

    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

    handler = process_image if is_image_file(input_path) else process_video
    return handler(model, input_path)
116
+
117
if __name__ == "__main__":
    # CLI entry point: single positional argument, image or video path.
    arg_parser = argparse.ArgumentParser(description="Detect license plates in an image or video.")
    arg_parser.add_argument("input_path", type=str, help="Path to the input image or video file")
    cli_args = arg_parser.parse_args()
    processed = main(cli_args.input_path)
    if processed is not None:
        print(f"Processed array shape: {processed.shape}")
src/license_plate_detector_ocr/train.py ADDED
File without changes
src/license_plate_detector_ocr/training/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/training/train_yolo.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ultralytics import YOLO
2
+ from pathlib import Path
3
+ import sys
4
+ import os
5
+ import argparse
6
+
7
def train_yolo(args):
    """Fine-tune a YOLO detection model using the options collected by argparse.

    Args:
        args: argparse.Namespace with model, data, epochs, batch, resume,
            patience, lr0, lrf, device, project, name and save attributes.
    """
    # Make the package root importable (kept for parity with other entry points).
    parent_dir = str(Path(__file__).resolve().parents[1])
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)

    detector = YOLO(args.model)

    # Kick off training; every hyperparameter comes straight from the CLI args.
    detector.train(
        data=args.data,
        task='detect',
        mode='train',
        epochs=args.epochs,
        batch=args.batch,
        resume=args.resume,
        patience=args.patience,
        lr0=args.lr0,
        lrf=args.lrf,
        device=args.device,
        project=args.project,
        name=args.name,
        save=args.save,
    )
32
+
33
if __name__ == "__main__":
    # CLI entry point: every training hyperparameter is exposed as a flag.
    arg_parser = argparse.ArgumentParser(description='Train a YOLOv10 model for license plate detection.')
    arg_parser.add_argument('--model', type=str, default='./ckpts/raw/yolo12n.pt', help='Model path or model name like "yolo12n.pt"')
    arg_parser.add_argument('--data', type=str, default='./datasets/yolo_standard_dataset/data.yaml', help='Path to the dataset YAML file')
    arg_parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs')
    arg_parser.add_argument('--batch', type=int, default=64, help='Batch size for training')
    arg_parser.add_argument('--resume', action='store_true', help='Resume training from the last checkpoint')
    arg_parser.add_argument('--patience', type=int, default=20, help='Early stopping patience')
    arg_parser.add_argument('--lr0', type=float, default=0.01, help='Initial learning rate')
    arg_parser.add_argument('--lrf', type=float, default=0.001, help='Final learning rate')
    arg_parser.add_argument('--device', type=str, default='0', help='Device to train on (e.g., 0, [0,1], or cpu)')
    arg_parser.add_argument('--project', type=str, default='./ckpts/finetune/runs', help='Directory to save training results')
    arg_parser.add_argument('--name', type=str, default='license_plate_detector', help='Name of the training run')
    arg_parser.add_argument('--save', action='store_true', default=True, help='Save training results')
    train_yolo(arg_parser.parse_args())