danhtran2mind commited on
Commit
ce500ca
·
verified ·
1 Parent(s): ae05bfd

Upload 38 files

Browse files
Files changed (39) hide show
  1. .gitattributes +2 -0
  2. .python-version +1 -0
  3. LICENSE +21 -0
  4. apps/gradio_app.py +68 -0
  5. apps/gradio_app/config.py +15 -0
  6. apps/gradio_app/old-processor.py +109 -0
  7. apps/gradio_app/processor.py +110 -0
  8. apps/gradio_app/static/scripts.js +0 -0
  9. apps/gradio_app/static/styles.css +100 -0
  10. apps/gradio_app/utils.py +14 -0
  11. apps/old-gradio_app.py +55 -0
  12. apps/old2-gradio_app.py +183 -0
  13. assets/lp_image.jpg +0 -0
  14. assets/lp_video - Trim.mp4 +3 -0
  15. assets/lp_video.mp4 +3 -0
  16. ckpts/README.md +3 -0
  17. configs/datasets_config.yaml +27 -0
  18. data/__init__.py +0 -0
  19. docs/inference/inference_doc.md +127 -0
  20. docs/scripts/scripts_doc.md +126 -0
  21. docs/training/training_doc.md +18 -0
  22. requirements/requirements.txt +8 -0
  23. requirements/requirements_compatible.txt +8 -0
  24. scripts/download_and_process_datasets.py +53 -0
  25. scripts/download_ckpts.py +62 -0
  26. scripts/old-download_and_process_datasets.py +61 -0
  27. src/license_plate_detector_ocr/__init__.py +0 -0
  28. src/license_plate_detector_ocr/data/dataset_processing/__init__.py +0 -0
  29. src/license_plate_detector_ocr/data/dataset_processing/config_loader.py +5 -0
  30. src/license_plate_detector_ocr/data/dataset_processing/converter.py +190 -0
  31. src/license_plate_detector_ocr/data/dataset_processing/downloader.py +67 -0
  32. src/license_plate_detector_ocr/data/dataset_processing/processor.py +41 -0
  33. src/license_plate_detector_ocr/infer.py +173 -0
  34. src/license_plate_detector_ocr/inference/__init__.py +0 -0
  35. src/license_plate_detector_ocr/inference/paddleocr_infer.py +76 -0
  36. src/license_plate_detector_ocr/old-infer.py +124 -0
  37. src/license_plate_detector_ocr/train.py +0 -0
  38. src/license_plate_detector_ocr/training/__init__.py +0 -0
  39. src/license_plate_detector_ocr/training/train_yolo.py +51 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/lp_video[[:space:]]-[[:space:]]Trim.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ assets/lp_video.mp4 filter=lfs diff=lfs merge=lfs -text
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.13
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Danh Tran
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
apps/gradio_app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import os
from gradio_app.config import setup_logging, setup_sys_path
from gradio_app.processor import gradio_process, update_preview, update_visibility

# Initialize logging and sys.path
setup_logging()
setup_sys_path()

# Load custom CSS with a context manager so the handle is closed promptly
# (the previous bare open(...).read() leaked the file handle).
_css_path = os.path.join(os.path.dirname(__file__), "gradio_app", "static", "styles.css")
with open(_css_path, "r") as _css_file:
    custom_css = _css_file.read()

# Gradio Interface
with gr.Blocks(css=custom_css) as iface:
    gr.Markdown(
        """
        # License Plate Detection and OCR
        Upload an image or video to detect and read license plates. Outputs are saved in `apps/gradio_app/temp_data/`.
        Debug logs are saved in `apps/gradio_app/debug.log`.
        """,
        elem_classes="markdown-title"
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload Image or Video", elem_classes="custom-file-input")
            input_type = gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image", elem_classes="custom-radio")
            with gr.Blocks():
                input_preview_image = gr.Image(label="Input Preview", visible=True, elem_classes="custom-image")
                input_preview_video = gr.Video(label="Input Preview", visible=False, elem_classes="custom-video")
            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary", elem_classes="custom-button secondary")
                submit_button = gr.Button("Submit", variant="primary", elem_classes="custom-button primary")
        with gr.Column(scale=2):
            with gr.Blocks():
                output_image = gr.Image(label="Processed Output (Image)", type="numpy", visible=True, elem_classes="custom-image")
                output_video = gr.Video(label="Processed Output (Video)", visible=False, elem_classes="custom-video")
            output_text = gr.Textbox(label="Detected License Plates", lines=10, elem_classes="custom-textbox")

    # Update preview and output visibility when input type changes
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[input_preview_image, input_preview_video, output_image, output_video]
    )

    # Update preview when file is uploaded
    input_file.change(
        fn=update_preview,
        inputs=[input_file, input_type],
        outputs=[input_preview_image, input_preview_video]
    )

    # Bind the processing function
    submit_button.click(
        fn=gradio_process,
        inputs=[input_file, input_type],
        outputs=[output_image, output_video, output_text, input_preview_image, input_preview_video]
    )

    # Clear button functionality. BUG FIX: the previous outputs list bound
    # output_image/output_video twice (8 outputs for 6 components); each
    # component is now reset exactly once.
    clear_button.click(
        fn=lambda: (None, None, None, "Image", None, None),
        outputs=[input_file, output_image, output_video, input_type, input_preview_image, input_preview_video]
    )

if __name__ == "__main__":
    iface.launch(share=True)
apps/gradio_app/config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import sys
4
+
5
+ def setup_logging():
6
+ """Set up logging to a file for debugging."""
7
+ logging.basicConfig(
8
+ filename="apps/gradio_app/debug.log",
9
+ level=logging.DEBUG,
10
+ format="%(asctime)s - %(levelname)s - %(message)s"
11
+ )
12
+
13
+ def setup_sys_path():
14
+ """Adjust sys.path to include the src directory."""
15
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
apps/gradio_app/old-processor.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import traceback
5
+ import logging
6
+ import gradio as gr
7
+ from gradio_app.utils import convert_to_supported_format
8
+ # Adjust sys.path to include the src directory
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
10
+ '..', '..', 'src', 'license_plate_detector_ocr')))
11
+ from infer import infer, is_image_file
12
+
13
+
14
+
15
+ def gradio_process(input_file, input_type):
16
+ """Process the input file (image or video) for license plate detection and OCR."""
17
+ try:
18
+ logging.debug(f"Input file path: {input_file.name}")
19
+ print(f"Input file path: {input_file.name}")
20
+
21
+ # Copy input file to temp_data directory to ensure stability
22
+ temp_input_dir = "apps/gradio_app/temp_data"
23
+ os.makedirs(temp_input_dir, exist_ok=True)
24
+ temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
25
+ shutil.copy(input_file.name, temp_input_path)
26
+ logging.debug(f"Copied input file to: {temp_input_path}")
27
+
28
+ # Verify input file exists
29
+ if not os.path.exists(temp_input_path):
30
+ error_msg = f"Error: Input file {temp_input_path} does not exist."
31
+ logging.error(error_msg)
32
+ return None, None, error_msg, None, None
33
+
34
+ # Set output path
35
+ output_dir = "apps/gradio_app/temp_data"
36
+ os.makedirs(output_dir, exist_ok=True)
37
+ output_filename = os.path.splitext(os.path.basename(temp_input_path))[0] + ('_output.jpg' if is_image_file(temp_input_path) else '_output.mp4')
38
+ output_path = os.path.join(output_dir, output_filename)
39
+ logging.debug(f"Output path: {output_path}")
40
+
41
+ # Call the infer function
42
+ result_array, plate_texts = infer(temp_input_path, output_path)
43
+
44
+ if result_array is None and is_image_file(temp_input_path):
45
+ error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
46
+ logging.error(error_msg)
47
+ return None, None, error_msg, None, None
48
+
49
+ # Validate output file for videos
50
+ if not is_image_file(temp_input_path):
51
+ if not os.path.exists(output_path):
52
+ error_msg = f"Error: Output video file {output_path} was not created."
53
+ logging.error(error_msg)
54
+ return None, None, error_msg, None, None
55
+ # Convert output video to supported format
56
+ converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
57
+ converted_path = convert_to_supported_format(output_path, converted_output_path)
58
+ if converted_path is None:
59
+ error_msg = f"Error: Failed to convert output video {output_path} to supported format."
60
+ logging.error(error_msg)
61
+ return None, None, error_msg, None, None
62
+ output_path = converted_path
63
+
64
+ # Format plate texts
65
+ if is_image_file(temp_input_path):
66
+ formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
67
+ logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
68
+ return result_array, None, formatted_texts, temp_input_path, None
69
+ else:
70
+ formatted_texts = []
71
+ for i, texts in enumerate(plate_texts):
72
+ if texts:
73
+ formatted_texts.append(f"Frame {i+1}: {', '.join(texts)}")
74
+ formatted_texts = "\n".join(formatted_texts) if formatted_texts else "No plates detected"
75
+ logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
76
+ return None, output_path, formatted_texts, None, temp_input_path
77
+ except Exception as e:
78
+ error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
79
+ logging.error(error_message)
80
+ print(error_message)
81
+ return None, None, error_message, None, None
82
+
83
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
98
+
99
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
apps/gradio_app/processor.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import shutil
4
+ import traceback
5
+ import logging
6
+ import gradio as gr
7
+ import uuid # Import uuid module
8
+ from gradio_app.utils import convert_to_supported_format
9
+
10
+ # Adjust sys.path to include the src directory
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src', 'license_plate_detector_ocr')))
12
+ from infer import infer, is_image_file
13
+
14
def gradio_process(input_file, input_type):
    """Process the input file (image or video) for license plate detection and OCR.

    Returns a 5-tuple matching the Gradio output bindings:
    (image_result, video_path, plate_text, image_preview, video_preview).
    On any failure the third slot carries the error message and the rest
    are None.
    """
    try:
        logging.debug(f"Input file path: {input_file.name}")
        print(f"Input file path: {input_file.name}")

        # Copy input file to temp_data directory to ensure stability
        temp_input_dir = "apps/gradio_app/temp_data"
        os.makedirs(temp_input_dir, exist_ok=True)
        temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
        shutil.copy(input_file.name, temp_input_path)
        logging.debug(f"Copied input file to: {temp_input_path}")

        # Verify input file exists
        if not os.path.exists(temp_input_path):
            error_msg = f"Error: Input file {temp_input_path} does not exist."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        # Build a collision-free output name: <stem>_<8charuuid>_output.<ext>.
        # BUG FIX: the previous format string appended "_output" twice,
        # producing names like "img_ab12cd34_output_output.jpg".
        output_dir = "apps/gradio_app/temp_data"
        os.makedirs(output_dir, exist_ok=True)
        unique_id = str(uuid.uuid4())[:8]  # short id keeps filenames readable
        stem = os.path.splitext(os.path.basename(temp_input_path))[0]
        extension = '.jpg' if is_image_file(temp_input_path) else '.mp4'
        output_filename = f"{stem}_{unique_id}_output{extension}"
        output_path = os.path.join(output_dir, output_filename)
        logging.debug(f"Output path: {output_path}")

        # infer() yields (annotated array or None, plate texts).
        result_array, plate_texts = infer(temp_input_path, output_path)

        if result_array is None and is_image_file(temp_input_path):
            error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        # Validate output file for videos
        if not is_image_file(temp_input_path):
            if not os.path.exists(output_path):
                error_msg = f"Error: Output video file {output_path} was not created."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            # Re-encode so browsers can play the result.
            converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
            converted_path = convert_to_supported_format(output_path, converted_output_path)
            if converted_path is None:
                error_msg = f"Error: Failed to convert output video {output_path} to supported format."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            output_path = converted_path

        # Format plate texts
        if is_image_file(temp_input_path):
            formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
            logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
            return result_array, None, formatted_texts, temp_input_path, None

        # For videos plate_texts is a per-frame list of lists.
        frame_lines = [
            f"Frame {i+1}: {', '.join(texts)}"
            for i, texts in enumerate(plate_texts)
            if texts
        ]
        formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
        logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
        return None, output_path, formatted_texts, None, temp_input_path
    except Exception as e:
        error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
        logging.error(error_message)
        print(error_message)
        return None, None, error_message, None, None
83
+
84
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
99
+
100
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
+ )
apps/gradio_app/static/scripts.js ADDED
File without changes
apps/gradio_app/static/styles.css ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* General body styling */
2
+ .gradio-container {
3
+ background: linear-gradient(180deg, #f9fafb, #f1efef);
4
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
5
+ color: #6b46c1; /* Purple-800 for text (neutral hue) */
6
+ font-size: 16px; /* Medium text size */
7
+ font-weight: 400;
8
+ }
9
+
10
+ /* Dark mode background */
11
+ @media (prefers-color-scheme: dark) {
12
+ .gradio-container {
13
+ background: linear-gradient(180deg, #1f2937, #111827);
14
+ color: #d6bcfa; /* Lighter purple for dark mode */
15
+ }
16
+ }
17
+
18
+ /* Block styling (containers for components) */
19
+ .block {
20
+ border: 1px solid #e9d8fd; /* Purple-200 for borders */
21
+ border-radius: 8px; /* Medium radius */
22
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); /* Small shadow */
23
+ padding: 16px; /* Medium spacing */
24
+ background: #f1efef;
25
+ }
26
+
27
+ /* Input fields */
28
+ input[type="text"], textarea {
29
+ background: #faf5ff; /* Purple-50 for input background */
30
+ border: 1px solid #e9d8fd; /* Purple-200 for borders */
31
+ border-radius: 8px;
32
+ padding: 8px;
33
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
34
+ font-size: 16px;
35
+ color: #6b46c1;
36
+ box-shadow: none;
37
+ }
38
+ input[type="text"]:focus, textarea:focus {
39
+ outline: none;
40
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); /* Small shadow on focus */
41
+ border-color: #48bb78; /* Green-400 for focus */
42
+ }
43
+
44
+ /* Primary button */
45
+ button.primary {
46
+ background: #48bb78; /* Green-400 */
47
+ color: #f1efef;
48
+ border: none;
49
+ border-radius: 8px;
50
+ padding: 8px 16px;
51
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
52
+ font-size: 16px;
53
+ font-weight: 500;
54
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
55
+ cursor: pointer;
56
+ }
57
+ button.primary:hover {
58
+ background: #ed8936; /* Orange-400 for hover */
59
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); /* Medium shadow on hover */
60
+ }
61
+
62
+ /* Secondary button */
63
+ button.secondary {
64
+ color: #48bb78; /* Green-400 for text */
65
+ border: 1px solid #48bb78; /* Green-400 for border */
66
+ border-radius: 8px;
67
+ padding: 8px 16px;
68
+ font-family: 'Quicksand', ui-sans-serif, sans-serif;
69
+ font-size: 16px;
70
+ font-weight: 500;
71
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
72
+ cursor: pointer;
73
+ }
74
+ button.secondary:hover {
75
+ background: #ed8936; /* Orange-400 for hover */
76
+ color: #48bb78;
77
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
78
+ }
79
+
80
+ /* Slider styling */
81
+ input[type="range"] {
82
+ accent-color: #ed8936; /* Orange-400 for slider */
83
+ }
84
+ @media (prefers-color-scheme: dark) {
85
+ input[type="range"] {
86
+ accent-color: #f6ad55; /* Orange-600 for dark mode */
87
+ }
88
+ }
89
+
90
+ /* Markdown headers */
91
+ h2 {
92
+ font-weight: 500;
93
+ color: #6b46c1; /* Purple-800 */
94
+ margin-bottom: 16px;
95
+ }
96
+
97
+ /* Code or monospace elements */
98
+ code, pre {
99
+ font-family: 'IBM Plex Mono', ui-monospace, monospace;
100
+ }
apps/gradio_app/utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ffmpeg
2
+ import logging
3
+
4
def convert_to_supported_format(input_path, output_path):
    """Convert video to a browser-compatible format (MP4 with H.264 codec).

    Returns *output_path* on success, or None when ffmpeg fails for any
    reason (callers treat None as "conversion failed").
    """
    try:
        pipeline = ffmpeg.output(
            ffmpeg.input(input_path),
            output_path,
            vcodec='h264',
            acodec='aac',
            format='mp4',
            loglevel='quiet',
        )
        ffmpeg.run(pipeline)
        logging.debug(f"Converted video to {output_path}")
        return output_path
    except Exception as e:
        logging.error(f"Error converting video {input_path}: {str(e)}")
        return None
apps/old-gradio_app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+
5
+ # Adjust sys.path to include the src directory
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
7
+ from infer import infer, is_image_file
8
+
9
def gradio_process(input_file, input_type):
    """Process the input file (image or video) for license plate detection and OCR.

    Returns (display_output, text): for images the annotated numpy array and
    newline-joined plate strings; for videos the written output path and
    per-frame plate strings.
    """
    # Debugging: Print input file path
    print(f"Input file path: {input_file.name}")

    # Set default output path in apps/gradio_app/temp_data/
    output_dir = "apps/gradio_app/temp_data"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = os.path.splitext(os.path.basename(input_file.name))[0] + ('_output.jpg' if is_image_file(input_file.name) else '_output.mp4')
    output_path = os.path.join(output_dir, output_filename)

    # Call the infer function from infer.py
    result_array, plate_texts = infer(input_file.name, output_path)

    # BUG FIX: only treat a None array as failure for IMAGE inputs. The later
    # revisions of this handler (processor.py) guard with is_image_file,
    # indicating infer() returns no array for videos (it writes output_path
    # instead) — the previous unconditional check reported every video as
    # a processing failure.
    if result_array is None and is_image_file(input_file.name):
        return None, f"Error: Processing failed for {input_file.name}"

    # Format plate texts for output
    if is_image_file(input_file.name):
        formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
        return result_array, formatted_texts

    # For videos, plate_texts is a list of lists (per frame)
    frame_lines = [
        f"Frame {i+1}: {', '.join(texts)}"
        for i, texts in enumerate(plate_texts)
        if texts
    ]
    formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
    return output_path, formatted_texts
+
39
+ # Gradio Interface
40
+ iface = gr.Interface(
41
+ fn=gradio_process,
42
+ inputs=[
43
+ gr.File(label="Upload Image or Video"),
44
+ gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image")
45
+ ],
46
+ outputs=[
47
+ gr.Image(label="Processed Output", type="numpy"),
48
+ gr.Textbox(label="Detected License Plates")
49
+ ],
50
+ title="License Plate Detection and OCR",
51
+ description="Upload an image or video to detect and read license plates. Outputs are saved in apps/gradio_app/temp_data/."
52
+ )
53
+
54
+ if __name__ == "__main__":
55
+ iface.launch(share=True)
apps/old2-gradio_app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+ import traceback
5
+ import logging
6
+ import shutil
7
+ import ffmpeg
8
+
9
+ # Set up logging to a file for debugging
10
+ logging.basicConfig(
11
+ filename="apps/gradio_app/debug.log",
12
+ level=logging.DEBUG,
13
+ format="%(asctime)s - %(levelname)s - %(message)s"
14
+ )
15
+
16
+ # Adjust sys.path to include the src directory
17
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src', 'license_plate_detector_ocr')))
18
+ from infer import infer, is_image_file
19
+
20
def convert_to_supported_format(input_path, output_path):
    """Convert video to a browser-compatible format (MP4 with H.264 codec).

    Returns *output_path* on success, or None when ffmpeg fails for any
    reason (callers treat None as "conversion failed").
    """
    try:
        pipeline = ffmpeg.output(
            ffmpeg.input(input_path),
            output_path,
            vcodec='h264',
            acodec='aac',
            format='mp4',
            loglevel='quiet',
        )
        ffmpeg.run(pipeline)
        logging.debug(f"Converted video to {output_path}")
        return output_path
    except Exception as e:
        logging.error(f"Error converting video {input_path}: {str(e)}")
        return None
31
+
32
def gradio_process(input_file, input_type):
    """Run license plate detection and OCR on an uploaded image or video.

    Returns a 5-tuple matching the Gradio output bindings:
    (image_result, video_path, plate_text, image_preview, video_preview).
    On any failure the third slot carries the error message and the rest
    are None.
    """
    try:
        logging.debug(f"Input file path: {input_file.name}")
        print(f"Input file path: {input_file.name}")

        # Stabilize the upload by copying it into our own temp directory.
        temp_input_dir = "apps/gradio_app/temp_data"
        os.makedirs(temp_input_dir, exist_ok=True)
        temp_input_path = os.path.join(temp_input_dir, os.path.basename(input_file.name))
        shutil.copy(input_file.name, temp_input_path)
        logging.debug(f"Copied input file to: {temp_input_path}")

        if not os.path.exists(temp_input_path):
            error_msg = f"Error: Input file {temp_input_path} does not exist."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        treat_as_image = is_image_file(temp_input_path)

        # Derive the output path from the input's basename.
        output_dir = "apps/gradio_app/temp_data"
        os.makedirs(output_dir, exist_ok=True)
        stem = os.path.splitext(os.path.basename(temp_input_path))[0]
        output_path = os.path.join(output_dir, stem + ('_output.jpg' if treat_as_image else '_output.mp4'))
        logging.debug(f"Output path: {output_path}")

        # infer() yields (annotated array or None, plate texts).
        result_array, plate_texts = infer(temp_input_path, output_path)

        if treat_as_image and result_array is None:
            error_msg = f"Error: Processing failed for {temp_input_path}. 'infer' returned None."
            logging.error(error_msg)
            return None, None, error_msg, None, None

        if not treat_as_image:
            if not os.path.exists(output_path):
                error_msg = f"Error: Output video file {output_path} was not created."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            # Re-encode so browsers can play the result.
            converted_output_path = os.path.join(output_dir, f"converted_{os.path.basename(output_path)}")
            converted_path = convert_to_supported_format(output_path, converted_output_path)
            if converted_path is None:
                error_msg = f"Error: Failed to convert output video {output_path} to supported format."
                logging.error(error_msg)
                return None, None, error_msg, None, None
            output_path = converted_path

        if treat_as_image:
            formatted_texts = "\n".join(plate_texts) if plate_texts else "No plates detected"
            logging.debug(f"Image processed successfully. Plate texts: {formatted_texts}")
            return result_array, None, formatted_texts, temp_input_path, None

        # For videos plate_texts is a per-frame list of lists.
        frame_lines = [
            f"Frame {i+1}: {', '.join(texts)}"
            for i, texts in enumerate(plate_texts)
            if texts
        ]
        formatted_texts = "\n".join(frame_lines) if frame_lines else "No plates detected"
        logging.debug(f"Video processed successfully. Plate texts: {formatted_texts}")
        return None, output_path, formatted_texts, None, temp_input_path
    except Exception as e:
        error_message = f"Error processing {input_file.name}: {str(e)}\n{traceback.format_exc()}"
        logging.error(error_message)
        print(error_message)
        return None, None, error_message, None, None
99
+
100
def update_preview(file, input_type):
    """Route the uploaded file path to the matching preview component.

    Returns (image_path, video_path); the slot that does not match
    *input_type* is None, and both are None when the file is missing,
    unreadable, or an unsupported video format.
    """
    if not file:
        logging.debug("No file provided for preview.")
        return None, None
    logging.debug(f"Updating preview for {input_type}: {file.name}")
    if not os.path.exists(file.name):
        logging.error(f"Input file {file.name} does not exist.")
        return None, None
    if input_type == "Video" and not file.name.lower().endswith(('.mp4', '.webm')):
        logging.error(f"Unsupported video format for {file.name}. Use MP4 or WebM.")
        return None, None
    image_path = file.name if input_type == "Image" else None
    video_path = file.name if input_type == "Video" else None
    return image_path, video_path
115
+
116
def update_visibility(input_type):
    """Toggle component visibility to match the selected input type.

    Order matches the bound outputs: input image preview, input video
    preview, output image, output video.
    """
    logging.debug(f"Updating visibility for input type: {input_type}")
    show_image = input_type == "Image"
    show_video = input_type == "Video"
    return (
        gr.update(visible=show_image),
        gr.update(visible=show_video),
        gr.update(visible=show_image),
        gr.update(visible=show_video),
    )
127
+
128
# Gradio Interface: two-column Blocks layout wired to the handlers above.
with gr.Blocks() as iface:
    gr.Markdown(
        """
        # License Plate Detection and OCR
        Upload an image or video to detect and read license plates. Outputs are saved in `apps/gradio_app/temp_data/`.
        Debug logs are saved in `apps/gradio_app/debug.log`.
        """,
        elem_classes="markdown-title"
    )

    with gr.Row():
        # Left column: upload, type selector, preview, action buttons.
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload Image or Video")
            input_type = gr.Radio(choices=["Image", "Video"], label="Input Type", value="Image")
            with gr.Blocks():
                input_preview_image = gr.Image(label="Input Preview", visible=True)
                input_preview_video = gr.Video(label="Input Preview", visible=False)
            with gr.Row():
                clear_button = gr.Button("Clear", variant="secondary")
                submit_button = gr.Button("Submit", variant="primary")
        # Right column: processed media plus detected plate text.
        with gr.Column(scale=2):
            with gr.Blocks():
                output_image = gr.Image(label="Processed Output (Image)", type="numpy", visible=True)
                output_video = gr.Video(label="Processed Output (Video)", visible=False)
            output_text = gr.Textbox(label="Detected License Plates", lines=10)

    # Swap which preview/output components are shown when the type changes.
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[input_preview_image, input_preview_video, output_image, output_video]
    )

    # Refresh the preview whenever a new file is uploaded.
    input_file.change(
        fn=update_preview,
        inputs=[input_file, input_type],
        outputs=[input_preview_image, input_preview_video]
    )

    # Run detection + OCR on submit.
    submit_button.click(
        fn=gradio_process,
        inputs=[input_file, input_type],
        outputs=[output_image, output_video, output_text, input_preview_image, input_preview_video]
    )

    # Reset the form (output components are intentionally bound as in the
    # original wiring, including the repeated output_image/output_video).
    clear_button.click(
        fn=lambda: (None, None, None, "Image", None, None, None, None),
        outputs=[input_file, output_image, output_video, input_type,
                 input_preview_image, input_preview_video, output_image, output_video]
    )

if __name__ == "__main__":
    iface.launch(share=True)
assets/lp_image.jpg ADDED
assets/lp_video - Trim.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6625c922f561331f928cb6465646a299bcd8ba99f329bfb62a379c29ed33ea19
3
+ size 405454
assets/lp_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72dececeb4cc1ce1da5264211578c9331a3fb31d36bf21ac2f40471d70e2121d
3
+ size 4984385
ckpts/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Checkpoints Folder 🚀
2
+
3
+ This folder holds model checkpoints from training runs 🏋️‍♂️. Each file saves the model's weights & state at specific epochs ⏰. Named by epoch or timestamp, they’re ready for resuming or eval! 🧠 Keep some disk space free for these hefty files! 💾
configs/datasets_config.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ - id: fareselmenshawii/large-license-plate-dataset
3
+ type: kaggle
4
+ - id: duydieunguyen/licenseplates
5
+ type: kaggle
6
+ - id: ronakgohil/license-plate-dataset
7
+ type: kaggle
8
+ - id: bomaich/vnlicenseplate
9
+ type: kaggle
10
+ - id: congtuu/vietnamese-license-plate-obb
11
+ type: kaggle
12
+ - id: haitonthat/vietnam-license-plate-bounding-box
13
+ type: kaggle
14
+ - id: university-of-southeastern-philippines-cnl9c/license-plate-detection-merged-projects
15
+ type: roboflow
16
+ format: yolov11
17
+ version: 3
18
+ - id: ev-dshfb/license-plate-w8chc
19
+ type: roboflow
20
+ format: yolov11
21
+ version: 1
22
+ - id: kanwal-masroor-gv4jr/yolov7-license-plate-detection
23
+ type: roboflow
24
+ format: yolov11
25
+ version: 3
26
+ - id: keremberke/license-plate-object-detection
27
+ type: huggingface
data/__init__.py ADDED
File without changes
docs/inference/inference_doc.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # License Plate Detection and OCR Inference Documentation
2
+
3
+ This document describes the inference process for the license plate detection and OCR system implemented in the provided Python scripts. The system uses a YOLO model for license plate detection and PaddleOCR for text recognition. Below are the details of the inference process and the arguments required to run the scripts.
4
+
5
+ ## Overview
6
+
7
+ The system consists of two main scripts:
8
+ 1. **`paddleocr_infer.py`**: Handles OCR processing for license plate text extraction.
9
+ 2. **`infer.py`**: Manages the main inference pipeline, including license plate detection and OCR, for both images and videos.
10
+
11
+ The scripts process input images or videos, detect license plates using a YOLO model, crop the detected regions, and extract text using PaddleOCR. The output includes processed images or videos with bounding boxes and text annotations, along with the extracted license plate texts.
12
+
13
+ ## Inference Process
14
+
15
+ ### 1. `paddleocr_infer.py`
16
+
17
+ This script defines the OCR functionality using PaddleOCR, optimized for English license plate recognition.
18
+
19
+ #### Key Function: `process_ocr`
20
+ - **Purpose**: Extracts text from a single image, a list of images, or a NumPy array representing an image.
21
+ - **Input**:
22
+ - `image_input`: Can be one of the following:
23
+ - `str`: Path to a single image file.
24
+ - `List[str]`: List of paths to multiple image files.
25
+ - `np.ndarray`: A 3D NumPy array (height, width, channels) representing an image.
26
+ - **Output**:
27
+ - For a single image or array: A list of extracted text strings (`List[str]`).
28
+ - For multiple images: A list of lists, each containing extracted text strings for an image (`List[List[str]]`).
29
+ - **Behavior**:
30
+ - Initializes PaddleOCR with English language settings and slim models for detection and recognition.
31
+ - Processes input(s) and extracts text from detected regions.
32
+ - Handles single or multiple inputs uniformly by converting single inputs to a list for processing.
33
+
34
+ #### Example Usage
35
+ ```python
36
+ # Single image
37
+ result = process_ocr('<path_to_plate_image_1>') # Returns List[str]
38
+
39
+ # Multiple images
40
+ results = process_ocr(['<path_to_plate_image_1>', '<path_to_plate_image_2>', '<path_to_plate_image_3>']) # Returns List[List[str]]
41
+
42
+ # Image array
43
+ import cv2
44
+ image_array = cv2.imread('<path_to_plate_image>')
45
+ result = process_ocr(image_array) # Returns List[str]
46
+ ```
47
+
48
+ ### 2. `infer.py`
49
+
50
+ This script integrates YOLO-based license plate detection with OCR to process images or videos.
51
+
52
+ #### Main Function: `infer`
53
+ - **Purpose**: Processes an input image or video to detect license plates and extract text.
54
+ - **Input Arguments**:
55
+ - `input_path` (`str`, required): Path to the input image or video file.
56
+ - `output_path` (`str`, optional): Path to save the processed output file. If not provided, defaults to the input path with `_output` appended (e.g., `input.jpg` → `input_output.jpg`).
57
+ - **Output**:
58
+ - `result_array`: A NumPy array representing the processed image (3D for images, 4D for videos) or `None` if processing fails.
59
+ - `plate_texts`: A list of extracted license plate texts (`List[str]` for images, `List[List[str]]` for videos) or `None` if processing fails.
60
+ - **Behavior**:
61
+ - Loads a YOLO model from `ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt`.
62
+ - Checks if the input is an image or video based on file extension.
63
+ - Calls `process_image` for images or `process_video` for videos.
64
+ - Saves the output with bounding boxes and text annotations.
65
+
66
+ #### Helper Functions
67
+ - **`is_image_file(file_path)`**:
68
+ - Checks if a file has a valid image extension (`.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`).
69
+ - Returns `True` for images, `False` otherwise.
70
+ - **`process_image(model, image_path, output_path)`**:
71
+ - Processes a single image for license plate detection and OCR.
72
+ - Draws bounding boxes and text with confidence scores on the image.
73
+ - Saves the processed image to `output_path`.
74
+ - Returns the processed image array and extracted texts.
75
+ - **`process_video(model, video_path, output_path)`**:
76
+ - Processes a video frame by frame for license plate detection and OCR.
77
+ - Draws bounding boxes and text with confidence scores on each frame.
78
+ - Saves the processed video to `output_path`.
79
+ - Returns a 4D NumPy array of frames and a list of per-frame extracted texts.
80
+
81
+ #### Command-Line Arguments
82
+ The script supports command-line execution with the following arguments:
83
+ - `--input_path` (`str`, required): Path to the input image or video file.
84
+ - Example: `--input_path <path_to_plate_image_1>` or `--input_path video.mp4`
85
+ - `--output_path` (`str`, optional): Path to save the processed output file.
86
+ - Example: `--output_path output/plate_output.jpg`
87
+ - If not specified, defaults to the input path with `_output` appended.
88
+
89
+ #### Example Command-Line Usage
90
+ ```bash
91
+ # Process an image
92
+ python infer.py --input_path <path_to_plate_image_1> --output_path output/plate_output.jpg
93
+
94
+ # Process a video
95
+ python infer.py --input_path video.mp4 --output_path output/video_output.mp4
96
+ ```
97
+
98
+ ## Requirements
99
+ - **Python Libraries**:
100
+ - `paddleocr`: For OCR processing.
101
+ - `ultralytics`: For YOLO model inference.
102
+ - `opencv-python` (`cv2`): For image and video processing.
103
+ - `numpy`: For array operations.
104
+ - **Model File**:
105
+ - YOLO model weights at `ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt`.
106
+ - **Input Files**:
107
+ - Images: `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`.
108
+ - Videos: Any format supported by OpenCV (e.g., `.mp4`).
109
+
110
+ ## Output Format
111
+ - **Images**:
112
+ - Processed image saved with bounding boxes and text annotations.
113
+ - Returns a 3D NumPy array (height, width, channels) and a list of extracted texts (`List[str]`).
114
+ - **Videos**:
115
+ - Processed video saved with bounding boxes and text annotations on each frame.
116
+ - Returns a 4D NumPy array (frames, height, width, channels) and a list of per-frame extracted texts (`List[List[str]]`).
117
+
118
+ ## Error Handling
119
+ - Checks for the existence of the model file and input file.
120
+ - Validates image array dimensions (must be 3D).
121
+ - Handles failures in loading images/videos or during model inference, returning `None` for both outputs in case of errors.
122
+
123
+ ## Notes
124
+ - The YOLO model and PaddleOCR are configured for English license plates. Modify `lang` or model names in `paddleocr_infer.py` for other languages.
125
+ - Ensure the model weights file exists at the specified path.
126
+ - Output directories are created automatically if they do not exist.
127
+ - For videos, frames without detected plates are included in the output to maintain continuity.
docs/scripts/scripts_doc.md ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # YOLOv12 Object Detection Training Guide
3
+
4
+ This guide provides instructions for training an object detection model using YOLOv12. The example below demonstrates how to fine-tune the YOLOv12n model. Pre-trained checkpoints are available for download from the Ultralytics Releases page. You can see more at this URL:
5
+ [Ultralytics Releases](https://github.com/ultralytics/assets/releases)
6
+
7
+ ## Prerequisites
8
+
9
+ - Ensure you have the Ultralytics YOLO package installed.
10
+
11
+ - Download the desired YOLOv12 model checkpoint (e.g., yolo12n.pt) using the provided script.
12
+
13
+
14
+ ## 1 Downloading Pre-trained Models
15
+
16
+ To download YOLOv12 model checkpoints, run the following command:
17
+
18
+ ```bash
19
+ python scripts/download_ckpts.py \
20
+ --url <yolo_model_released_url> \
21
+ --output-dir <saved_yolo_model_path>
22
+ ```
23
+
24
+ This will save the pre-trained weights to the ./ckpts/raw/ directory.
25
+
26
+ ## 2 Process Dataset
27
+ Here is the CLI command to download and process datasets.
28
+ ```bash
29
+ python scripts/download_and_process_datasets.py \
30
+ --output-dir <combined_dataset_path> \
31
+ --dataset-base-dir <directory_containing_all_datasets> \
32
+ --config <datasets_config_path> \
33
+ --platforms <list_of_platforms_to_download_from> \ # e.g., ["kaggle", "roboflow", "huggingface"]
34
+ --roboflow-api-key <roboflow_api_key> # Optional: required if "roboflow" is included in --platforms
35
+ ```
36
+ For example:
37
+ ```bash
38
+ python scripts/download_and_process_datasets.py \
39
+ --output-dir ./datasets/yolo_standard_dataset \
40
+ --dataset-base-dir ./datasets/all_datasets \
41
+ --config ./config/dataset_config.yaml \
42
+ --roboflow-api-key YOUR_ROBOFLOW_APIKEY \
43
+ --platforms "kaggle" "roboflow" "huggingface" # e.g., ["kaggle", "roboflow", "huggingface"]
44
+ ```
45
+ For help:
46
+ ```bash
47
+ python scripts/download_and_process_datasets.py -h
48
+ ```
49
+ ## 3 Fine-Tuning the Model
50
+ <!--
51
+ To fine-tune a YOLOv12 model for object detection, use the provided training script with customizable parameters. Run the following command and adjust the arguments based on your requirements:
52
+
53
+ ```bash
54
+ python scripts/train_yolo.py \
55
+ --epochs <number_of_epochs> \
56
+ --batch <batch_size> \
57
+ --device <cuda_device_id_or_list|cpu> \
58
+ --project <path_to_save_results> \
59
+ --name <project_name> \
60
+ --resume # Optional: resume training from the last checkpoint
61
+ ```
62
+
63
+ ### Example Configuration
64
+
65
+ For reference, the equivalent configuration using the yolo CLI command is shown below:
66
+
67
+ ```bash
68
+ python scripts/train_yolo.py\
69
+ --epochs 100 \
70
+ --batch 32 \
71
+ --device 0 \
72
+ --project "./ckpts/finetune/runs" \
73
+ --name "license_plate_detector"
74
+ ```
75
+
76
+ ### More Configurations
77
+ Run this CLI command to show `Help`.
78
+ ```bash
79
+ python scripts/train_yolo.py -h
80
+ ```
81
+ -->
82
+ To fine-tune a YOLOv12 model for object detection, use the provided training script with customizable parameters. Run the following command and adjust the arguments based on your requirements:
83
+
84
+ ```bash
85
+ yolo detect train \
86
+ model=<yolo_model_path or yolo_version_name> \
87
+ data=<dataset_config_path> \
88
+ epochs=<number_of_epochs> \
89
+ batch=<batch_size> \
90
+ patience=<early_stopping_patience> \
91
+ imgsz=<image_size> \
92
+ lr0=<initial_learning_rate> \
93
+ lrf=<final_learning_rate> \
94
+ device=<device_id or list_of_cuda or "cpu"> \
95
+ project=<output_directory> \
96
+ name=<experiment_name> \
97
+ save=<true or false> \
98
+ resume=<true or false>
99
+ ```
100
+
101
+ ### Example Configuration
102
+
103
+ For reference, the equivalent configuration using the yolo CLI command is shown below:
104
+ ```bash
105
+ yolo detect train \
106
+ model="./ckpts/raw/yolo12n.pt" \
107
+ data="./datasets/yolo_standard_dataset/data.yaml" \
108
+ epochs=100 \
109
+ batch=32 \
110
+ patience=20 \
111
+ imgsz=640 \
112
+ lr0=0.01 \
113
+ lrf=0.001 \
114
+ device=0 \
115
+ project="./ckpts/finetune/runs" \
116
+ name="license_plate_detector" \
117
+ save=true \
118
+ resume=false
119
+ ```
120
+ ### More Configurations
121
+ Run this CLI command to show `Help`.
122
+ ```bash
123
+ yolo --help
124
+ ```
125
+
126
+ ## Using PaddleOCR
docs/training/training_doc.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training Arguments for YOLOv12 License Plate Detection
2
+
3
+ Below are the command-line arguments used to configure the training of a YOLOv12 model for license plate detection.
4
+
5
+ | Argument | Type | Default Value | Description |
6
+ |----------------|------------|--------------------------------------------|-----------------------------------------------------------------------------|
7
+ | `--model` | `str` | `./ckpts/raw/yolo12n.pt` | Path to the model file or model name (e.g., "yolo12n.pt"). |
8
+ | `--data` | `str` | `./datasets/yolo_standard_dataset/data.yaml`| Path to the dataset YAML file specifying the dataset configuration. |
9
+ | `--epochs` | `int` | `100` | Number of training epochs. |
10
+ | `--batch` | `int` | `64` | Batch size for training. |
11
+ | `--resume` | `boolean` | `False` | Resume training from the last checkpoint if set. |
12
+ | `--patience` | `int` | `20` | Number of epochs to wait for improvement before early stopping. |
13
+ | `--lr0` | `float` | `0.01` | Initial learning rate for training. |
14
+ | `--lrf` | `float` | `0.001` | Final learning rate for training. |
15
+ | `--device` | `str` | `0` | Device to train on (e.g., `0` for GPU, `[0,1]` for multiple GPUs, or `cpu`). |
16
+ | `--project` | `str` | `./ckpts/finetune/runs` | Directory to save training results. |
17
+ | `--name` | `str` | `license_plate_detector` | Name of the training run. |
18
+ | `--save` | `boolean` | `True` | Save training results if set. |
requirements/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ opencv-python
2
+ ultralytics
3
+ roboflow
4
+ wget
5
+ ffmpeg-python
6
+ paddleocr
7
+ paddlepaddle-gpu
8
+ paddlepaddle
requirements/requirements_compatible.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ opencv-python==4.11.0.86
2
+ ultralytics==8.3.162
3
+ roboflow==1.1.66
4
+ wget==3.2
5
+ ffmpeg-python==0.2.0
6
+ paddleocr==2.9.0
7
+ paddlepaddle-gpu==2.6.2
8
+ paddlepaddle==2.6.2
scripts/download_and_process_datasets.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ from pathlib import Path
4
+ import sys
5
+ import os
6
+
7
+ # Append the current directory to sys.path
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
9
+ "src", "license_plate_detector_ocr", "data")))
10
+
11
+ from dataset_processing import config_loader, downloader, processor, converter
12
+
13
def main(args):
    """Download the configured license-plate datasets and merge them into one YOLO dataset.

    Reads the dataset list from the YAML config, downloads each entry from the
    requested platform(s) into numbered sub-directories, converts every source
    layout to the YOLO format, and finally post-processes the combined output.
    """
    logging.basicConfig(filename='dataset_conversion.log', level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    datasets = config_loader.load_config(args.config)['datasets']
    for directory in (args.dataset_base_dir, args.output_dir):
        os.makedirs(directory, exist_ok=True)

    base_dir = Path(args.dataset_base_dir)

    # Each dataset lands in its own dataset_<idx> folder so the converters
    # below can locate it by index.
    for idx, ds in enumerate(datasets):
        target = base_dir / f"dataset_{idx}"
        if ds['type'] == 'kaggle' and 'kaggle' in args.platforms:
            downloader.download_kaggle_dataset(ds['id'], target)
        elif ds['type'] == 'roboflow' and 'roboflow' in args.platforms:
            downloader.download_roboflow_dataset(ds['id'], ds['format'], ds['version'],
                                                 target, args.roboflow_api_key)
        elif ds['type'] == 'huggingface' and 'huggingface' in args.platforms:
            downloader.download_huggingface_dataset(ds['id'], target)

    # Kaggle-style folders are converted first; Roboflow and Hugging Face
    # downloads are then merged into the same combined output directory.
    converter.coco_kaggle_to_yolo(args.dataset_base_dir, args.output_dir)
    for idx, ds in enumerate(datasets):
        if ds['type'] == 'roboflow' and 'roboflow' in args.platforms:
            converter.copy_dataset_to_combined_folder(base_dir / f"dataset_{idx}",
                                                      args.output_dir)
    for idx, ds in enumerate(datasets):
        if ds['type'] == 'huggingface' and 'huggingface' in args.platforms:
            converter.convert_coco_huggingface_to_yolo(
                dataset_base_path=base_dir / f"dataset_{idx}/license-plate-object-detection/data",
                output_dir=args.output_dir)

    processor.process_folders(args.output_dir)
42
+
43
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download and process license plate datasets.")
    parser.add_argument("--output-dir", default="./data/yolo_standard_dataset",
                        help="Output directory for YOLOv11 dataset")
    parser.add_argument("--dataset-base-dir", default="./data/all_datasets",
                        help="Base directory for downloaded datasets")
    # BUGFIX: the key used to be declared with required='roboflow' in sys.argv,
    # which only fired when the literal word "roboflow" was typed on the
    # command line.  With the default --platforms (which includes roboflow)
    # the script would run with a None API key and fail later.  The key is
    # now validated after parsing against the actual platform list.
    parser.add_argument("--roboflow-api-key", default=None,
                        help="Roboflow API key (required when 'roboflow' is among --platforms)")
    parser.add_argument("--config", default="./configs/datasets_config.yaml",
                        help="Path to dataset config YAML")
    parser.add_argument("--platforms", nargs="*",
                        default=["kaggle", "roboflow", "huggingface"],
                        choices=["kaggle", "roboflow", "huggingface"],
                        help="Platforms to download (default: all)")

    args = parser.parse_args()
    if "roboflow" in args.platforms and not args.roboflow_api_key:
        parser.error("--roboflow-api-key is required when 'roboflow' is in --platforms")

    main(args)
scripts/download_ckpts.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wget
2
+ import os
3
+ import argparse
4
+ from huggingface_hub import hf_hub_download
5
+
6
def download_model_ckpts(args):
    """Download model checkpoints.

    Fetches the raw YOLO checkpoint from ``args.url`` into ``<output_dir>/raw/``
    via wget, then downloads the fine-tuned ``best.pt`` from the Hugging Face
    repository ``args.hf_repo`` into ``args.output_dir`` (the file keeps its
    repo-relative path below that directory).
    """
    # --- Raw YOLO checkpoint via wget -------------------------------------
    model_url = args.url
    output_dir = args.output_dir
    raw_output_dir = os.path.join(output_dir, 'raw')  # subdirectory for the wget download

    # The file name is the last URL path segment (e.g. "yolo12n.pt").
    filename = model_url.split("/")[-1]
    output_path = os.path.join(raw_output_dir, filename)
    os.makedirs(raw_output_dir, exist_ok=True)

    wget.download(
        model_url,
        out=output_path,
        bar=wget.bar_adaptive  # show a progress bar in the terminal
    )
    print(f"\nDownloaded model from {model_url} to {output_path}")

    # --- Fine-tuned weights from Hugging Face -----------------------------
    hf_repo = args.hf_repo
    model_file = "yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # hf_hub_download recreates the repo-relative path under local_dir, so the
    # file lands at <output_dir>/yolo/finetune/.../best.pt.  The previous
    # version also prepared an unused "<output_dir>/best.pt" path (dead code:
    # nothing was ever written there) and passed the deprecated
    # local_dir_use_symlinks flag; both have been removed.
    downloaded_path = hf_hub_download(
        repo_id=hf_repo,
        filename=model_file,
        local_dir=output_dir,
    )
    print(f"\nDownloaded model file from {hf_repo} to {downloaded_path}")
48
+
49
if __name__ == "__main__":
    # Command-line entry point: build the argument parser and start the
    # checkpoint downloads.
    parser = argparse.ArgumentParser(
        description="Download yolo12n.pt from URL and best.pt from Hugging Face repository.")
    parser.add_argument(
        '--url', type=str,
        default='https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12n.pt',
        help='URL of the yolo12n.pt model to download via wget')
    parser.add_argument(
        '--output-dir', type=str, default='./ckpts',
        help='Base output directory for downloaded model files')
    parser.add_argument(
        '--hf-repo', type=str, default='danhtran2mind/license-plate-detector-ocr',
        help='Hugging Face repository ID to download model file from')

    download_model_ckpts(parser.parse_args())
scripts/old-download_and_process_datasets.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import logging
3
+ from pathlib import Path
4
+ import sys
5
+ import os
6
+
7
+ # Add parent directory to sys.path
8
+ # parent_dir = str(Path(__file__).resolve().parents[1])
9
+ # sys.path.insert(0, parent_dir)
10
+
11
+ # # Append datasets folder to sys.path
12
+ # datasets_dir = os.path.join(parent_dir, "datasets")
13
+ # sys.path.insert(0, datasets_dir)
14
+
15
+ # Append the current directory to sys.path
16
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
17
+ "src", "license_plate_detector_ocr", "data")))
18
+
19
+ from dataset_processing import config_loader, downloader, processor, converter
20
+
21
def main(args):
    """Fetch every configured dataset and assemble a single YOLO dataset tree."""
    logging.basicConfig(filename='dataset_conversion.log', level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    cfg = config_loader.load_config(args.config)
    dataset_entries = cfg['datasets']
    os.makedirs(args.dataset_base_dir, exist_ok=True)
    os.makedirs(args.output_dir, exist_ok=True)

    base = Path(args.dataset_base_dir)

    # Phase 1: download every requested dataset into dataset_<idx> folders.
    for idx, entry in enumerate(dataset_entries):
        dest = base / f"dataset_{idx}"
        kind = entry['type']
        if kind == 'kaggle' and 'kaggle' in args.platforms:
            downloader.download_kaggle_dataset(entry['id'], dest)
        elif kind == 'roboflow' and 'roboflow' in args.platforms:
            downloader.download_roboflow_dataset(entry['id'], entry['format'],
                                                 entry['version'], dest,
                                                 args.roboflow_api_key)
        elif kind == 'huggingface' and 'huggingface' in args.platforms:
            downloader.download_huggingface_dataset(entry['id'], dest)

    # Phase 2: convert/merge everything into the combined output directory.
    converter.coco_kaggle_to_yolo(args.dataset_base_dir, args.output_dir)
    for idx, entry in enumerate(dataset_entries):
        if entry['type'] == 'roboflow' and 'roboflow' in args.platforms:
            converter.copy_dataset_to_combined_folder(base / f"dataset_{idx}",
                                                      args.output_dir)
    for idx, entry in enumerate(dataset_entries):
        if entry['type'] == 'huggingface' and 'huggingface' in args.platforms:
            converter.convert_coco_huggingface_to_yolo(
                dataset_base_path=base / f"dataset_{idx}/license-plate-object-detection/data",
                output_dir=args.output_dir)

    # Phase 3: final clean-up/normalisation of the combined folders.
    processor.process_folders(args.output_dir)
50
+
51
if __name__ == "__main__":
    # CLI entry point (legacy variant: the Roboflow key is always mandatory here).
    parser = argparse.ArgumentParser(description="Download and process license plate datasets.")
    parser.add_argument("--output-dir", default="./data/yolo_standard_dataset",
                        help="Output directory for YOLOv11 dataset")
    parser.add_argument("--dataset-base-dir", default="./data/all_datasets",
                        help="Base directory for downloaded datasets")
    parser.add_argument("--roboflow-api-key", required=True,
                        help="Roboflow API key for downloading datasets")
    parser.add_argument("--config", default="./configs/datasets_config.yaml",
                        help="Path to dataset config YAML")
    parser.add_argument("--platforms", nargs="*",
                        default=["kaggle", "roboflow", "huggingface"],
                        choices=["kaggle", "roboflow", "huggingface"],
                        help="Platforms to download (default: all)")

    main(parser.parse_args())
src/license_plate_detector_ocr/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/data/dataset_processing/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/data/dataset_processing/config_loader.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import yaml
2
+
3
def load_config(config_path):
    """Read *config_path* and return its parsed YAML content."""
    with open(config_path, 'r') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
src/license_plate_detector_ocr/data/dataset_processing/converter.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import uuid
5
+ import yaml
6
+ import logging
7
+ from pathlib import Path
8
+
9
def convert_coco_huggingface_to_yolo(dataset_base_path, output_dir):
    """Convert a COCO-annotated Hugging Face dataset to the YOLO layout.

    For each split (train/valid/test) that contains ``_annotations.coco.json``,
    writes normalized ``<cls> <xc> <yc> <w> <h>`` label files and copies the
    corresponding images into ``<output_dir>/<split>/{labels,images}``.
    A ``data.yaml`` is created once if it does not already exist.
    """
    for dataset_type in ["train", "valid", "test"]:
        coco_path = Path(dataset_base_path) / dataset_type / "_annotations.coco.json"
        if not coco_path.exists():
            logging.info(f"Skipping {dataset_type}: {coco_path} not found")
            continue
        yolo_dir = Path(output_dir) / dataset_type
        images_dir = yolo_dir / "images"
        labels_dir = yolo_dir / "labels"
        for dir_path in [yolo_dir, images_dir, labels_dir]:
            dir_path.mkdir(parents=True, exist_ok=True)
        with open(coco_path) as f:
            coco_data = json.load(f)
        img_id_to_file = {img['id']: img['file_name'] for img in coco_data['images']}
        img_id_to_dims = {img['id']: (img['width'], img['height']) for img in coco_data['images']}
        # BUGFIX: use one stable renamed stem per *image* instead of a fresh
        # UUID per *annotation*.  The previous code copied each image once per
        # bounding box and scattered an image's boxes across one-line label
        # files (which also made the append-mode open pointless).  Now all
        # boxes of an image share a single label file and the image is copied
        # exactly once.
        img_id_to_new_name = {}
        for ann in coco_data['annotations']:
            img_id = ann['image_id']
            cat_id = ann['category_id']
            x_min, y_min, bbox_w, bbox_h = ann['bbox']
            width, height = img_id_to_dims[img_id]
            # COCO boxes are absolute [x_min, y_min, w, h]; YOLO wants center
            # coordinates and sizes normalized to the image dimensions.
            x_center = (x_min + bbox_w / 2) / width
            y_center = (y_min + bbox_h / 2) / height
            norm_w = bbox_w / width
            norm_h = bbox_h / height
            if img_id not in img_id_to_new_name:
                original_filename = Path(img_id_to_file[img_id]).stem
                img_id_to_new_name[img_id] = f"{original_filename}_{uuid.uuid4()}"
            new_filename = img_id_to_new_name[img_id]
            label_file = labels_dir / f"{new_filename}.txt"
            with open(label_file, 'a') as f:
                f.write(f"{cat_id} {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}\n")
            src_img = Path(coco_path).parent / img_id_to_file[img_id]
            dst_img = images_dir / f"{new_filename}{Path(img_id_to_file[img_id]).suffix}"
            if src_img.exists() and not dst_img.exists():
                shutil.copy(src_img, dst_img)
                logging.info(f"Copied {src_img} to {dst_img}")
    # NOTE(review): the diff-flattened source does not show whether this block
    # was indented inside the split loop; writing it once after all splits is
    # equivalent except when every split is missing — confirm against the repo.
    yaml_content = {
        "path": str(Path(output_dir).absolute()),
        "train": "train/images",
        "valid": "valid/images",
        "test": "test/images",
        "names": {0: "license_plate"}
    }
    yaml_path = Path(output_dir) / "data.yaml"
    if not yaml_path.exists():
        with open(yaml_path, 'w') as f:
            yaml.dump(yaml_content, f, default_flow_style=False)
        logging.info(f"Created {yaml_path}")
56
+
57
def create_yolo_structure(output_dir):
    """Create the empty train/valid/test images+labels tree for a YOLO dataset."""
    for split in ('train', 'valid', 'test'):
        for subdir in ('images', 'labels'):
            os.makedirs(os.path.join(output_dir, split, subdir), exist_ok=True)
    logging.info(f"Created YOLOv11 directory structure at {output_dir}")
62
+
63
def copy_matched_files(src_image_dir, src_label_dir, dest_image_dir, dest_label_dir, split):
    """Copy image/label pairs whose file stems match (case-insensitively).

    Each matched pair is renamed to ``<stem>_<split>_<uuid>`` so files coming
    from different source datasets cannot collide in the combined output.
    Unmatched images and labels are logged as warnings.

    Returns a tuple ``(copied_image_names, n_images_without_labels,
    n_labels_without_images)``.
    """
    src_images = Path(src_image_dir)
    src_labels = Path(src_label_dir)
    dst_images = Path(dest_image_dir)
    dst_labels = Path(dest_label_dir)

    # Index images and labels by lower-cased stem so "Car.JPG" matches "car.txt".
    image_index = {}
    for ext in ('jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG'):
        for path in src_images.glob(f'*.{ext}'):
            image_index[path.stem.lower()] = path
    label_index = {path.stem.lower(): path for path in src_labels.glob('*.txt')}

    copied = set()
    matched = image_index.keys() & label_index.keys()
    for stem in matched:
        image_path = image_index[stem]
        label_path = label_index[stem]
        renamed = f"{stem}_{split}_{uuid.uuid4()}"
        image_target = dst_images / f"{renamed}{image_path.suffix}"
        label_target = dst_labels / f"{renamed}{label_path.suffix}"
        shutil.copy(image_path, image_target)
        shutil.copy(label_path, label_target)
        copied.add(image_target.name)
        logging.info(f"Copied {image_path} to {image_target}")
        logging.info(f"Copied {label_path} to {label_target}")

    orphan_images = image_index.keys() - matched
    orphan_labels = label_index.keys() - matched
    for stem in orphan_images:
        logging.warning(f"Image without label in {src_image_dir}: {image_index[stem]}")
    for stem in orphan_labels:
        logging.warning(f"Label without image in {src_label_dir}: {label_index[stem]}")
    return copied, len(orphan_images), len(orphan_labels)
94
+
95
def create_data_yaml(output_dir):
    """Write the YOLO ``data.yaml`` (single class: license_plate) into *output_dir*."""
    dataset_spec = {
        'train': '../train/images',
        'val': '../valid/images',
        'test': '../test/images',
        'nc': 1,
        'names': ['license_plate']
    }
    yaml_file = os.path.join(output_dir, 'data.yaml')
    with open(yaml_file, 'w') as handle:
        yaml.dump(dataset_spec, handle, default_flow_style=False)
    logging.info("Created data.yaml")
106
+
107
def coco_kaggle_to_yolo(all_datasets_path, output_dir="yolo_standard_dataset"):
    """Merge the six known Kaggle downloads into one YOLO-layout dataset.

    The mapping below records where each Kaggle dataset keeps its images and
    labels for every split; matched pairs are copied into *output_dir* and a
    ``data.yaml`` is written at the end.  Mismatched files are counted and
    logged to ``dataset_conversion.log``.
    """
    # Per-dataset source layout, relative to all_datasets_path/<dataset_n>.
    # An empty split dict means that dataset has no such split.
    datasets = {
        'dataset_0': {
            'train': {'images': 'images/train', 'labels': 'labels/train'},
            'valid': {'images': 'images/val', 'labels': 'labels/val'},
            'test': {'images': 'images/test', 'labels': 'labels/test'}
        },
        'dataset_1': {
            'train': {'images': 'images/train', 'labels': 'labels/train'},
            'valid': {'images': 'images/val', 'labels': 'labels/val'},
            'test': {}
        },
        'dataset_2': {
            'train': {'images': 'archive/images/train', 'labels': 'archive/labels/train'},
            'valid': {'images': 'archive/images/val', 'labels': 'archive/labels/val'},
            'test': {}
        },
        'dataset_3': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {'images': 'test/images', 'labels': 'test/labels'}
        },
        'dataset_4': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {}
        },
        'dataset_5': {
            'train': {'images': 'train/images', 'labels': 'train/labels'},
            'valid': {'images': 'valid/images', 'labels': 'valid/labels'},
            'test': {}
        }
    }
    create_yolo_structure(output_dir)
    total_mismatches = 0
    from tqdm import tqdm  # imported lazily; only needed for the progress bar
    for dataset_name, splits in tqdm(datasets.items(), desc="Processing Kaggle Datasets"):
        for split in ('train', 'valid', 'test'):
            layout = splits.get(split)
            if not layout:
                continue
            src_images = os.path.join(all_datasets_path, dataset_name, layout['images'])
            src_labels = os.path.join(all_datasets_path, dataset_name, layout['labels'])
            dest_images = os.path.join(output_dir, split, 'images')
            dest_labels = os.path.join(output_dir, split, 'labels')
            _, img_mismatches, lbl_mismatches = copy_matched_files(
                src_images, src_labels, dest_images, dest_labels, split
            )
            total_mismatches += img_mismatches + lbl_mismatches
            if img_mismatches or lbl_mismatches:
                logging.warning(f"Mismatches in {dataset_name} {split} split: "
                                f"{img_mismatches} images without labels, {lbl_mismatches} labels without images")
    create_data_yaml(output_dir)
    logging.info(f"Dataset conversion completed. Total mismatches: {total_mismatches}")
    if total_mismatches > 0:
        logging.info("Check dataset_conversion.log for details on mismatched files")
162
+
163
def copy_dataset_to_combined_folder(dataset_path, combined_dataset_folder):
    """Merge one already-YOLO dataset (train/valid/test tree) into the combined folder.

    Files are renamed with the split name plus one UUID generated per split,
    so repeated merges from different source datasets cannot overwrite each
    other.  Missing source folders are logged and skipped; individual copy
    failures are logged without aborting the merge.
    """
    for split in ('train', 'valid', 'test'):
        split_src = dataset_path / split
        split_dst = Path(combined_dataset_folder) / split
        if not split_src.exists():
            logging.warning(f"Source folder does not exist: {split_src}")
            continue
        suffix_id = str(uuid.uuid4())  # one id per split, shared by its files
        for kind in ('images', 'labels'):
            kind_src = split_src / kind
            kind_dst = split_dst / kind
            if not kind_src.exists():
                logging.warning(f"Source directory does not exist: {kind_src}")
                continue
            kind_dst.mkdir(parents=True, exist_ok=True)
            for source_file in kind_src.glob('*'):
                try:
                    target_file = kind_dst / f"{source_file.stem}_{split}_{suffix_id}{source_file.suffix}"
                    shutil.copy(source_file, target_file)
                    logging.info(f"Copied {source_file} to {target_file}")
                except Exception as copy_err:
                    logging.error(f"Failed to copy {source_file} to {target_file}: {str(copy_err)}")
src/license_plate_detector_ocr/data/dataset_processing/downloader.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ import zipfile
3
+ import subprocess
4
+ from pathlib import Path
5
+ import logging
6
+ import os
7
+
8
def download_kaggle_dataset(dataset_id, output_dir):
    """Download a Kaggle dataset archive and extract it into ``output_dir``.

    Args:
        dataset_id: Kaggle dataset identifier, e.g. "user/dataset-name".
        output_dir: Destination directory (str or Path); created if missing.

    Errors are logged rather than raised, matching the other downloaders.
    """
    try:
        # BUG FIX: accept both str and Path (the old code crashed on str with
        # `unsupported operand /`); consistent with download_roboflow_dataset.
        output_dir = Path(output_dir)
        dataset_name = dataset_id.split("/")[-1]
        zip_path = output_dir / f"{dataset_name}.zip"
        output_dir.mkdir(parents=True, exist_ok=True)
        urllib.request.urlretrieve(
            f"https://www.kaggle.com/api/v1/datasets/download/{dataset_id}",
            str(zip_path)
        )
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(output_dir)
        # Remove the archive once extracted so stale zips don't accumulate.
        try:
            zip_path.unlink()
        except OSError:
            logging.warning(f"Could not remove archive {zip_path}")
        logging.info(f"Downloaded Kaggle dataset: {dataset_id}")
    except Exception as e:
        logging.error(f"Failed to download Kaggle dataset {dataset_id}: {str(e)}")
22
+
23
def download_roboflow_dataset(dataset_id, format_type, version, output_dir, api_key):
    """Download one Roboflow dataset version and mirror its contents into ``output_dir``.

    Errors (including a failed cleanup of the temporary download dir) are logged
    instead of raised.
    """
    try:
        from roboflow import Roboflow
        import shutil

        username, dataset_name = dataset_id.split("/")
        client = Roboflow(api_key=api_key)
        downloaded = (
            client.workspace(username)
            .project(dataset_name)
            .version(version)
            .download(format_type)
        )

        dataset_path = Path(downloaded.location)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Mirror every entry from the Roboflow download location into output_dir,
        # replacing any directory that already exists at the destination.
        for entry in dataset_path.iterdir():
            target = output_dir / entry.name
            if entry.is_dir():
                if target.exists():
                    shutil.rmtree(target)
                shutil.copytree(entry, target)
            else:
                shutil.copy2(entry, target)
        logging.info(f"Downloaded Roboflow dataset: {dataset_id} to {output_dir}")

        # Best-effort removal of the original download directory.
        try:
            shutil.rmtree(dataset_path)
        except Exception as cleanup_e:
            logging.warning(f"Could not remove original Roboflow download dir {dataset_path}: {cleanup_e}")
    except Exception as e:
        logging.error(f"Failed to download Roboflow dataset {dataset_id}: {str(e)}")
52
+
53
def download_huggingface_dataset(dataset_id, output_dir):
    """Clone a HuggingFace dataset repo and extract its per-split archives.

    Expects the repo to hold ``data/train.zip``, ``data/valid.zip`` and
    ``data/test.zip``; each archive is extracted into a matching
    ``data/<split>/`` directory.

    Args:
        dataset_id: HuggingFace dataset id, e.g. "user/dataset-name".
        output_dir: Directory to clone into (str or Path); created if missing.
    """
    try:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        # check=True so a failed clone raises and is logged below instead of
        # being silently ignored.
        subprocess.run(
            ["git", "clone", f"https://huggingface.co/datasets/{dataset_id}"],
            cwd=output_dir,
            check=True,
        )
        data_dir = output_dir / dataset_id.split("/")[-1] / "data"
        for split in ["train", "valid", "test"]:
            (data_dir / split).mkdir(exist_ok=True)
            # BUG FIX: pair each archive with its own split directory; the old
            # zip() mismatch extracted test.zip into valid/ and valid.zip into test/.
            zip_path = data_dir / f"{split}.zip"
            if zip_path.exists():
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    zf.extractall(data_dir / split)
        logging.info(f"Downloaded HuggingFace dataset: {dataset_id}")
    except Exception as e:
        logging.error(f"Failed to download HuggingFace dataset {dataset_id}: {str(e)}")
src/license_plate_detector_ocr/data/dataset_processing/processor.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import logging
4
+
5
def check_and_remove_invalid_pairs(base_path, folder):
    """Delete image/label pairs whose YOLO label file is empty or malformed.

    A label file is invalid when it is empty, unreadable, or contains any line
    that does not have exactly 5 whitespace-separated fields
    (class id + 4 normalized bbox values).
    """
    labels_dir = os.path.join(base_path, folder, 'labels')
    images_dir = os.path.join(base_path, folder, 'images')
    for label_path in glob.glob(os.path.join(labels_dir, '*.txt')):
        try:
            with open(label_path, 'r') as handle:
                contents = handle.read().strip()
        except Exception as e:
            logging.error(f"Error reading {label_path}: {e}")
            remove_pair(label_path, images_dir)
            continue
        if not contents:
            remove_pair(label_path, images_dir)
            continue
        # A single bad line invalidates the whole pair.
        if any(len(line.strip().split()) != 5 for line in contents.splitlines()):
            remove_pair(label_path, images_dir)
25
+
26
def remove_pair(label_path, image_dir):
    """Remove a label file and its sibling ``.jpg`` image, logging any missing file."""
    stem = os.path.splitext(os.path.basename(label_path))[0]
    image_path = os.path.join(image_dir, f"{stem}.jpg")
    # Delete both halves of the pair; a missing file is only worth a warning.
    for path, kind in ((label_path, "Label"), (image_path, "Image")):
        try:
            os.remove(path)
        except FileNotFoundError:
            logging.warning(f"{kind} file not found: {path}")
37
+
38
def process_folders(base_path):
    """Validate and clean the train, valid and test splits under ``base_path``."""
    for split in ('train', 'valid', 'test'):
        logging.info(f"Processing {split} folder...")
        check_and_remove_invalid_pairs(base_path, split)
src/license_plate_detector_ocr/infer.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# BUG FIX: extend sys.path BEFORE importing from the local `inference`
# package; the old order appended the path only after the import, which
# fails whenever the script is launched from another working directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))

import cv2
import numpy as np
from ultralytics import YOLO
from inference.paddleocr_infer import process_ocr
10
+
11
def is_image_file(file_path):
    """Return True when ``file_path`` has a known image extension (case-insensitive)."""
    _, ext = os.path.splitext(file_path)
    return ext.lower() in {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
15
+
16
def process_image(model, image_path, output_path=None):
    """Detect license plates in one image, OCR each crop, and save an annotated copy.

    Args:
        model: Loaded YOLO detector.
        image_path: Path of the image to process.
        output_path: Where to write the annotated image; defaults to
            ``<input>_output.jpg`` next to the input.

    Returns:
        (annotated_image, plate_texts), or (None, None) on load/inference failure.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return None, None

    try:
        detections = model(image_path)
    except Exception as e:
        print(f"Error during image inference: {e}")
        return None, None

    plate_texts = []
    for detection in detections:
        for box in detection.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0]

            # OCR the cropped plate region; fall back to a placeholder string.
            crop = image[y1:y2, x1:x2]
            texts = process_ocr(crop)
            plate_text = texts[0] if texts else "No text detected"
            plate_texts.append(plate_text)

            # Annotate the original image with the box and recognized text.
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{plate_text} ({confidence:.2f})"
            cv2.putText(image, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Derive an output path next to the input when none is given.
    if output_path is None:
        output_path = os.path.splitext(image_path)[0] + '_output.jpg'

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    cv2.imwrite(output_path, image)
    print(f"Saved processed image to {output_path}")

    return image, plate_texts
58
+
59
def process_video(model, video_path, output_path=None):
    """Detect license plates frame-by-frame in a video, OCR them, and save an annotated copy.

    Args:
        model: Loaded YOLO detector.
        video_path: Path of the video to process.
        output_path: Where to write the annotated video; defaults to
            ``<input>_output.mp4`` next to the input.

    Returns:
        (frames_array, per_frame_plate_texts) where frames_array is a 4D
        NumPy array of annotated frames, or (None, None) on failure.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video at {video_path}")
        return None, None

    # Source properties drive the writer so the output matches the input.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    if output_path is None:
        output_path = os.path.splitext(video_path)[0] + '_output.mp4'
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                             fps, (width, height))

    frames = []
    all_plate_texts = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading frame.")
            break

        try:
            detections = model(frame)
        except Exception as e:
            print(f"Error during video inference: {e}")
            break

        frame_plate_texts = []
        for detection in detections:
            for box in detection.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = box.conf[0]

                # OCR the cropped plate region for this detection.
                crop = frame[y1:y2, x1:x2]
                texts = process_ocr(crop)
                plate_text = texts[0] if texts else "No text detected"
                frame_plate_texts.append(plate_text)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{plate_text} ({confidence:.2f})"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Every frame is kept (annotated or not) so the output video stays
        # continuous; frames with no detections contribute an empty text list.
        frames.append(frame)
        all_plate_texts.append(frame_plate_texts)
        writer.write(frame)

    cap.release()
    writer.release()
    print(f"Saved processed video to {output_path}")

    if not frames:
        print("No frames processed.")
        return None, None

    # Stack to shape (num_frames, height, width, channels).
    return np.stack(frames, axis=0), all_plate_texts
141
+
142
def infer(input_path, output_path=None):
    """Run license plate detection + OCR on an image or video file.

    Args:
        input_path: Path to the input image or video.
        output_path: Optional path for the annotated output file.

    Returns:
        (result_array, plate_texts); (None, None) on any failure.
    """
    model_path = "ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # Fail fast with clear messages before loading anything heavy.
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return None, None
    if not os.path.exists(input_path):
        print(f"Error: Input file not found at {input_path}")
        return None, None

    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

    # Dispatch on file type: still image vs. anything else (treated as video).
    handler = process_image if is_image_file(input_path) else process_video
    return handler(model, input_path, output_path)
166
+
167
if __name__ == "__main__":
    import argparse

    # CLI entry point for one-off image/video inference.
    cli = argparse.ArgumentParser(description="Detect and read license plates in an image or video.")
    cli.add_argument("--input_path", type=str, required=True, help="Path to the input image or video file")
    cli.add_argument("--output_path", type=str, default=None, help="Path to save the output file (optional)")
    cli_args = cli.parse_args()
    result_array, plate_texts = infer(cli_args.input_path, cli_args.output_path)
src/license_plate_detector_ocr/inference/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/inference/paddleocr_infer.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from paddleocr import PaddleOCR
from typing import Union, List
import numpy as np

# Initialize PaddleOCR once with optimized settings for English license plate recognition.
# Document orientation classification, unwarping, and per-textline orientation are
# all disabled — presumably plates are assumed roughly upright and undistorted;
# TODO confirm for skewed camera angles. The "slim" PP-OCRv3 det/rec models trade
# some accuracy for smaller size and faster inference.
OCR = PaddleOCR(
    lang='en',
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=False,
    text_detection_model_name='en_PP-OCRv3_det_slim',
    text_recognition_model_name='en_PP-OCRv3_rec_slim'
)
+
15
def process_ocr(image_input: Union[str, List[str], np.ndarray]) -> Union[List[str], List[List[str]]]:
    """
    Process OCR on a single image path, a list of image paths, or a 3D image array.

    Args:
        image_input: A single image path (str), a list of image paths (List[str]), or a 3D NumPy array (np.ndarray)

    Returns:
        For a single image or array: List of extracted text strings
        For multiple images: List of lists, each containing extracted text strings for an image

    Raises:
        ValueError: If a NumPy array input is not 3-dimensional (H, W, C).
    """
    # Convert single inputs to a list for unified processing
    if isinstance(image_input, str):
        image_inputs = [image_input]
    elif isinstance(image_input, np.ndarray):
        if image_input.ndim != 3:
            raise ValueError("Image array must be 3-dimensional (height, width, channels)")
        image_inputs = [image_input]
    else:
        image_inputs = image_input

    # Process each image or array and extract text
    results = []
    for input_item in image_inputs:
        # NOTE(review): the `cls` kwarg was dropped in PaddleOCR 3.x — confirm the
        # pinned PaddleOCR version still accepts it.
        ocr_results = OCR.ocr(input_item, cls=False)  # cls=False since angle classification is disabled
        # Presumably each `line` is one detected text region and word_info[-1][0]
        # is the recognized string in PaddleOCR's [box, (text, score)] result
        # layout — TODO verify against the installed PaddleOCR version.
        plate_list = [' '.join(word_info[-1][0] for word_info in line) for line in ocr_results if line]
        results.append(plate_list)

    # Return a single list for a single image/array, or list of lists for multiple images
    return results[0] if isinstance(image_input, (str, np.ndarray)) else results
45
+
46
if __name__ == '__main__':
    # Manual smoke tests for process_ocr; expects the sample images referenced
    # below to exist in the working directory.
    # Example with a single image path
    single_image = 'plate-1.png'
    single_result = process_ocr(single_image)
    print("Single image path results:")
    print(single_result)
    for plate in single_result:
        print(plate)

    # Example with multiple image paths
    image_list = ['plate-1.png', 'plate-2.png', 'plate-3.jpg']
    multi_results = process_ocr(image_list)
    print("\nMultiple image path results:")
    print(multi_results)
    for idx, plates in enumerate(multi_results):
        print(f"Image {idx + 1} ({image_list[idx]}):")
        for plate in plates:
            print(plate)

    # Example with a 3D image array (simulated)
    # Note: Replace this with actual image data in practice
    import cv2
    image_array = cv2.imread('lp_image.jpg')  # Load an image as a NumPy array
    if image_array is not None:
        array_result = process_ocr(image_array)
        print("\nSingle image array results:")
        print(array_result)
        for plate in array_result:
            print(plate)
    else:
        print("\nFailed to load image array for testing")
src/license_plate_detector_ocr/old-infer.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Append the current directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))

from inference import paddleocr_infer

# NOTE(review): everything below runs at import time (module-level side
# effects, including OCR on hard-coded sample files); this module appears
# to be a superseded draft of infer.py kept for reference.
# Example with multiple images
image_list = ['plate-1.png', 'plate-2.png', 'plate-3.jpg']
multi_results = paddleocr_infer.process_ocr(image_list)
print("\nMultiple image results:")
print(multi_results)
for idx, plates in enumerate(multi_results):
    print(f"Image {idx + 1} ({image_list[idx]}):")
    for plate in plates:
        print(plate)


####yolo####
import cv2
from ultralytics import YOLO
import os
import argparse
import numpy as np
26
+
27
def is_image_file(file_path):
    """Return True when ``file_path`` ends in a recognized image extension."""
    extension = os.path.splitext(file_path)[1]
    return extension.lower() in {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
31
+
32
def process_image(model, image_path):
    """Run plate detection on one image and return it annotated, or None on failure."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return None

    try:
        detections = model(image_path)
    except Exception as e:
        print(f"Error during image inference: {e}")
        return None

    # Draw every detected box with its confidence score.
    for detection in detections:
        for box in detection.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0]
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"License Plate: {confidence:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return image
54
+
55
def process_video(model, video_path):
    """Run plate detection on every frame and return annotated frames as a 4D array.

    Returns None when the video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video at {video_path}")
        return None

    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading frame.")
            break

        try:
            detections = model(frame)
        except Exception as e:
            print(f"Error during video inference: {e}")
            break

        # Annotate each detection with its bounding box and confidence score.
        for detection in detections:
            for box in detection.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = box.conf[0]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"License Plate: {confidence:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        frames.append(frame)

    cap.release()
    if not frames:
        print("No frames processed.")
        return None

    # Stack to shape (num_frames, height, width, channels).
    return np.stack(frames, axis=0)
93
+
94
def main(input_path):
    """Dispatch input to image or video processing after validating model and paths.

    Returns the processed array from the chosen handler, or None on failure.
    """
    model_path = "ckpts/yolo/finetune/runs/license_plate_detector/weights/best.pt"

    # Validate everything up front so failures produce a clear message.
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}")
        return None
    if not os.path.exists(input_path):
        print(f"Error: Input file not found at {input_path}")
        return None

    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

    handler = process_image if is_image_file(input_path) else process_video
    return handler(model, input_path)
116
+
117
if __name__ == "__main__":
    # CLI entry point: single positional argument, image or video path.
    arg_parser = argparse.ArgumentParser(description="Detect license plates in an image or video.")
    arg_parser.add_argument("input_path", type=str, help="Path to the input image or video file")
    cli_args = arg_parser.parse_args()
    processed = main(cli_args.input_path)
    if processed is not None:
        print(f"Processed array shape: {processed.shape}")
src/license_plate_detector_ocr/train.py ADDED
File without changes
src/license_plate_detector_ocr/training/__init__.py ADDED
File without changes
src/license_plate_detector_ocr/training/train_yolo.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ultralytics import YOLO
2
+ from pathlib import Path
3
+ import sys
4
+ import os
5
+ import argparse
6
+
7
def train_yolo(args):
    """Fine-tune a YOLO detection model using the options collected by argparse.

    Args:
        args: argparse.Namespace with model, data, epochs, batch, resume,
            patience, lr0, lrf, device, project, name and save attributes.
    """
    # Make the package root importable (kept for parity with other entry points).
    parent_dir = str(Path(__file__).resolve().parents[1])
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)

    detector = YOLO(args.model)

    # Kick off training; every hyperparameter comes straight from the CLI args.
    detector.train(
        data=args.data,
        task='detect',
        mode='train',
        epochs=args.epochs,
        batch=args.batch,
        resume=args.resume,
        patience=args.patience,
        lr0=args.lr0,
        lrf=args.lrf,
        device=args.device,
        project=args.project,
        name=args.name,
        save=args.save,
    )
32
+
33
if __name__ == "__main__":
    # CLI entry point: every training hyperparameter is exposed as a flag.
    arg_parser = argparse.ArgumentParser(description='Train a YOLOv10 model for license plate detection.')
    arg_parser.add_argument('--model', type=str, default='./ckpts/raw/yolo12n.pt', help='Model path or model name like "yolo12n.pt"')
    arg_parser.add_argument('--data', type=str, default='./datasets/yolo_standard_dataset/data.yaml', help='Path to the dataset YAML file')
    arg_parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs')
    arg_parser.add_argument('--batch', type=int, default=64, help='Batch size for training')
    arg_parser.add_argument('--resume', action='store_true', help='Resume training from the last checkpoint')
    arg_parser.add_argument('--patience', type=int, default=20, help='Early stopping patience')
    arg_parser.add_argument('--lr0', type=float, default=0.01, help='Initial learning rate')
    arg_parser.add_argument('--lrf', type=float, default=0.001, help='Final learning rate')
    arg_parser.add_argument('--device', type=str, default='0', help='Device to train on (e.g., 0, [0,1], or cpu)')
    arg_parser.add_argument('--project', type=str, default='./ckpts/finetune/runs', help='Directory to save training results')
    arg_parser.add_argument('--name', type=str, default='license_plate_detector', help='Name of the training run')
    arg_parser.add_argument('--save', action='store_true', default=True, help='Save training results')
    train_yolo(arg_parser.parse_args())