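"""Gradio demo for One-DM handwriting generation.

Takes a line of text plus a writer-style reference image, optionally derives a
Laplace (edge) map from the style image, renders each word with the One-DM
model, and stitches the word images into a single multi-line output image.
"""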
import gradio as gr
from inference import OneDMInference
import os
from PIL import Image
import cv2
import numpy as np
import torch
import torch.nn.functional as F

# Load the model
model = OneDMInference(
    model_path='one_dm_finetuned.pt',
    cfg_path='configs/finetuned.yml'
)

# 3x3 Laplacian kernel for extracting high-frequency (stroke-edge) detail
laplace = torch.tensor(
    [[0, 1, 0],
     [1, -4, 1],
     [0, 1, 0]], dtype=torch.float, requires_grad=False
).view(1, 1, 3, 3)
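# NOTE: the kernel is created on CPU; if inference runs on a GPU, move it with
# laplace = laplace.to(device) before applying it to GPU tensors.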

def generate_laplace_image(image_path, target_size=(64, 64)):
    """
    Generate a Laplace image from the input image using a Laplacian filter.
    Adjusted to match model-expected dimensions (e.g., 64x64).
    """
    # Read image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image at {image_path}")

    # Convert to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize to model-compatible size (e.g., 64x64)
    image = cv2.resize(image, target_size)

    # Convert to tensor
    x = torch.from_numpy(image).unsqueeze(0).unsqueeze(0).float()

    # Normalize input
    x = x / 255.0

    # Apply Laplacian filter with proper padding
    y = F.conv2d(x, laplace, stride=1, padding=1)  # Padding=1 keeps spatial dims intact

    # Convert back to a uint8 image; negative Laplacian responses are clipped to 0
    y = y.squeeze().numpy()
    y = np.clip(y * 255.0, 0, 255)
    y = y.astype(np.uint8)

    # Binarize with Otsu's method (the threshold value 0 is ignored; Otsu picks it)
    _, threshold = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Save output
    laplace_path = os.path.splitext(image_path)[0] + "_laplace.png"
    cv2.imwrite(laplace_path, threshold)

    return laplace_path
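
# Example (illustrative, using the sample image bundled with the demo):
#   laplace_path = generate_laplace_image("English_data/Dataset/test/169/c04-134-05-08.png")
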
def generate_handwriting(text, style_image, laplace_image=None):
    if not text or not text.strip():
        raise ValueError("Please enter some text to generate.")

    output_dir = "./generated"
    os.makedirs(output_dir, exist_ok=True)

    # Assume model expects 64x64 inputs based on logs (adjust if config specifies otherwise)
    target_size = (64, 64)

    # Generate Laplace image if not provided
    if laplace_image is None:
        laplace_image = generate_laplace_image(style_image, target_size)
    else:
        # Ensure the provided Laplace image matches the expected size.
        # (ndarray.shape is (height, width) while cv2.resize takes (width, height);
        # the direct comparison is safe here only because target_size is square.)
        laplace_img = cv2.imread(laplace_image, cv2.IMREAD_GRAYSCALE)
        if laplace_img is None:
            raise ValueError(f"Could not read Laplace image at {laplace_image}")
        if laplace_img.shape != target_size:
            laplace_img = cv2.resize(laplace_img, target_size)
            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
            cv2.imwrite(laplace_image, laplace_img)

    # Resize the style image to match model expectations
    style_img = cv2.imread(style_image)
    if style_img is None:
        raise ValueError(f"Could not read style image at {style_image}")
    style_img_resized = cv2.resize(style_img, target_size)
    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
    cv2.imwrite(style_image_resized, style_img_resized)

    # Generate handwriting for each word
    words = text.split()
    generated_image_paths = []
    for word in words:
        output_paths = model.generate(
            text=word,
            style_path=style_image_resized,  # Use resized style image
            laplace_path=laplace_image,      # Use Laplace image
            output_dir=output_dir
        )
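        # model.generate is assumed to return a list of output file paths;
        # take the first entry as the rendered image for this word.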
        generated_image_paths.append(output_paths[0])

    # Load generated images
    images = [Image.open(img_path) for img_path in generated_image_paths]

    # Layout constants (pixels)
    word_gap = 5            # horizontal gap between words
    line_gap = 20           # vertical gap between lines
    max_words_per_line = 5  # hard cap on words per line
    top_margin = 10
    left_margin = 10

    # Calculate line dimensions
    lines = []
    current_line = []
    current_line_width = 0
    current_line_height = 0

    max_line_width = 500  # wrap once a line would exceed this width (px)
    for img in images:
        # Start a new line when the word cap is reached or the line would
        # overflow; the `current_line` check avoids emitting an empty first line.
        if len(current_line) >= max_words_per_line or (
                current_line and current_line_width + img.size[0] > max_line_width):
            lines.append((current_line, current_line_width - word_gap, current_line_height))
            current_line = []
            current_line_width = 0
            current_line_height = 0

        current_line.append(img)
        current_line_width += img.size[0] + word_gap
        current_line_height = max(current_line_height, img.size[1])

    # Add the last line if it has content
    if current_line:
        lines.append((current_line, current_line_width - word_gap, current_line_height))

    # Calculate total dimensions
    total_width = max(line[1] for line in lines) + (2 * left_margin)  # Width of the widest line
    total_height = sum(line[2] for line in lines) + (len(lines) - 1) * line_gap + top_margin

    # Create merged image
    merged_image = Image.new('RGB', (total_width, total_height), color=(255, 255, 255))

    # Paste words into the image
    y_offset = top_margin
    for line_images, line_width, line_height in lines:
        x_offset = left_margin  # Align to the left instead of centering
        for img in line_images:
            # Adjust y_offset for each word to align baselines (optional, if heights vary significantly)
            word_y_offset = y_offset + (line_height - img.size[1])  # Align to the bottom of the line
            merged_image.paste(img, (x_offset, word_y_offset))
            x_offset += img.size[0] + word_gap
        y_offset += line_height + line_gap

    # Save merged image
    merged_image_path = os.path.join(output_dir, "merged_output.png")
    merged_image.save(merged_image_path)

    return merged_image_path, gr.update(value=laplace_image)
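
# Example (illustrative): render two words with the bundled sample style image;
# the Laplace map is derived automatically when none is supplied.
#   merged_path, laplace_used = generate_handwriting(
#       "Hello World", "English_data/Dataset/test/169/c04-134-05-08.png")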


# Create Gradio interface
iface = gr.Interface(
    fn=generate_handwriting,
    inputs=[
        gr.Textbox(label="Text to generate"),
        gr.Image(label="Style Image", type="filepath"),
        gr.Image(label="Laplace Image (Optional)", type="filepath")
    ],
    outputs=[
        gr.Image(label="Generated Handwriting"),
        gr.Image(label="Laplace Image (Optional)")
    ],
    title="Handwriting Generation",
    description="Generate handwritten text using One-DM model. If no Laplace image is provided, it will be generated from the style image.",
    examples=[
        ["Hello World",
         "English_data/Dataset/test/169/c04-134-05-08.png",
         "English_data/Dataset_laplace/test/169/c04-134-00-00.png"]
    ]
)
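
# Note: share=True asks Gradio for a temporary public URL in addition to the
# local server; remove it to serve on localhost only.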

if __name__ == "__main__":
    iface.launch(share=True)