Spaces:

Kishan11
/

handwriting_generation_v2

Runtime error

App Files Files Community

Kishan11 committed on Mar 9

Commit

2f22cf7

verified ·

1 Parent(s): 85ccf49

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -0

app.py CHANGED Viewed

	@@ -0,0 +1,175 @@

+import gradio as gr
+from inference import OneDMInference
+import os
+from PIL import Image
+import cv2
+import numpy as np
+import torch
+import torch.nn.functional as F
# Build the One-DM inference wrapper once at import time; the request handler
# below reuses this single module-level instance for every word it generates.
model = OneDMInference(
    cfg_path="configs/finetuned.yml",
    model_path="one_dm_finetuned.pt",
)
# 3x3 four-neighbour Laplacian stencil, reshaped to (1, 1, 3, 3) so it can be
# passed directly as the weight argument of F.conv2d on a single-channel input.
# It is created on the default device; move it if the input lives elsewhere.
laplace = torch.tensor(
    [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ],
    dtype=torch.float32,
).reshape(1, 1, 3, 3)
def generate_laplace_image(image_path, target_size=(64, 64)):
    """Write a binarised Laplacian edge map of an image next to the original.

    The image is read with OpenCV, converted to grayscale, resized to
    ``target_size``, scaled to [0, 1], convolved with the module-level
    ``laplace`` kernel, scaled back by 255 and clipped to [0, 255] (negative
    filter responses are therefore discarded), and finally binarised with
    Otsu's threshold.

    Args:
        image_path: Path of the source image on disk.
        target_size: Size tuple handed to ``cv2.resize``, which interprets it
            as ``(width, height)``.

    Returns:
        Path of the written PNG: ``image_path`` with its extension replaced
        by ``_laplace.png``.

    Raises:
        ValueError: If ``image_path`` is empty/``None`` or cannot be read.
        OSError: If the result image could not be written.
    """
    # Fail with the function's own error type instead of whatever OpenCV
    # raises when handed None.
    if not image_path:
        raise ValueError("No image path was provided")

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image at {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, target_size)

    # Shape (1, 1, H, W) with values in [0, 1], as expected by F.conv2d.
    x = torch.from_numpy(gray).unsqueeze(0).unsqueeze(0).float() / 255.0

    # padding=1 with a 3x3 kernel keeps the spatial dimensions unchanged.
    y = F.conv2d(x, laplace, stride=1, padding=1)

    # Index the batch/channel axes explicitly: squeeze() would also drop a
    # spatial axis of length 1 and hand a 1-D array to the thresholding step.
    response = y[0, 0].numpy()
    edges = np.clip(response * 255.0, 0, 255).astype(np.uint8)

    # With THRESH_OTSU the threshold argument (0) is ignored and computed
    # from the image histogram.
    _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_OTSU)

    laplace_path = os.path.splitext(image_path)[0] + "_laplace.png"
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(laplace_path, binary):
        raise OSError(f"Could not write Laplace image to {laplace_path}")
    return laplace_path
def _write_image_or_raise(path, image):
    """Save ``image`` with OpenCV, raising OSError if the write is reported as failed."""
    # cv2.imwrite signals failure through its boolean return value.
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image to {path}")


def _wrap_into_lines(images, word_gap, max_words_per_line, max_line_width):
    """Group word images into lines of ``(images, width, height)`` tuples.

    ``width`` is the sum of the image widths plus one ``word_gap`` between
    neighbours; ``height`` is the tallest image of the line.
    """
    lines = []
    current, running_width, tallest = [], 0, 0
    for img in images:
        too_many = len(current) >= max_words_per_line
        too_wide = running_width + img.size[0] > max_line_width
        # Only break when the line already holds a word; otherwise a single
        # over-wide word would emit an empty line (and an extra line gap).
        if current and (too_many or too_wide):
            lines.append((current, running_width - word_gap, tallest))
            current, running_width, tallest = [], 0, 0
        current.append(img)
        running_width += img.size[0] + word_gap
        tallest = max(tallest, img.size[1])
    if current:
        lines.append((current, running_width - word_gap, tallest))
    return lines


def generate_handwriting(text, style_image, laplace_image=None):
    """Render ``text`` word by word with the model and merge the words into one image.

    Each whitespace-separated word is passed to ``model.generate`` together
    with a resized copy of the style image and a Laplace image (generated from
    the style image when none is supplied). The first path returned for each
    word is loaded and the word images are laid out left-aligned and
    bottom-aligned on a white canvas.

    Side effects: creates ``./generated``, writes ``*_resized.png`` (and
    possibly ``*_laplace.png``) next to the input files, and overwrites
    ``./generated/merged_output.png`` on every call.

    Args:
        text: Text to render; split on whitespace into words.
        style_image: File path of the handwriting style sample.
        laplace_image: Optional file path of a precomputed Laplace image.

    Returns:
        A tuple ``(merged_image_path, gr.update(value=laplace_image_path))``.

    Raises:
        ValueError: If ``text`` contains no words, ``style_image`` is missing,
            or an input image cannot be read.
        OSError: If an intermediate image cannot be written.
    """
    # Validate up front: previously empty text surfaced as
    # "max() arg is an empty sequence" deep inside the layout code.
    words = text.split() if text else []
    if not words:
        raise ValueError("No text was provided to generate handwriting for")
    if not style_image:
        raise ValueError("No style image was provided")

    output_dir = "./generated"
    os.makedirs(output_dir, exist_ok=True)

    # Size handed to cv2.resize, i.e. (width, height).
    # NOTE(review): 64x64 is assumed from runtime logs; confirm against the model config.
    target_size = (64, 64)

    if not laplace_image:
        laplace_image = generate_laplace_image(style_image, target_size)
    else:
        laplace_img = cv2.imread(laplace_image, cv2.IMREAD_GRAYSCALE)
        if laplace_img is None:
            raise ValueError(f"Could not read image at {laplace_image}")
        # ndarray.shape is (height, width) whereas target_size is (width, height).
        if laplace_img.shape != target_size[::-1]:
            laplace_img = cv2.resize(laplace_img, target_size)
            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
            _write_image_or_raise(laplace_image, laplace_img)

    # The model is given a resized copy of the style image, stored beside the original.
    style_img = cv2.imread(style_image)
    if style_img is None:
        raise ValueError(f"Could not read image at {style_image}")
    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
    _write_image_or_raise(style_image_resized, cv2.resize(style_img, target_size))

    # One model call per word; only the first returned path is used.
    generated_image_paths = []
    for word in words:
        output_paths = model.generate(
            text=word,
            style_path=style_image_resized,
            laplace_path=laplace_image,
            output_dir=output_dir,
        )
        generated_image_paths.append(output_paths[0])

    # Copy the pixel data so the underlying file handles are closed right away.
    images = []
    for img_path in generated_image_paths:
        with Image.open(img_path) as opened:
            images.append(opened.copy())

    # Layout constants (pixels).
    word_gap = 5
    line_gap = 20
    max_words_per_line = 5
    max_line_width = 500
    top_margin = 10
    left_margin = 10

    lines = _wrap_into_lines(images, word_gap, max_words_per_line, max_line_width)

    # Margins are applied on both sides of each axis so the last line does
    # not sit flush against the bottom edge of the canvas.
    total_width = max(width for _, width, _ in lines) + 2 * left_margin
    total_height = (
        sum(height for _, _, height in lines)
        + (len(lines) - 1) * line_gap
        + 2 * top_margin
    )

    merged_image = Image.new("RGB", (total_width, total_height), color=(255, 255, 255))

    y_offset = top_margin
    for line_images, _line_width, line_height in lines:
        x_offset = left_margin
        for img in line_images:
            # Bottom-align words of differing height within the line.
            word_y_offset = y_offset + (line_height - img.size[1])
            merged_image.paste(img, (x_offset, word_y_offset))
            x_offset += img.size[0] + word_gap
        y_offset += line_height + line_gap

    merged_image_path = os.path.join(output_dir, "merged_output.png")
    merged_image.save(merged_image_path)
    return merged_image_path, gr.update(value=laplace_image)
# Gradio UI wiring: three inputs (text, style image, optional Laplace image)
# feed generate_handwriting, whose two return values fill the two image outputs.
input_components = [
    gr.Textbox(label="Text to generate"),
    gr.Image(label="Style Image", type="filepath"),
    gr.Image(label="Laplace Image (Optional)", type="filepath"),
]

output_components = [
    gr.Image(label="Generated Handwriting"),
    gr.Image(label="Laplace Image (Optional)"),
]

# One example row per entry, in the same order as the input components.
example_rows = [
    [
        "Hello World",
        "English_data/Dataset/test/169/c04-134-05-08.png",
        "English_data/Dataset_laplace/test/169/c04-134-00-00.png",
    ],
]

iface = gr.Interface(
    fn=generate_handwriting,
    inputs=input_components,
    outputs=output_components,
    title="Handwriting Generation",
    description=(
        "Generate handwritten text using One-DM model. "
        "If no Laplace image is provided, it will be generated from the style image."
    ),
    examples=example_rows,
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True)