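"""Gradio demo for One-DM handwriting generation.

Takes a line of text plus a writer-style reference image, optionally derives a
Laplace (edge) map from the style image, renders each word with the One-DM
model, and stitches the word images into a single multi-line output image.
"""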
import gradio as gr
from inference import OneDMInference
import os
from PIL import Image
import cv2
import numpy as np
import torch
import torch.nn.functional as F

# Load the model
model = OneDMInference(
    model_path='one_dm_finetuned.pt',
    cfg_path='configs/finetuned.yml'
)

# 3x3 Laplacian kernel for extracting high-frequency (stroke-edge) detail
laplace = torch.tensor(
    [[0, 1, 0],
     [1, -4, 1],
     [0, 1, 0]], dtype=torch.float, requires_grad=False
).view(1, 1, 3, 3)
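# NOTE: the kernel is created on CPU; if inference runs on a GPU, move it with
# laplace = laplace.to(device) before applying it to GPU tensors.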

def generate_laplace_image(image_path, target_size=(64, 64)):
    """
    Generate a Laplace image from the input image using a Laplacian filter.
    Adjusted to match model-expected dimensions (e.g., 64x64).
    """
    # Read image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image at {image_path}")

    # Convert to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize to model-compatible size (e.g., 64x64)
    image = cv2.resize(image, target_size)

    # Convert to tensor
    x = torch.from_numpy(image).unsqueeze(0).unsqueeze(0).float()

    # Normalize input
    x = x / 255.0

    # Apply Laplacian filter with proper padding
    y = F.conv2d(x, laplace, stride=1, padding=1)  # Padding=1 keeps spatial dims intact

    # Convert back to a uint8 image; negative Laplacian responses are clipped to 0
    y = y.squeeze().numpy()
    y = np.clip(y * 255.0, 0, 255)
    y = y.astype(np.uint8)

    # Binarize with Otsu's method (the threshold value 0 is ignored; Otsu picks it)
    _, threshold = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Save output
    laplace_path = os.path.splitext(image_path)[0] + "_laplace.png"
    cv2.imwrite(laplace_path, threshold)

    return laplace_path
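
# Example (illustrative, using the sample image bundled with the demo):
#   laplace_path = generate_laplace_image("English_data/Dataset/test/169/c04-134-05-08.png")
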
def generate_handwriting(text, style_image, laplace_image=None):
    if not text or not text.strip():
        raise ValueError("Please enter some text to generate.")

    output_dir = "./generated"
    os.makedirs(output_dir, exist_ok=True)

    # Assume model expects 64x64 inputs based on logs (adjust if config specifies otherwise)
    target_size = (64, 64)

    # Generate Laplace image if not provided
    if laplace_image is None:
        laplace_image = generate_laplace_image(style_image, target_size)
    else:
        # Ensure the provided Laplace image matches the expected size.
        # (ndarray.shape is (height, width) while cv2.resize takes (width, height);
        # the direct comparison is safe here only because target_size is square.)
        laplace_img = cv2.imread(laplace_image, cv2.IMREAD_GRAYSCALE)
        if laplace_img is None:
            raise ValueError(f"Could not read Laplace image at {laplace_image}")
        if laplace_img.shape != target_size:
            laplace_img = cv2.resize(laplace_img, target_size)
            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
            cv2.imwrite(laplace_image, laplace_img)

    # Resize the style image to match model expectations
    style_img = cv2.imread(style_image)
    if style_img is None:
        raise ValueError(f"Could not read style image at {style_image}")
    style_img_resized = cv2.resize(style_img, target_size)
    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
    cv2.imwrite(style_image_resized, style_img_resized)

    # Generate handwriting for each word
    words = text.split()
    generated_image_paths = []
    for word in words:
        output_paths = model.generate(
            text=word,
            style_path=style_image_resized,  # Use resized style image
            laplace_path=laplace_image,      # Use Laplace image
            output_dir=output_dir
        )
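        # model.generate is assumed to return a list of output file paths;
        # take the first entry as the rendered image for this word.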
        generated_image_paths.append(output_paths[0])

    # Load generated images
    images = [Image.open(img_path) for img_path in generated_image_paths]

    # Layout constants (pixels)
    word_gap = 5            # horizontal gap between words
    line_gap = 20           # vertical gap between lines
    max_words_per_line = 5  # hard cap on words per line
    top_margin = 10
    left_margin = 10

    # Calculate line dimensions
    lines = []
    current_line = []
    current_line_width = 0
    current_line_height = 0

    max_line_width = 500  # wrap once a line would exceed this width (px)
    for img in images:
        # Start a new line when the word cap is reached or the line would
        # overflow; the `current_line` check avoids emitting an empty first line.
        if len(current_line) >= max_words_per_line or (
                current_line and current_line_width + img.size[0] > max_line_width):
            lines.append((current_line, current_line_width - word_gap, current_line_height))
            current_line = []
            current_line_width = 0
            current_line_height = 0

        current_line.append(img)
        current_line_width += img.size[0] + word_gap
        current_line_height = max(current_line_height, img.size[1])

    # Add the last line if it has content
    if current_line:
        lines.append((current_line, current_line_width - word_gap, current_line_height))

    # Calculate total dimensions
    total_width = max(line[1] for line in lines) + (2 * left_margin)  # Width of the widest line
    total_height = sum(line[2] for line in lines) + (len(lines) - 1) * line_gap + top_margin

    # Create merged image
    merged_image = Image.new('RGB', (total_width, total_height), color=(255, 255, 255))

    # Paste words into the image
    y_offset = top_margin
    for line_images, line_width, line_height in lines:
        x_offset = left_margin  # Align to the left instead of centering
        for img in line_images:
            # Adjust y_offset for each word to align baselines (optional, if heights vary significantly)
            word_y_offset = y_offset + (line_height - img.size[1])  # Align to the bottom of the line
            merged_image.paste(img, (x_offset, word_y_offset))
            x_offset += img.size[0] + word_gap
        y_offset += line_height + line_gap

    # Save merged image
    merged_image_path = os.path.join(output_dir, "merged_output.png")
    merged_image.save(merged_image_path)

    return merged_image_path, gr.update(value=laplace_image)
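
# Example (illustrative): render two words with the bundled sample style image;
# the Laplace map is derived automatically when none is supplied.
#   merged_path, laplace_used = generate_handwriting(
#       "Hello World", "English_data/Dataset/test/169/c04-134-05-08.png")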


# Create Gradio interface
iface = gr.Interface(
    fn=generate_handwriting,
    inputs=[
        gr.Textbox(label="Text to generate"),
        gr.Image(label="Style Image", type="filepath"),
        gr.Image(label="Laplace Image (Optional)", type="filepath")
    ],
    outputs=[
        gr.Image(label="Generated Handwriting"),
        gr.Image(label="Laplace Image (Optional)")
    ],
    title="Handwriting Generation",
    description="Generate handwritten text using One-DM model. If no Laplace image is provided, it will be generated from the style image.",
    examples=[
        ["Hello World",
         "English_data/Dataset/test/169/c04-134-05-08.png",
         "English_data/Dataset_laplace/test/169/c04-134-00-00.png"]
    ]
)
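
# Note: share=True asks Gradio for a temporary public URL in addition to the
# local server; remove it to serve on localhost only.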

if __name__ == "__main__":
    iface.launch(share=True)