Kishan11 committed on
Commit
2f22cf7
·
verified ·
1 Parent(s): 85ccf49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -0
app.py CHANGED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from inference import OneDMInference
3
+ import os
4
+ from PIL import Image
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
# Build the One-DM inference wrapper once, at import time, so every request
# served by the app reuses the same loaded model.
model = OneDMInference(
    cfg_path='configs/finetuned.yml',
    model_path='one_dm_finetuned.pt',
)

# 3x3 Laplacian kernel, written directly in the 4-D
# (out_channels, in_channels, height, width) layout that F.conv2d expects.
# It is created on the default device; move it if inputs live elsewhere.
laplace = torch.tensor(
    [[[[0.0, 1.0, 0.0],
       [1.0, -4.0, 1.0],
       [0.0, 1.0, 0.0]]]],
    dtype=torch.float32,
)
22
+
23
def generate_laplace_image(image_path, target_size=(64, 64)):
    """Create a binarised Laplacian edge map of an image and save it as PNG.

    The image is read with OpenCV, converted to grayscale, resized to
    ``target_size``, convolved with the module-level ``laplace`` kernel and
    finally thresholded with Otsu's method.  The result is written next to
    the input file as ``<stem>_laplace.png``.

    Args:
        image_path: Path of the image to process.
        target_size: Size passed to ``cv2.resize``; OpenCV reads it as
            ``(width, height)``.

    Returns:
        The path of the written Laplace image.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise ValueError(f"Could not read image at {image_path}")

    gray = cv2.resize(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), target_size)

    # Add batch and channel axes -> (1, 1, H, W), then scale pixels to [0, 1].
    batch = torch.from_numpy(gray)[None, None].float() / 255.0

    # A 3x3 kernel with padding=1 leaves the spatial dimensions unchanged.
    response = F.conv2d(batch, laplace, stride=1, padding=1)

    # Back to 8-bit range; negative filter responses are clipped to 0.
    edges = np.clip(response.squeeze().numpy() * 255.0, 0, 255).astype(np.uint8)

    # Otsu picks the threshold itself, so the 0 passed here is ignored.
    _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_OTSU)

    stem, _ext = os.path.splitext(image_path)
    out_path = stem + "_laplace.png"
    cv2.imwrite(out_path, binary)
    return out_path
61
def _wrap_images_into_lines(images, word_gap, max_words_per_line, max_line_width):
    """Group word images into lines of ``(images, width, height)`` tuples.

    A new line is started when the current one already holds
    ``max_words_per_line`` images or when adding the next image would exceed
    ``max_line_width``.  ``width`` is the sum of the image widths plus one
    ``word_gap`` between neighbours; ``height`` is the tallest image.
    """
    lines = []
    current, width, height = [], 0, 0
    for img in images:
        img_w, img_h = img.size
        line_full = (
            len(current) >= max_words_per_line
            or width + img_w > max_line_width
        )
        # Wrap only when the current line already holds a word; otherwise a
        # single over-wide word would push an empty line (negative width,
        # zero height) into the layout.
        if current and line_full:
            lines.append((current, width - word_gap, height))
            current, width, height = [], 0, 0

        current.append(img)
        width += img_w + word_gap
        height = max(height, img_h)

    if current:
        lines.append((current, width - word_gap, height))
    return lines


def generate_handwriting(text, style_image, laplace_image=None):
    """Render ``text`` word by word with the model and merge it into one image.

    Each whitespace-separated word is passed to ``model.generate`` together
    with a resized copy of the style image and a Laplace image.  The
    resulting word images are laid out left-aligned on a white canvas and
    saved as ``./generated/merged_output.png``.

    Side effects: writes ``<stem>_resized.png`` next to the style image,
    possibly ``<stem>_resized.png`` next to a supplied Laplace image (or
    ``<stem>_laplace.png`` next to the style image when none is supplied),
    and the merged output inside ``./generated``.

    Args:
        text: Text to render; split on whitespace into words.
        style_image: File path of the handwriting style reference image.
        laplace_image: Optional file path of a Laplace image.  When ``None``
            it is derived from ``style_image`` via ``generate_laplace_image``.

    Returns:
        A tuple ``(merged_image_path, update)`` where ``update`` is
        ``gr.update(value=<laplace image path>)`` for the second output.

    Raises:
        ValueError: If ``text`` contains no words, ``style_image`` is missing,
            or an input image cannot be read by OpenCV.
    """
    # Validate up front: an empty word list would otherwise surface much
    # later as "max() arg is an empty sequence".
    words = (text or "").split()
    if not words:
        raise ValueError("Please enter at least one word to generate.")
    if not style_image:
        raise ValueError("A style image is required.")

    # cv2.imread signals failure by returning None instead of raising.
    style_img = cv2.imread(style_image)
    if style_img is None:
        raise ValueError(f"Could not read image at {style_image}")

    output_dir = "./generated"
    os.makedirs(output_dir, exist_ok=True)

    # Assumed model input size, as (width, height) for cv2.resize.
    # NOTE(review): confirm against the model config.
    target_size = (64, 64)

    if laplace_image is None:
        laplace_image = generate_laplace_image(style_image, target_size)
    else:
        laplace_img = cv2.imread(laplace_image, cv2.IMREAD_GRAYSCALE)
        if laplace_img is None:
            raise ValueError(f"Could not read image at {laplace_image}")
        # ndarray.shape is (height, width) while target_size is
        # (width, height), so compare against the reversed tuple.
        if laplace_img.shape != target_size[::-1]:
            laplace_img = cv2.resize(laplace_img, target_size)
            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
            cv2.imwrite(laplace_image, laplace_img)

    # Resize the style image to the size the model is assumed to expect.
    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
    cv2.imwrite(style_image_resized, cv2.resize(style_img, target_size))

    # Generate one image per word; the first returned path is used.
    # NOTE(review): assumes model.generate returns a non-empty sequence of
    # image paths - confirm against the inference module.
    generated_image_paths = []
    for word in words:
        output_paths = model.generate(
            text=word,
            style_path=style_image_resized,
            laplace_path=laplace_image,
            output_dir=output_dir,
        )
        generated_image_paths.append(output_paths[0])

    # Copy each image into memory so the file handle is closed right away
    # instead of staying open until garbage collection.
    images = []
    for img_path in generated_image_paths:
        with Image.open(img_path) as opened:
            images.append(opened.copy())

    # Layout constants (pixels).
    word_gap = 5
    line_gap = 20
    max_words_per_line = 5
    max_line_width = 500
    top_margin = 10
    left_margin = 10

    lines = _wrap_images_into_lines(
        images, word_gap, max_words_per_line, max_line_width
    )

    # The canvas is as wide as the widest line plus a margin on both sides;
    # vertically only a top margin is applied, as in the original layout.
    total_width = max(line_width for _, line_width, _ in lines) + 2 * left_margin
    total_height = (
        sum(line_height for _, _, line_height in lines)
        + (len(lines) - 1) * line_gap
        + top_margin
    )

    merged_image = Image.new('RGB', (total_width, total_height), color=(255, 255, 255))

    y_offset = top_margin
    for line_images, _line_width, line_height in lines:
        x_offset = left_margin  # left-aligned lines
        for img in line_images:
            # Bottom-align words of different heights within the line.
            word_y_offset = y_offset + (line_height - img.size[1])
            merged_image.paste(img, (x_offset, word_y_offset))
            x_offset += img.size[0] + word_gap
        y_offset += line_height + line_gap

    merged_image_path = os.path.join(output_dir, "merged_output.png")
    merged_image.save(merged_image_path)

    return merged_image_path, gr.update(value=laplace_image)
151
+
152
+
153
# Gradio UI: a text box plus two file-path image inputs feed
# generate_handwriting, whose two return values fill the two image outputs.
demo_inputs = [
    gr.Textbox(label="Text to generate"),
    gr.Image(label="Style Image", type="filepath"),
    gr.Image(label="Laplace Image (Optional)", type="filepath"),
]
demo_outputs = [
    gr.Image(label="Generated Handwriting"),
    gr.Image(label="Laplace Image (Optional)"),
]
# One example row: text, style image path, Laplace image path.
demo_examples = [
    [
        "Hello World",
        "English_data/Dataset/test/169/c04-134-05-08.png",
        "English_data/Dataset_laplace/test/169/c04-134-00-00.png",
    ],
]

iface = gr.Interface(
    fn=generate_handwriting,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title="Handwriting Generation",
    description="Generate handwritten text using One-DM model. If no Laplace image is provided, it will be generated from the style image.",
    examples=demo_examples,
)


if __name__ == "__main__":
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    iface.launch(share=True)