-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDataGenerator.py
162 lines (121 loc) · 5.16 KB
/
DataGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import numpy as np
import PIL
from PIL import Image as PImage
from PIL import ImageFilter, ImageFont, ImageDraw
import cv2
import tensorflow as tf
from fontTools.ttLib import TTFont
from typing import Tuple, List
import random
def distort_sample(img : PImage) -> Tuple[PIL.Image.Image, Tuple[int, int], Tuple[int, int]]:
"""
Distort the given image randomly.
Randomly applies the transformations:
rotation, shear, scale, translate,
Randomly applies the filter:
sharpen, blur, smooth
Args:
img: the image which should be distorted.
Returns:
the distorted image
the offset where the character is placed in the image
the size of the character
"""
offset, scale = (0, 0), (64, 64)
t = random.choice(["rotate", "shear", "erode", "dilate"])
f = random.choice(["blur", "sharpen", "smooth"])
# randomly apply transformations...
# rotate image
if("rotate" in t):
img = img.rotate(random.uniform(-10, 10))
# shear image
if("shear" in t):
y_shear = random.uniform(-0.2, 0.2)
x_shear = random.uniform(-0.2, 0.2)
img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
# sine transform
t_img = np.array(img)
A = t_img.shape[0] / random.uniform(3.0, 6.0)
w = 2.0 / t_img.shape[1]
shift_factor = random.choice([-1, 1]) * random.uniform(0.10, 0.2)
shift = lambda x: shift_factor * A * np.sin(-2*np.pi*x * w)
for i in range(t_img.shape[0]):
t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
if("erode" in t):
t_img = cv2.erode(t_img, np.ones((2, 2), np.uint8), iterations=1)
if("dilate" in t):
t_img = cv2.dilate(t_img, np.ones((3, 3), np.uint8), iterations=1)
img = PImage.fromarray(t_img, mode="L")
# FILTER
# blur
if("blur" in f):
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
# sharpen
if("sharpen" in f):
img = img.filter(ImageFilter.SHARPEN)
# smooth
if("smooth" in f):
img = img.filter(ImageFilter.SMOOTH)
return img, offset, scale
def generate_images(
amounts : List[int], kanjis : List[str],
fonts : List[List[str]], font_size : int = 50) -> Tuple[np.ndarray, np.ndarray]:
"""
Generates `amount` images of the given `kanji` using given `fonts`
while augmenting 50% of them randomly.
Args:
amount : how many samples should be created per kanji.
kanji : the (kanji) character which should be created
fonts : a list of paths to fonts which should be used
to created images.
Returns:
Two numpy arrays.
One containing all images and the other the matching labels.
"""
cnt = 0
kanji_labels = np.empty(shape=(sum(amounts)), dtype=str)
kanji_imgs = np.zeros(shape=(sum(amounts), 64, 64, 1), dtype=np.uint8)
for kanji_cnt, kanji in enumerate(kanjis):
already_gen_cnt = sum(amounts[:kanji_cnt])
ttfs = [ImageFont.truetype(f, font_size) for f in fonts[kanji_cnt]]
# generate one image per font
for cnt, font in enumerate(ttfs):
if(cnt >= amounts[kanji_cnt]):
break
# make sure that the image fits in the 64x64 image
if(font.getsize(kanji)[0] > 64):
font = font.font_variant(size = font_size - (font.getsize(kanji)[0] - font_size))
if(font.getsize(kanji)[1] > 64):
font = font.font_variant(size = font_size - (font.getsize(kanji)[1] - font_size))
# create the image
img = PImage.new(mode="L", size=(64, 64), color=0)
d = ImageDraw.Draw(img)
d.text(((64 - font.getsize(kanji)[0]) // 4, (64 - font.getsize(kanji)[1]) // 4), kanji, font=font, fill=255)
# store the image / label in the array
kanji_imgs[cnt + already_gen_cnt] = np.array(img).reshape(64, 64, 1)
kanji_labels[cnt + already_gen_cnt] = kanji
# append distorted versions of the images created from the fonts
cnt += 1
while(cnt < amounts[kanji_cnt]):
p_img = tf.keras.preprocessing.image.array_to_img(kanji_imgs[(cnt) % len(ttfs) + already_gen_cnt])
kanji_imgs[cnt+already_gen_cnt] = np.array(distort_sample(p_img)[0]).reshape(64, 64, 1)
kanji_labels[cnt+already_gen_cnt] = kanji
cnt += 1
return kanji_imgs, kanji_labels
def check_font_char_support(fonts : List[str], char : str):
""" Check which of the given fonts supports the given char.
"""
element = (char, [])
for font in fonts:
f = ImageFont.truetype(font, 64)
for table in TTFont(font)['cmap'].tables:
if ord(char) in table.cmap.keys():
# create an image and draw the character
img = PImage.new(mode="L", size=(64, 64), color=0)
d = ImageDraw.Draw(img)
d.text((0, 0), char, font=f, fill=255)
img = np.array(img).reshape(64, 64, 1)
if(np.any(img)):
element[1].append(font)
break
return element