-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path14-restyle-word-cloud.py
132 lines (101 loc) · 3.93 KB
/
14-restyle-word-cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from PIL import Image
import collections
from wordcloud import WordCloud
from PIL import ImageDraw
from PIL import ImageFont
import collections
import os
def main():
    """Build the combined statistics + word-cloud picture for "Ania z Wyspy".

    Writes two intermediate PNG files to the current directory (the word
    cloud and the rendered statistics text), pastes the cloud onto the
    right-hand edge of the statistics image, vertically centred, then saves
    the result as ``output.png`` and displays it with ``Image.show()``.
    """
    filepath = os.path.join(
        '..',
        'texts_for_processing',
        'Ania z Wyspy, Lucy Maud Montgomery, przełożył Andrzej Magórski.txt',
    )
    # Name each intermediate file once so the code that writes it and the
    # code that reads it back cannot drift apart.
    word_cloud_filepath = 'word_cloud.png'
    description_filepath = 'text_description.png'

    make_word_cloud(filepath, word_cloud_filepath)
    statistics = load_text_and_describe_it(filepath)
    generate_image_with_text(['Ania z Wyspy', ''] + statistics, description_filepath)

    canvas_width, canvas_height = result_image_size()
    cloud_width, cloud_height = word_cloud_size()
    # Flush the cloud against the right edge and centre it vertically.
    x_anchor = canvas_width - cloud_width
    y_anchor = (canvas_height - cloud_height) // 2

    # Context managers close both image files even if paste/save raises.
    with Image.open(description_filepath) as base, \
            Image.open(word_cloud_filepath) as word_cloud:
        base.paste(word_cloud, (x_anchor, y_anchor))
        base.save('output.png')
        base.show()
def background_color():
    """Return the background colour as an (R, G, B) tuple: plain white."""
    red = green = blue = 255
    return (red, green, blue)
def text_color():
    """Return the text colour as an (R, G, B) tuple: a near-black grey."""
    shade = 28
    return (shade,) * 3
def result_image_size():
    """Return the (width, height) in pixels of the final composed image."""
    width, height = 800, 300
    return (width, height)
def word_cloud_size():
    """Return the (width, height) in pixels of the word-cloud image.

    The cloud takes two thirds of the result image's width and 90% of its
    height, each truncated to a whole number of pixels.
    """
    full_width, full_height = result_image_size()
    cloud_width = int(full_width * 2 / 3)
    cloud_height = int(full_height * 0.9)
    return (cloud_width, cloud_height)
def font_filepath():
    """Return the path of the font file used for all rendered text."""
    font_file = "SpaceGrotesk-SemiBold.otf"
    return font_file
def word_to_color(word, **kwargs):
    """Colour callback for the word cloud: every word gets the text colour.

    The word itself and any extra keyword arguments passed by the caller are
    ignored; the result is always ``text_color()``.
    """
    uniform_color = text_color()
    return uniform_color
def make_word_cloud(book_file_filepath, output_filepath):
    """Render a word-cloud image of a text file and save it to disk.

    Args:
        book_file_filepath: Path of the UTF-8 text file to read.
        output_filepath: Path the rendered image is written to.
    """
    with open(book_file_filepath, 'r', encoding='utf-8') as book_file:
        book_text = book_file.read()
    word_frequencies = collections.Counter(text_words(book_text))

    cloud_width, cloud_height = word_cloud_size()
    cloud = WordCloud(
        background_color=background_color(),
        repeat=False,
        font_path=font_filepath(),
        width=cloud_width,
        height=cloud_height,
    )
    cloud.generate_from_frequencies(word_frequencies)
    # Recolour after layout so every word is drawn in text_color().
    cloud.recolor(color_func=word_to_color)

    # Save the rendered cloud to the requested file.
    cloud.to_image().save(output_filepath)
def text_words(text):
    """Split *text* into lowercase words with punctuation stripped.

    Every punctuation mark listed below is replaced by a space, the text is
    lowercased, and the result is split on whitespace.

    NOTE: this name is defined a second time further down this file; the
    later definition is the one in effect when the script runs.

    Args:
        text: The text to split.

    Returns:
        A list of lowercase words; empty for empty or whitespace-only text.
    """
    # Includes the Polish opening quote („), guillemets, the ellipsis and the
    # en dash so they neither stick to neighbouring words nor count as words.
    punctuation = '.,!:;?()—–”“„"«»…'
    # One translate() pass instead of one replace() pass per character.
    to_spaces = str.maketrans(dict.fromkeys(punctuation, " "))
    return text.translate(to_spaces).lower().split()
def load_text_and_describe_it(filepath):
    """Read a UTF-8 text file and return display lines describing it.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of strings (labels are in Polish), in this order: word count,
        longest word, a heading followed by the five most common words with
        their counts, character count, estimated page count, and the number
        of distinct words.
    """
    with open(filepath, 'r', encoding='utf-8') as book_file:
        book_text = book_file.read()
    words = text_words(book_text)
    word_frequencies = collections.Counter(words)

    # max() keeps the first of equally long words; '' when there are no words.
    longest_word = max(words, key=len, default='')

    # Ceiling division at 1800 characters per page. The previous
    # `n // 1800 + 1` counted one page too many whenever the length was an
    # exact multiple of 1800 (and reported one page for an empty text).
    characters_per_page = 1800
    page_count = -(-len(book_text) // characters_per_page)

    return [
        "ilość słów w książce: " + str(len(words)),
        "najdłuższe słowo: " + longest_word,
        "najczęstsze słowa: ",
        *frequency_list_as_list_of_lines(word_frequencies.most_common(5)),
        "ile znaków w książce: " + str(len(book_text)),
        "ilość stron: " + str(page_count),
        # The counter has exactly one key per distinct word.
        "ilość różnych słów: " + str(len(word_frequencies)),
    ]
def frequency_list_as_list_of_lines(word_frequencies):
    """Format (word, count) pairs as display lines of the form `` word xN``.

    Args:
        word_frequencies: An iterable of ``(word, count)`` pairs.

    Returns:
        One string per pair, in the same order as the input.
    """
    return [f" {entry} x{occurrences}" for entry, occurrences in word_frequencies]
def text_words(text):
    """Split *text* into lowercase words with punctuation stripped.

    Every punctuation mark listed below is replaced by a space, the text is
    lowercased, and the result is split on whitespace.

    NOTE: a function of the same name is also defined earlier in this file;
    this later definition replaces it and is the one in effect at call time.

    Args:
        text: The text to split.

    Returns:
        A list of lowercase words; empty for empty or whitespace-only text.
    """
    # Includes the Polish opening quote („), guillemets, the ellipsis and the
    # en dash so they neither stick to neighbouring words nor count as words.
    punctuation = '.,!:;?()—–”“„"«»…'
    # One translate() pass instead of one replace() pass per character.
    to_spaces = str.maketrans(dict.fromkeys(punctuation, " "))
    return text.translate(to_spaces).lower().split()
def generate_image_with_text(text_lines, output_filepath, *, margin=10, font_size=14, line_spacing=18):
    """Draw lines of text onto a blank result-sized image and save it.

    Args:
        text_lines: The strings to draw, one per line, top to bottom.
        output_filepath: Path the image is written to.
        margin: Left and top offset of the first line, in pixels.
        font_size: Size passed to ``ImageFont.truetype``.
        line_spacing: Vertical distance between consecutive lines, in pixels.
    """
    im = Image.new("RGB", result_image_size(), background_color())
    font = ImageFont.truetype(font_filepath(), font_size)
    # One drawing context serves every line; it does not need rebuilding
    # on each iteration.
    draw = ImageDraw.Draw(im)
    fill = text_color()
    for line_number, line in enumerate(text_lines):
        y_line_anchor = margin + line_number * line_spacing
        draw.text((margin, y_line_anchor), line, fill, font=font)
    im.save(output_filepath)
# Run the pipeline only when executed as a script, so that loading this file
# as a module does not generate and display images as a side effect.
if __name__ == "__main__":
    main()