-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr.py
45 lines (35 loc) · 1.49 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import Enums
from OCR import EasyOCR, Tesseract, AzureOCR
from OCR.HandwrittenOCR import OCR
from PIL import Image
from Classes.Text import Text
from Classes.Block import Block
def get_text(
    img: Image.Image,
    blocks: "list[Block]",
    ocr_system: Enums.OCR,
) -> "list[Block]":
    """Run the selected OCR backend on ``blocks`` and normalize operator spacing.

    Args:
        img: Image handed to the OCR backend together with the blocks.
        blocks: Blocks passed to the backend.
        ocr_system: ``Enums.OCR`` member selecting which backend to call.

    Returns:
        The blocks produced by the selected backend after whitespace
        normalization. When ``ocr_system`` matches none of the handled
        members, no backend is called and the ``blocks`` argument itself is
        normalized and returned.
    """
    # "list[Block]" is quoted so the hint stays valid on interpreters older
    # than 3.9, where subscripting the builtin ``list`` fails at runtime.
    if ocr_system == Enums.OCR.CUSTOM:
        # EasyOCR runs first: it is necessary in order to get the text bounds
        # before the custom recognizer is invoked.
        blocks = EasyOCR.get_text(img, blocks)
        # Return value is ignored here.
        # NOTE(review): presumably OCR.OCR updates ``blocks`` in place - confirm.
        OCR.OCR(img, blocks)
    elif ocr_system == Enums.OCR.TESSERACT:
        blocks = Tesseract.get_text(img, blocks)
    elif ocr_system == Enums.OCR.EASY_OCR:
        blocks = EasyOCR.get_text(img, blocks)
    elif ocr_system == Enums.OCR.AZURE:
        blocks = AzureOCR.OCR(img, blocks)

    # Applied regardless of which backend (if any) ran.
    __fix_whitespaces(blocks)
    return blocks
def __fix_whitespaces(blocks: "list[Block]"):
    """Rewrite the first text of every block with uniform operator spacing.

    All space characters are removed from ``block.Texts[0].text``; then a
    single space is inserted at every boundary between a run of operator
    characters (``+ - * / & | ! < > =``) and a run of other characters, e.g.
    ``"i<=10"`` becomes ``"i <= 10"``. The texts are modified in place and
    nothing is returned.

    Blocks whose ``Texts`` is empty are skipped, and only the first entry of
    ``Texts`` is touched.

    Args:
        blocks: Blocks whose first text entry should be normalized.
    """
    operator_chars = frozenset("+-*/&|!<>=")

    for block in blocks:
        if not block.Texts:
            continue

        first_text = block.Texts[0]
        compact = first_text.text.replace(" ", "")

        pieces = []
        # The scan starts in the "not an operator" state, so text that begins
        # with an operator receives a leading space (e.g. "-5" -> " - 5").
        in_operator_run = False
        for char in compact:
            is_operator = char in operator_chars
            if is_operator != in_operator_run:
                # Crossing an operator/non-operator boundary.
                pieces.append(" ")
                in_operator_run = is_operator
            pieces.append(char)

        first_text.text = "".join(pieces)