Skip to content
This repository has been archived by the owner on Sep 21, 2023. It is now read-only.

Escape formatting from OCR results #20

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions tor_ocr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,27 @@ def decode_image_from_url(url, overlay=False, api_key=__OCR_API_KEY__):
raise ConnectionError(
'Attempted all three OCR.space APIs -- cannot connect!'
)

return result.json()


def clean_formatting(body):
    """Return ``body`` with Reddit markdown syntax escaped, not removed.

    The text is transformed in three steps:

    1. Every ``\\r\\n`` pair becomes two ``\\n`` characters.
    2. Each markdown control character (backslash, ``>``, ``~``, ``*``,
       ``_``, ``#``, ``^``, ``+``, ``-``, backtick and ``|``) gets a
       backslash placed in front of it.
    3. ``/u/`` and ``/r/`` are rewritten to ``\\/u/`` and ``\\/r/``; the
       space-prefixed short forms `` u/`` and `` r/`` are rewritten to
       `` \\/u/`` and `` \\/r/`` (note that a slash is added).

    :param body: the raw text to escape (a ``str``).
    :return: a new ``str`` with the escapes applied; ``body`` itself is
        not modified.
    """
    # The backslash has to be in this list: otherwise an input such as
    # "\*" would come out as "\\*", which markdown reads as a literal
    # backslash followed by a *live* asterisk.
    snoodown_chars = [
        '\\', '>', '~', '*', '_', '#', '^', '+', '-', '`', '|',
    ]
    # One translation table escapes every character in a single pass, so
    # backslashes added for one character are never escaped a second time.
    escape_table = str.maketrans(
        {char: '\\' + char for char in snoodown_chars}
    )

    body = body.replace('\r\n', '\n\n').translate(escape_table)

    # These run *after* the character escaping so that the backslashes
    # inserted here are not themselves doubled. Presumably the goal is to
    # keep user/subreddit references from being rendered as links.
    mention_escapes = (
        ('/u/', '\\/u/'),
        ('/r/', '\\/r/'),
        (' u/', ' \\/u/'),
        (' r/', ' \\/r/'),
    )
    for mention, escaped in mention_escapes:
        body = body.replace(mention, escaped)
    return body

# noinspection PyShadowingNames
def run(config):
time.sleep(config.ocr_delay)
Expand Down Expand Up @@ -220,19 +237,7 @@ def run(config):
# should post a top level comment, then keep replying to
# the comments we make until we run out of chunks.

chunk = chunk.replace(
'\r\n', '\n\n'
).replace(
'/u/', '\\/u/'
).replace(
'/r/', '\\/r/'
).replace(
' u/', ' \\/u/'
).replace(
' r/', ' \\/r/'
).replace(
'>>', '\>\>'
)
chunk = clean_formatting(chunk)

thing_to_reply_to = thing_to_reply_to.reply(_(chunk))

Expand Down