Skip to content

Commit

Permalink
Merge pull request #1010 from Johann-PLW/main
Browse files Browse the repository at this point in the history
Update googleTranslate.py for lava output
  • Loading branch information
Johann-PLW authored Jan 9, 2025
2 parents 9a1c871 + 6d8a4f9 commit a47df9d
Showing 1 changed file with 145 additions and 194 deletions.
339 changes: 145 additions & 194 deletions scripts/artifacts/googleTranslate.py
Original file line number Diff line number Diff line change
@@ -1,205 +1,156 @@
# Artifact registry for the Google Translate database (translate.db).
# Each key is the name of the processing function in this module that
# produces the corresponding report.
#
# NOTE(review): every "paths" value is a parenthesised string, not a
# one-element tuple (there is no trailing comma) — confirm the plugin loader
# accepts a bare string.
__artifacts_v2__ = {
    "googleTranslateHistory": {
        "name": "Google Translate History",
        "description": "History from Google Translate App",
        "author": "Django Faiola (djangofaiola.blogspot.com)",
        "creation_date": "2024-05-30",
        "last_update_date": "2025-01-09",
        "requirements": "none",
        "category": "Translator",
        "notes": "",
        "paths": ('*/mobile/Containers/Data/Application/*/Documents/translate.db*'),
        "output_types": "standard",
        "artifact_icon": "type"
    },
    "googleTranslateStarred": {
        "name": "Google Translate Favorite Translations",
        "description": "Favorite translations from Google Translate App",
        "author": "Django Faiola (djangofaiola.blogspot.com)",
        "creation_date": "2024-05-30",
        "last_update_date": "2025-01-09",
        "requirements": "none",
        "category": "Translator",
        "notes": "",
        "paths": ('*/mobile/Containers/Data/Application/*/Documents/translate.db*'),
        # No timeline output: the starred report carries no timestamp column.
        "output_types": ["html", "tsv", "lava"],
        "artifact_icon": "star"
    },
    "googleTranslateTts": {
        "name": "Google Translate Text-To-Speech",
        "description": "Text-To-Speech from Google Translate App",
        "author": "Django Faiola (djangofaiola.blogspot.com)",
        "creation_date": "2024-05-30",
        "last_update_date": "2025-01-09",
        "requirements": "none",
        "category": "Translator",
        "notes": "",
        "paths": ('*/mobile/Containers/Data/Application/*/Documents/translate.db*'),
        "output_types": ["html", "tsv", "lava"],
        "artifact_icon": "volume-2",
        # The 'Audio' column holds an <audio> element in the HTML output.
        "html_columns": ['Audio']
    }
}

import os
import sys
import shutil
import sqlite3
import textwrap
from pathlib import Path
from scripts.artifact_report import ArtifactHtmlReport
from scripts.ilapfuncs import logfunc, tsv, timeline, open_sqlite_db_readonly, does_table_exist, convert_utc_human_to_timezone, convert_ts_human_to_utc
from scripts.filetype import audio_match


# history
def get_history(file_found, report_folder, database, timezone_offset):
    """Report the rows of the ``history`` table of translate.db.

    Writes an HTML report, a TSV file and timeline entries, all named
    'Google Translate History'. Any exception is logged through ``logfunc``
    and swallowed, so a failure here does not propagate to the caller.

    Args:
        file_found: Path of the database, recorded as the report source.
        report_folder: Folder that receives the report files.
        database: Open connection to translate.db.
        timezone_offset: Target timezone handed to
            ``convert_utc_human_to_timezone``.
    """
    try:
        cursor = database.cursor()
        cursor.execute('''
        SELECT
            ROWID AS "_id",
            datetime (timestamp, 'unixepoch') AS "timestamp",
            (sourcelanguage || '->' || targetlanguage) AS "fromToLang",
            sourcetext,
            targettext,
            star,
            romanization
        FROM history
        ''')

        all_rows = cursor.fetchall()
        if not all_rows:
            logfunc('No Google Translate History data available')
            return

        report = ArtifactHtmlReport('Google Translate History')
        report.start_artifact_report(report_folder, 'Google Translate History')
        report.add_script()
        data_headers = ('Created', 'Language', 'Source text', 'Target text', 'Starred', 'Romanization', 'Location')
        data_list = []

        for rowid, created, languages, source_text, target_text, star, romanization in all_rows:
            # The query yields a UTC date string; shift it to the requested timezone.
            timestamp = convert_utc_human_to_timezone(convert_ts_human_to_utc(created), timezone_offset)
            starred = star == 1
            # Points the examiner back to the originating table row.
            location = f'history (ROWID: {rowid})'

            data_list.append((timestamp, languages, source_text, target_text, starred, romanization, location))

        # No column of this table contains report-generated markup, so every
        # cell is escaped: the translated text comes straight from the
        # database and must not be interpreted as HTML by the report viewer.
        report.write_artifact_data_table(data_headers, data_list, file_found, html_escape=True)
        report.end_artifact_report()

        tsvname = 'Google Translate History'
        tsv(report_folder, data_headers, data_list, tsvname)

        tlactivity = 'Google Translate History'
        timeline(report_folder, tlactivity, data_list, data_headers)

    except Exception as ex:
        logfunc('Exception while parsing Google Translate History: ' + str(ex))


# starred
def get_starred(file_found, report_folder, database, timezone_offset):
    """Report the rows of the ``starred`` table of translate.db.

    Writes an HTML report and a TSV file named
    'Google Translate Favorite Translations'. No timeline entries are
    written: the query selects no timestamp column, so the rows cannot be
    placed on a timeline. Any exception is logged through ``logfunc`` and
    swallowed.

    Args:
        file_found: Path of the database, recorded as the report source.
        report_folder: Folder that receives the report files.
        database: Open connection to translate.db.
        timezone_offset: Unused; kept so all table parsers share a signature.
    """
    try:
        cursor = database.cursor()
        cursor.execute('''
        SELECT
            ROWID AS "_id",
            (sourcelanguage || '->' || targetlanguage) AS "fromToLang",
            sourcetext,
            targettext,
            romanization
        FROM starred
        ''')

        all_rows = cursor.fetchall()
        if not all_rows:
            logfunc('No Google Translate Favorite Translations data available')
            return

        report = ArtifactHtmlReport('Google Translate Favorite Translations')
        report.start_artifact_report(report_folder, 'Google Translate Favorite Translations')
        report.add_script()
        data_headers = ('Language', 'Source text', 'Target text', 'Romanization', 'Location')

        # The last column points the examiner back to the originating table row.
        data_list = [
            (languages, source_text, target_text, romanization, f'starred (ROWID: {rowid})')
            for rowid, languages, source_text, target_text, romanization in all_rows
        ]

        # Escape every cell: the text comes straight from the database and
        # none of the columns carries report-generated markup.
        report.write_artifact_data_table(data_headers, data_list, file_found, html_escape=True)
        report.end_artifact_report()

        tsvname = 'Google Translate Favorite Translations'
        tsv(report_folder, data_headers, data_list, tsvname)

    except Exception as ex:
        logfunc('Exception while parsing Google Translate Favorite Translations: ' + str(ex))


# tts
def get_tts(file_found, report_folder, database, timezone_offset):
    """Report the rows of the ``tts`` table of translate.db.

    Returns silently when the table does not exist. Audio blobs recognised by
    ``audio_match`` are written to *report_folder* as
    ``audio_<ROWID>.<extension>`` and embedded in the HTML report through an
    ``<audio>`` element. An HTML report and a TSV file named
    'Google Translate Text-To-Speech' are produced. No timeline entries are
    written: the query selects no timestamp column. Any exception is logged
    through ``logfunc`` and swallowed.

    Args:
        file_found: Path of the database, recorded as the report source.
        report_folder: Folder that receives the report and audio files.
        database: Open connection to translate.db.
        timezone_offset: Unused; kept so all table parsers share a signature.
    """
    try:
        if not does_table_exist(database, 'tts'):
            return

        cursor = database.cursor()
        cursor.execute('''
        SELECT
            ROWID AS "_id",
            language,
            text,
            audio
        FROM tts
        ''')

        all_rows = cursor.fetchall()
        if not all_rows:
            logfunc('No Google Translate Text-To-Speech data available')
            return

        report = ArtifactHtmlReport('Google Translate Text-To-Speech')
        report.start_artifact_report(report_folder, 'Google Translate Text-To-Speech')
        report.add_script()
        data_headers = ('Language', 'Text', 'Audio', 'Location')
        data_list = []

        for rowid, language, text, audio in all_rows:
            # Stays empty when the row has no audio or its format is not recognised.
            audio_html = ''
            if audio:
                mimetype = audio_match(audio)
                if mimetype:
                    Path(f'{report_folder}').mkdir(parents=True, exist_ok=True)
                    audio_filename = f'audio_{rowid}.{mimetype.extension}'
                    audio_path = os.path.join(report_folder, audio_filename)
                    with open(audio_path, "wb") as audio_file:
                        audio_file.write(audio)
                    # Link relative to the report folder's parent so the
                    # generated page can reach the extracted file.
                    audio_path_html = Path(report_folder).name + '/' + audio_filename
                    audio_html = f'<audio controls><source src="{audio_path_html}" type="audio/ogg"><source src="{audio_path_html}" type="audio/mpeg">Your browser does not support the audio element.</audio>'

            # Points the examiner back to the originating table row.
            location = f'tts (ROWID: {rowid})'

            data_list.append((language, text, audio_html, location))

        # Only the 'Audio' column carries markup generated here; every other
        # cell is escaped.
        report.write_artifact_data_table(data_headers, data_list, file_found, html_no_escape=['Audio'])
        report.end_artifact_report()

        tsvname = 'Google Translate Text-To-Speech'
        tsv(report_folder, data_headers, data_list, tsvname)

    except Exception as ex:
        logfunc('Exception while parsing Google Translate Text-To-Speech: ' + str(ex))


# google translate
def get_google_translate(files_found, report_folder, seeker, wrap_text, timezone_offset):
    """Parse every translate.db among *files_found*.

    For each matching file the database is opened read-only and the history,
    favorite-translation and text-to-speech reports are generated in that
    order. The connection is closed even if a report raises.

    Args:
        files_found: Paths matched by the artifact's search pattern.
        report_folder: Folder that receives the report files.
        seeker: Unused here.
        wrap_text: Unused here.
        timezone_offset: Target timezone forwarded to the table parsers.
    """
    for file_found in files_found:
        # Normalise the loop variable itself (not the list being iterated) so
        # the suffix test and every helper below receive a string path.
        file_found = str(file_found)

        # Skip anything else matched by the 'translate.db*' search pattern.
        if not file_found.endswith('translate.db'):
            continue

        db = open_sqlite_db_readonly(file_found)
        try:
            # history
            get_history(file_found, report_folder, db, timezone_offset)

            # favorite translations
            get_starred(file_found, report_folder, db, timezone_offset)

            # text-to-speech
            get_tts(file_found, report_folder, db, timezone_offset)

        finally:
            db.close()
from scripts.ilapfuncs import artifact_processor, get_file_path, get_sqlite_db_records, does_table_exist, convert_unix_ts_to_utc

@artifact_processor
def googleTranslateHistory(files_found, report_folder, seeker, wrap_text, timezone_offset):
    """Collect the translation history stored in translate.db.

    Returns:
        A ``(data_headers, data_list, source_path)`` tuple. Each data row
        holds the creation time converted by ``convert_unix_ts_to_utc``, the
        'source->target' language pair, the source and target text, whether
        the entry is starred, its romanization and a pointer to the table row.
    """
    db_path = get_file_path(files_found, "translate.db")

    sql = '''
    SELECT
        ROWID AS "_id",
        timestamp AS "timestamp",
        (sourcelanguage || '->' || targetlanguage) AS "fromToLang",
        sourcetext,
        targettext,
        star,
        romanization
    FROM history
    '''

    # The first header is tagged so the 'Created' column is treated as a datetime.
    headers = (
        ('Created', 'datetime'),
        'Language',
        'Source text',
        'Target text',
        'Starred',
        'Romanization',
        'Location',
    )

    rows = [
        (
            convert_unix_ts_to_utc(entry[1]),    # created
            entry[2],                            # language pair
            entry[3],                            # source text
            entry[4],                            # target text
            entry[5] == 1,                       # starred
            entry[6],                            # romanization
            f'history (ROWID: {entry[0]})',      # location
        )
        for entry in get_sqlite_db_records(db_path, sql)
    ]

    return headers, rows, db_path


@artifact_processor
def googleTranslateStarred(files_found, report_folder, seeker, wrap_text, timezone_offset):
    """Collect the favorite translations stored in translate.db.

    Returns:
        A ``(data_headers, data_list, source_path)`` tuple. Each data row
        holds the 'source->target' language pair, the source and target text,
        the romanization and a pointer to the table row.
    """
    db_path = get_file_path(files_found, "translate.db")

    sql = '''
    SELECT
        ROWID AS "_id",
        (sourcelanguage || '->' || targetlanguage) AS "fromToLang",
        sourcetext,
        targettext,
        romanization
    FROM starred
    '''

    headers = ('Language', 'Source text', 'Target text', 'Romanization', 'Location')

    rows = []
    for entry in get_sqlite_db_records(db_path, sql):
        # The trailing column points the examiner back to the table row.
        origin = f'starred (ROWID: {entry[0]})'
        rows.append((entry[1], entry[2], entry[3], entry[4], origin))

    return headers, rows, db_path


@artifact_processor
def googleTranslateTts(files_found, report_folder, seeker, wrap_text, timezone_offset):
    """Collect the text-to-speech entries stored in translate.db.

    Audio blobs recognised by ``audio_match`` are written to *report_folder*
    as ``audio_<ROWID>.<extension>``.

    Returns:
        ``((), [], source_path)`` when the ``tts`` table does not exist.
        Otherwise ``(data_headers, (data_list, data_list_html), source_path)``:
        both lists hold the same rows, but the 'Audio' column is the path of
        the extracted file in ``data_list`` and an ``<audio>`` element in
        ``data_list_html``. The 'Audio' value is an empty string in both when
        a row has no audio or its format is not recognised.
    """
    source_path = get_file_path(files_found, "translate.db")
    data_list = []
    data_list_html = []

    if not does_table_exist(source_path, 'tts'):
        return (), data_list, source_path

    query = '''
    SELECT
        ROWID AS "_id",
        language,
        text,
        audio
    FROM tts
    '''

    data_headers = ('Language', 'Text', 'Audio', 'Location')

    db_records = get_sqlite_db_records(source_path, query)

    for record in db_records:
        # Reset both values for every record: without this, a row lacking
        # playable audio would either fail on an unbound name (first row) or
        # silently reuse the previous row's file path.
        audio_path = ''
        audio_html = ''
        audio = record[3]
        if audio:
            mimetype = audio_match(audio)
            if mimetype:
                Path(f'{report_folder}').mkdir(parents=True, exist_ok=True)
                audio_filename = f'audio_{record[0]}.{mimetype.extension}'
                audio_path = os.path.join(report_folder, audio_filename)
                with open(audio_path, "wb") as audio_file:
                    audio_file.write(audio)
                # Link relative to the report folder's parent so the
                # generated page can reach the extracted file.
                audio_path_html = Path(report_folder).name + '/' + audio_filename
                audio_html = f'<audio controls><source src="{audio_path_html}" type="audio/ogg"><source src="{audio_path_html}" type="audio/mpeg">Your browser does not support the audio element.</audio>'

        # Points the examiner back to the originating table row.
        location = f'tts (ROWID: {record[0]})'

        data_list.append((record[1], record[2], audio_path, location))
        data_list_html.append((record[1], record[2], audio_html, location))

    return data_headers, (data_list, data_list_html), source_path

0 comments on commit a47df9d

Please sign in to comment.