Skip to content

Commit

Permalink
Merge pull request #8 from virtUOS/dev
Browse files (browse the repository at this point in the history)
Several interface and configuration changes.
  • Loading branch information
Odrec authored Oct 17, 2024
2 parents ad14c17 + 7479728 commit 01ad6d2
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 92 deletions.
8 changes: 8 additions & 0 deletions .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Streamlit configuration (.streamlit/config.toml).

[server]
# Upper bound on uploaded file size. Presumably megabytes, per Streamlit's
# `server.maxUploadSize` option — confirm against the Streamlit docs.
maxUploadSize = 1000

[browser]
# Turn off usage-statistics gathering.
gatherUsageStats = false

[client]
# Toolbar display mode. NOTE(review): "viewer" presumably hides developer
# options from app users — confirm against the Streamlit docs.
toolbarMode = "viewer"
205 changes: 113 additions & 92 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dotenv import load_dotenv
from streamlit_quill import st_quill


load_dotenv()

st.set_page_config(
Expand Down Expand Up @@ -116,14 +117,15 @@ def process_youtube_link(youtube_link):
st.session_state.task_id = None
st.session_state.result = None
st.session_state.status = None
st.session_state.error = None
st.session_state.original_file_name = None
st.session_state.media_file_data = None
st.session_state.input_type = None
st.session_state.txt_edit = ""
st.session_state.json_edit = ""
st.session_state.srt_edit = ""
st.session_state.vtt_edit = ""
st.session_state.selected_tab = "txt"
st.session_state.selected_tab = "srt"
st.session_state.is_modified = False # Initialize the modified flag
st.session_state.original_txt = ""
st.session_state.original_json = ""
Expand All @@ -142,14 +144,15 @@ def reset_transcription_state():
st.session_state.task_id = None
st.session_state.result = None
st.session_state.status = None
st.session_state.error = None
st.session_state.original_file_name = None
st.session_state.media_file_data = None
st.session_state.input_type = None
st.session_state.txt_edit = ""
st.session_state.json_edit = ""
st.session_state.srt_edit = ""
st.session_state.vtt_edit = ""
st.session_state.selected_tab = "txt"
st.session_state.selected_tab = "srt"
st.session_state.is_modified = False
st.session_state.original_txt = ""
st.session_state.original_json = ""
Expand Down Expand Up @@ -200,7 +203,9 @@ def callback_disable_controls():
st.session_state.youtube_link = st.text_input("Enter YouTube video link")

lang = st.selectbox("Select Language", ["de", "en", "es", "fr", "it", "ja", "nl", "pt", "uk", "zh"])
model = st.selectbox("Select Model", ["tiny", "small", "base", "medium", "large-v2", "large-v3"], index=2)
model = st.selectbox("Select Model", ["base", "large-v3"], index=0,
help="base: balance between quality and speed of transcription; "
"large-v3: slower transcription speed but highest quality.")
detect_speakers = st.toggle("Detect different speakers",
value=True,
help="This activates diarization for the transcription. Diarization "
Expand Down Expand Up @@ -275,9 +280,11 @@ def callback_disable_controls():
if status['status'] == "SUCCESS":
st.session_state.status = "SUCCESS"
st.session_state.result = status.get('result', {})
st.success("Transcription successful!")
break
elif status['status'] == "FAILURE":
st.session_state.status = "FAILURE"
st.session_state.error = status # We want all the information about the failure to display in next refresh
st.error(f"Transcription failed. Error: {status.get('error', 'Unknown error')}")
break
else:
Expand All @@ -297,58 +304,19 @@ def callback_disable_controls():

# Display result if transcription is successful
if st.session_state.status == "SUCCESS" and st.session_state.result:
st.success("Transcription successful!")

base_name = os.path.splitext(st.session_state.original_file_name)[0]

result = st.session_state.result

button1_col, button2_col, button3_col, button4_col = st.columns(4)

# Handle conditional checks for content before creating buttons
if 'vtt_content' in result and result['vtt_content']:
button1_col.download_button(
label="Download VTT File",
data=BytesIO(result['vtt_content'].encode('utf-8')),
file_name=f"{base_name}_{lang}.vtt",
mime="text/vtt"
)

if 'txt_content' in result and result['txt_content']:
button2_col.download_button(
label="Download TXT File",
data=BytesIO(result['txt_content'].encode('utf-8')),
file_name=f"{base_name}_{lang}.txt",
mime="text/plain"
)

if 'json_content' in result and result['json_content']:
button3_col.download_button(
label="Download JSON File",
data=BytesIO(result['json_content'].encode('utf-8')),
file_name=f"{base_name}_{lang}.json",
mime="application/json"
)

if 'srt_content' in result and result['srt_content']:
button4_col.download_button(
label="Download SRT File",
data=BytesIO(result['srt_content'].encode('utf-8')),
file_name=f"{base_name}_{lang}.srt",
mime="text/srt"
)

st.write("Transcription Result:")

# Create columns for the media and editor
media_col, editor_col = st.columns([3, 7])

with media_col:

# Expander around the media player
with st.expander("Media Player", expanded=True):
# Display the media player at the top
if st.session_state.media_file_data:
ext = os.path.splitext(st.session_state.original_file_name)[1].lower()

if st.session_state.input_type == "Upload File":
ext = os.path.splitext(st.session_state.original_file_name)[1].lower()
if ext in ['.mp3', '.wav']:
st.audio(st.session_state.media_file_data)
elif ext in ['.mp4']:
Expand All @@ -357,65 +325,118 @@ def callback_disable_controls():
st.video(st.session_state.media_file_data, subtitles={lang: subtitle_content})
else:
st.video(st.session_state.media_file_data)

else:
st.video(st.session_state.youtube_link)

with editor_col:
st.selectbox("Select format to view/edit", ["txt", "json", "srt", "vtt"], key="selected_tab")
st.selectbox("Select format to view/edit", ["srt", "json", "txt", "vtt"], key="selected_tab")

# Add help text for each format
format_help_texts = {
'txt': "**txt**: Plain text format. "
"Contains the raw transcribed text without any formatting or timing information.",
'json': "**json**: JSON format. "
"Provides structured data, including the transcription along with metadata such as timestamps "
"and speaker info.",
'srt': "**srt**: SubRip Subtitle format. Used for video subtitles. "
"Includes transcribed text with timing for synchronization with videos.",
'vtt': "**vtt**: WebVTT format. Used for web video subtitles. "
"Similar to SRT but supports additional styling and metadata."
}

# Display the help text for the selected format
st.write(format_help_texts[st.session_state.selected_tab])

# Add CSS to limit editor height and enable scrolling
st.markdown("""
# Adjust the CSS for the editor if needed
st.markdown("""
<style>
.element-container:has(> iframe) {
height: 1000px;
overflow-y: scroll;
overflow-x: hidden;
height: 400px;
overflow-y: scroll;
overflow-x: hidden;
}
</style>
""", unsafe_allow_html=True)

# Load content into the editor
if st.session_state.selected_tab == "txt":
if st.session_state.txt_edit == "":
st.session_state.txt_edit = normalize_text(result.get('txt_content', ''))
st.session_state.original_txt = st.session_state.txt_edit
st_quill(value=st.session_state.txt_edit, key="txt_edit")

elif st.session_state.selected_tab == "json":
if st.session_state.json_edit == "":
st.session_state.json_edit = normalize_text(result.get('json_content', ''))
st.session_state.original_json = st.session_state.json_edit
st_quill(value=st.session_state.json_edit, key="json_edit")

elif st.session_state.selected_tab == "srt":
if st.session_state.srt_edit == "":
st.session_state.srt_edit = normalize_text(result.get('srt_content', ''))
st.session_state.original_srt = st.session_state.srt_edit
st_quill(value=st.session_state.srt_edit, key="srt_edit")

elif st.session_state.selected_tab == "vtt":
if st.session_state.vtt_edit == "":
st.session_state.vtt_edit = normalize_text(result.get('vtt_content', ''))
st.session_state.original_vtt = st.session_state.vtt_edit
st_quill(value=st.session_state.vtt_edit, key="vtt_edit")

# Compare the current content with the original content
is_modified = False

if st.session_state.selected_tab == "txt":
is_modified = normalize_text(st.session_state.txt_edit) != normalize_text(st.session_state.original_txt)
elif st.session_state.selected_tab == "json":
is_modified = normalize_text(st.session_state.json_edit) != normalize_text(st.session_state.original_json)
elif st.session_state.selected_tab == "srt":
is_modified = normalize_text(st.session_state.srt_edit) != normalize_text(st.session_state.original_srt)
elif st.session_state.selected_tab == "vtt":
is_modified = normalize_text(st.session_state.vtt_edit) != normalize_text(st.session_state.original_vtt)

st.session_state.is_modified = is_modified
# Load content into the editor
if st.session_state.selected_tab == "txt":
if st.session_state.txt_edit == "":
st.session_state.txt_edit = normalize_text(result.get('txt_content', ''))
st.session_state.original_txt = st.session_state.txt_edit
st_quill(value=st.session_state.txt_edit, key="txt_edit")

elif st.session_state.selected_tab == "json":
if st.session_state.json_edit == "":
st.session_state.json_edit = normalize_text(result.get('json_content', ''))
st.session_state.original_json = st.session_state.json_edit
st_quill(value=st.session_state.json_edit, key="json_edit")

elif st.session_state.selected_tab == "srt":
if st.session_state.srt_edit == "":
st.session_state.srt_edit = normalize_text(result.get('srt_content', ''))
st.session_state.original_srt = st.session_state.srt_edit
st_quill(value=st.session_state.srt_edit, key="srt_edit")

elif st.session_state.selected_tab == "vtt":
if st.session_state.vtt_edit == "":
st.session_state.vtt_edit = normalize_text(result.get('vtt_content', ''))
st.session_state.original_vtt = st.session_state.vtt_edit
st_quill(value=st.session_state.vtt_edit, key="vtt_edit")

# Compare the current content with the original content
is_modified = False

if st.session_state.selected_tab == "txt":
is_modified = normalize_text(st.session_state.txt_edit) != normalize_text(st.session_state.original_txt)
elif st.session_state.selected_tab == "json":
is_modified = normalize_text(st.session_state.json_edit) != normalize_text(st.session_state.original_json)
elif st.session_state.selected_tab == "srt":
is_modified = normalize_text(st.session_state.srt_edit) != normalize_text(st.session_state.original_srt)
elif st.session_state.selected_tab == "vtt":
is_modified = normalize_text(st.session_state.vtt_edit) != normalize_text(st.session_state.original_vtt)

st.session_state.is_modified = is_modified

# Create two columns for the Save and Download buttons
save_col, download_col = st.columns(2)

with save_col:
if st.button("Save Changes", disabled=not st.session_state.is_modified):
save_changes()
st.success("Changes saved successfully!")
time.sleep(1)
st.rerun()

with download_col:
# Prepare the current content and file information based on selected tab
current_format = st.session_state.selected_tab
current_content = ''
file_extension = ''
mime_type = ''

if current_format == 'txt':
current_content = st.session_state.txt_edit
file_extension = 'txt'
mime_type = 'text/plain'
elif current_format == 'json':
current_content = st.session_state.json_edit
file_extension = 'json'
mime_type = 'application/json'
elif current_format == 'srt':
current_content = st.session_state.srt_edit
file_extension = 'srt'
mime_type = 'text/srt'
elif current_format == 'vtt':
current_content = st.session_state.vtt_edit
file_extension = 'vtt'
mime_type = 'text/vtt'

download_button_label = f"Download {current_format.upper()} File"

st.download_button(
label=download_button_label,
data=BytesIO(current_content.encode('utf-8')),
file_name=f"{base_name}_{lang}.{file_extension}",
mime=mime_type
)
elif st.session_state.status == "FAILURE" and 'status' in st.session_state.error:
st.error(f"Transcription failed. Error: {st.session_state.error.get('error', 'Unknown error')}")

0 comments on commit 01ad6d2

Please sign in to comment.