From 7479728c368e7a8f163c148dfc62d8def89385b5 Mon Sep 17 00:00:00 2001 From: odrec Date: Thu, 17 Oct 2024 12:08:50 +0200 Subject: [PATCH] Several interface and configuration changes. -Disable Streamlit usage statistics -Increase max upload file size to 1GB -Hide developer options from hamburger menu -Center media and editor on the screen -Only show transcription result message when there's an error -Show srt file as default -Added several help tooltips -Limited models to base and large-v3 --- .streamlit/config.toml | 8 ++ app.py | 205 +++++++++++++++++++++++------------------ 2 files changed, 121 insertions(+), 92 deletions(-) create mode 100644 .streamlit/config.toml diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000..30817f1 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,8 @@ +[server] +maxUploadSize = 1000 + +[browser] +gatherUsageStats = false + +[client] +toolbarMode = "viewer" \ No newline at end of file diff --git a/app.py b/app.py index 01b17ec..ac95c80 100644 --- a/app.py +++ b/app.py @@ -9,6 +9,7 @@ from dotenv import load_dotenv from streamlit_quill import st_quill + load_dotenv() st.set_page_config( @@ -116,6 +117,7 @@ def process_youtube_link(youtube_link): st.session_state.task_id = None st.session_state.result = None st.session_state.status = None + st.session_state.error = None st.session_state.original_file_name = None st.session_state.media_file_data = None st.session_state.input_type = None @@ -123,7 +125,7 @@ def process_youtube_link(youtube_link): st.session_state.json_edit = "" st.session_state.srt_edit = "" st.session_state.vtt_edit = "" - st.session_state.selected_tab = "txt" + st.session_state.selected_tab = "srt" st.session_state.is_modified = False # Initialize the modified flag st.session_state.original_txt = "" st.session_state.original_json = "" @@ -142,6 +144,7 @@ def reset_transcription_state(): st.session_state.task_id = None st.session_state.result = None
st.session_state.status = None + st.session_state.error = None st.session_state.original_file_name = None st.session_state.media_file_data = None st.session_state.input_type = None @@ -149,7 +152,7 @@ def reset_transcription_state(): st.session_state.json_edit = "" st.session_state.srt_edit = "" st.session_state.vtt_edit = "" - st.session_state.selected_tab = "txt" + st.session_state.selected_tab = "srt" st.session_state.is_modified = False st.session_state.original_txt = "" st.session_state.original_json = "" @@ -200,7 +203,9 @@ def callback_disable_controls(): st.session_state.youtube_link = st.text_input("Enter YouTube video link") lang = st.selectbox("Select Language", ["de", "en", "es", "fr", "it", "ja", "nl", "pt", "uk", "zh"]) - model = st.selectbox("Select Model", ["tiny", "small", "base", "medium", "large-v2", "large-v3"], index=2) + model = st.selectbox("Select Model", ["base", "large-v3"], index=0, + help="base: balance between quality and speed of transcription; " + "large-v3: slower transcription speed but highest quality.") detect_speakers = st.toggle("Detect different speakers", value=True, help="This activates diarization for the transcription. Diarization " @@ -275,9 +280,11 @@ def callback_disable_controls(): if status['status'] == "SUCCESS": st.session_state.status = "SUCCESS" st.session_state.result = status.get('result', {}) + st.success("Transcription successful!") break elif status['status'] == "FAILURE": st.session_state.status = "FAILURE" + st.session_state.error = status # We want all the information about the failure to display in next refresh st.error(f"Transcription failed. 
Error: {status.get('error', 'Unknown error')}") break else: @@ -297,58 +304,19 @@ def callback_disable_controls(): # Display result if transcription is successful if st.session_state.status == "SUCCESS" and st.session_state.result: - st.success("Transcription successful!") base_name = os.path.splitext(st.session_state.original_file_name)[0] result = st.session_state.result - button1_col, button2_col, button3_col, button4_col = st.columns(4) - - # Handle conditional checks for content before creating buttons - if 'vtt_content' in result and result['vtt_content']: - button1_col.download_button( - label="Download VTT File", - data=BytesIO(result['vtt_content'].encode('utf-8')), - file_name=f"{base_name}_{lang}.vtt", - mime="text/vtt" - ) - - if 'txt_content' in result and result['txt_content']: - button2_col.download_button( - label="Download TXT File", - data=BytesIO(result['txt_content'].encode('utf-8')), - file_name=f"{base_name}_{lang}.txt", - mime="text/plain" - ) - - if 'json_content' in result and result['json_content']: - button3_col.download_button( - label="Download JSON File", - data=BytesIO(result['json_content'].encode('utf-8')), - file_name=f"{base_name}_{lang}.json", - mime="application/json" - ) - - if 'srt_content' in result and result['srt_content']: - button4_col.download_button( - label="Download SRT File", - data=BytesIO(result['srt_content'].encode('utf-8')), - file_name=f"{base_name}_{lang}.srt", - mime="text/srt" - ) - st.write("Transcription Result:") - # Create columns for the media and editor - media_col, editor_col = st.columns([3, 7]) - - with media_col: - + # Expander around the media player + with st.expander("Media Player", expanded=True): + # Display the media player at the top if st.session_state.media_file_data: - ext = os.path.splitext(st.session_state.original_file_name)[1].lower() - if st.session_state.input_type == "Upload File": + ext = os.path.splitext(st.session_state.original_file_name)[1].lower() if ext in ['.mp3', '.wav']: 
st.audio(st.session_state.media_file_data) elif ext in ['.mp4']: @@ -357,65 +325,118 @@ def callback_disable_controls(): st.video(st.session_state.media_file_data, subtitles={lang: subtitle_content}) else: st.video(st.session_state.media_file_data) - else: st.video(st.session_state.youtube_link) - with editor_col: - st.selectbox("Select format to view/edit", ["txt", "json", "srt", "vtt"], key="selected_tab") + st.selectbox("Select format to view/edit", ["srt", "json", "txt", "vtt"], key="selected_tab") + + # Add help text for each format + format_help_texts = { + 'txt': "**txt**: Plain text format. " + "Contains the raw transcribed text without any formatting or timing information.", + 'json': "**json**: JSON format. " + "Provides structured data, including the transcription along with metadata such as timestamps " + "and speaker info.", + 'srt': "**srt**: SubRip Subtitle format. Used for video subtitles. " + "Includes transcribed text with timing for synchronization with videos.", + 'vtt': "**vtt**: WebVTT format. Used for web video subtitles. " + "Similar to SRT but supports additional styling and metadata." 
+ } + + # Display the help text for the selected format + st.write(format_help_texts[st.session_state.selected_tab]) - # Add CSS to limit editor height and enable scrolling - st.markdown(""" + # Adjust the CSS for the editor if needed + st.markdown(""" """, unsafe_allow_html=True) - # Load content into the editor - if st.session_state.selected_tab == "txt": - if st.session_state.txt_edit == "": - st.session_state.txt_edit = normalize_text(result.get('txt_content', '')) - st.session_state.original_txt = st.session_state.txt_edit - st_quill(value=st.session_state.txt_edit, key="txt_edit") - - elif st.session_state.selected_tab == "json": - if st.session_state.json_edit == "": - st.session_state.json_edit = normalize_text(result.get('json_content', '')) - st.session_state.original_json = st.session_state.json_edit - st_quill(value=st.session_state.json_edit, key="json_edit") - - elif st.session_state.selected_tab == "srt": - if st.session_state.srt_edit == "": - st.session_state.srt_edit = normalize_text(result.get('srt_content', '')) - st.session_state.original_srt = st.session_state.srt_edit - st_quill(value=st.session_state.srt_edit, key="srt_edit") - - elif st.session_state.selected_tab == "vtt": - if st.session_state.vtt_edit == "": - st.session_state.vtt_edit = normalize_text(result.get('vtt_content', '')) - st.session_state.original_vtt = st.session_state.vtt_edit - st_quill(value=st.session_state.vtt_edit, key="vtt_edit") - - # Compare the current content with the original content - is_modified = False - - if st.session_state.selected_tab == "txt": - is_modified = normalize_text(st.session_state.txt_edit) != normalize_text(st.session_state.original_txt) - elif st.session_state.selected_tab == "json": - is_modified = normalize_text(st.session_state.json_edit) != normalize_text(st.session_state.original_json) - elif st.session_state.selected_tab == "srt": - is_modified = normalize_text(st.session_state.srt_edit) != normalize_text(st.session_state.original_srt) - 
elif st.session_state.selected_tab == "vtt": - is_modified = normalize_text(st.session_state.vtt_edit) != normalize_text(st.session_state.original_vtt) - - st.session_state.is_modified = is_modified + # Load content into the editor + if st.session_state.selected_tab == "txt": + if st.session_state.txt_edit == "": + st.session_state.txt_edit = normalize_text(result.get('txt_content', '')) + st.session_state.original_txt = st.session_state.txt_edit + st_quill(value=st.session_state.txt_edit, key="txt_edit") + elif st.session_state.selected_tab == "json": + if st.session_state.json_edit == "": + st.session_state.json_edit = normalize_text(result.get('json_content', '')) + st.session_state.original_json = st.session_state.json_edit + st_quill(value=st.session_state.json_edit, key="json_edit") + + elif st.session_state.selected_tab == "srt": + if st.session_state.srt_edit == "": + st.session_state.srt_edit = normalize_text(result.get('srt_content', '')) + st.session_state.original_srt = st.session_state.srt_edit + st_quill(value=st.session_state.srt_edit, key="srt_edit") + + elif st.session_state.selected_tab == "vtt": + if st.session_state.vtt_edit == "": + st.session_state.vtt_edit = normalize_text(result.get('vtt_content', '')) + st.session_state.original_vtt = st.session_state.vtt_edit + st_quill(value=st.session_state.vtt_edit, key="vtt_edit") + + # Compare the current content with the original content + is_modified = False + + if st.session_state.selected_tab == "txt": + is_modified = normalize_text(st.session_state.txt_edit) != normalize_text(st.session_state.original_txt) + elif st.session_state.selected_tab == "json": + is_modified = normalize_text(st.session_state.json_edit) != normalize_text(st.session_state.original_json) + elif st.session_state.selected_tab == "srt": + is_modified = normalize_text(st.session_state.srt_edit) != normalize_text(st.session_state.original_srt) + elif st.session_state.selected_tab == "vtt": + is_modified = 
normalize_text(st.session_state.vtt_edit) != normalize_text(st.session_state.original_vtt) + + st.session_state.is_modified = is_modified + + # Create two columns for the Save and Download buttons + save_col, download_col = st.columns(2) + + with save_col: if st.button("Save Changes", disabled=not st.session_state.is_modified): save_changes() st.success("Changes saved successfully!") time.sleep(1) st.rerun() + + with download_col: + # Prepare the current content and file information based on selected tab + current_format = st.session_state.selected_tab + current_content = '' + file_extension = '' + mime_type = '' + + if current_format == 'txt': + current_content = st.session_state.txt_edit + file_extension = 'txt' + mime_type = 'text/plain' + elif current_format == 'json': + current_content = st.session_state.json_edit + file_extension = 'json' + mime_type = 'application/json' + elif current_format == 'srt': + current_content = st.session_state.srt_edit + file_extension = 'srt' + mime_type = 'text/srt' + elif current_format == 'vtt': + current_content = st.session_state.vtt_edit + file_extension = 'vtt' + mime_type = 'text/vtt' + + download_button_label = f"Download {current_format.upper()} File" + + st.download_button( + label=download_button_label, + data=BytesIO(current_content.encode('utf-8')), + file_name=f"{base_name}_{lang}.{file_extension}", + mime=mime_type + ) +elif st.session_state.status == "FAILURE" and 'status' in st.session_state.error: + st.error(f"Transcription failed. Error: {st.session_state.error.get('error', 'Unknown error')}")