forked from oconnoob/automatic-video-sections
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
94 lines (71 loc) · 3.2 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import argparse
import re
import assemblyai as aai
# Command-line interface: a single positional argument naming the media file.
# Its value is passed to the transcriber in the __main__ section.
parser = argparse.ArgumentParser(
    description="Generate YouTube-style timestamped section titles for a recording.",
)
parser.add_argument(
    'filename',
    help="audio or video file to transcribe",
)
# Prompt template for the LeMUR task.
#
# `{timestamp_lines}` is the only placeholder; it is filled with str.format()
# using one "<TIMESTAMP> <SECTION SUMMARY>" line per chapter. The newline right
# after the opening quotes and the one before the closing quotes are removed by
# .strip(), so the final prompt starts with "ROLE:" and ends with "OUTPUT:".
#
# NOTE(review): the CONTEXT section is hard-coded to one specific recording;
# adjust it when running the script on other material.
LEMUR_PROMPT = """
ROLE:
You are a YouTube content professional. You are very competent and able to come up with catchy names for the different sections of video transcripts that are submitted to you.
CONTEXT:
This transcript is of a logistics meeting at GitLab
INSTRUCTION:
You are provided information about the sections of the transcript under TIMESTAMPS, where the format for each line is `<TIMESTAMP> <SECTION SUMMARY>`.
TIMESTAMPS:
{timestamp_lines}
FORMAT:
<TIMESTAMP> <CATCHY SECTION TITLE>
OUTPUT:
""".strip()
def ms_to_hms(start):
    """Split a millisecond offset into whole hours, minutes and seconds.

    Any sub-second remainder is discarded.

    Args:
        start: offset in milliseconds.

    Returns:
        A ``(hours, minutes, seconds)`` tuple.
    """
    total_seconds = start // 1000
    total_minutes = total_seconds // 60
    return total_minutes // 60, total_minutes % 60, total_seconds % 60
def create_timestamps(chapters):
    """Format chapters as timestamp lines of the form ``(HH:)MM:SS <headline>``.

    The hours field is included on every line only when the last chapter
    starts one hour or more into the recording. The first line is always
    stamped at zero, whatever the first chapter's own start is.

    Args:
        chapters: sequence of objects exposing ``start`` (milliseconds) and
            ``headline`` attributes. May be empty or ``None``.

    Returns:
        The formatted lines joined with newlines, or an empty string when
        there are no chapters.
    """
    # guard the chapters[-1] lookup below against empty / missing chapters
    if not chapters:
        return ""

    # the last chapter decides whether an hours field is needed
    # (assumes chapters are in chronological order - TODO confirm)
    last_hour = ms_to_hms(chapters[-1].start)[0]
    time_format = "{m:02d}:{s:02d}" if last_hour == 0 else "{h:02d}:{m:02d}:{s:02d}"

    lines = []
    for idx, chapter in enumerate(chapters):
        # first YouTube timestamp must be at zero
        h, m, s = (0, 0, 0) if idx == 0 else ms_to_hms(chapter.start)
        lines.append(f"{time_format.format(h=h, m=m, s=s)} {chapter.headline}")
    return "\n".join(lines)
def filter_timestamps(text):
    """Keep only the lines of *text* that begin with a timestamp.

    A line qualifies when it starts with ``<digits>:<digits>``; anything else
    (including lines with leading whitespace) is dropped.

    Args:
        text: multi-line string to filter.

    Returns:
        The qualifying lines, in their original order, joined with newlines.
    """
    starts_with_timestamp = re.compile(r'\d+:\d+').match
    return '\n'.join(filter(starts_with_timestamp, text.splitlines()))
def verify_timestamps(a, b):
    """Verify that two timestamp listings carry the same timestamps, in order.

    Both arguments are multi-line strings in which each line starts with a
    timestamp followed by a space and free text. Only the timestamps are
    compared.

    Args:
        a: reference listing (the original timestamp lines).
        b: listing to check against ``a`` (the filtered LLM output).

    Raises:
        RuntimeError: if the listings have a different number of lines, or if
            any pair of corresponding timestamps differs.
    """
    original = a.splitlines()
    filtered = b.splitlines()

    # zip() stops at the shorter input, so dropped or extra lines would
    # otherwise go unnoticed
    if len(original) != len(filtered):
        raise RuntimeError(
            f"Timestamp mismatch - expected {len(original)} timestamp lines "
            f"but LLM returned {len(filtered)}"
        )

    for o, f in zip(original, filtered):
        # the timestamp is everything before the first space
        original_time = o.partition(' ')[0]
        filtered_time = f.partition(' ')[0]
        if original_time != filtered_time:
            raise RuntimeError(f"Timestamp mismatch - original timestamp '{original_time}' does not match LLM timestamp '{filtered_time}'")
if __name__ == "__main__":
    args = parser.parse_args()

    # auto_chapters turns on AssemblyAI's model for automatically determining
    # video sections; the transcriber object performs the transcription
    chapter_config = aai.TranscriptionConfig(auto_chapters=True)
    transcriber = aai.Transcriber(config=chapter_config)

    # transcribe the file and stop if the transcript carries an error
    transcript = transcriber.transcribe(args.filename)
    if transcript.error:
        raise RuntimeError(transcript.error)

    # build one timestamp line per chapter
    timestamp_lines = create_timestamps(transcript.chapters)

    # send the filled-in prompt to LeMUR and keep only the timestamped lines
    # of its response
    prompt = LEMUR_PROMPT.format(timestamp_lines=timestamp_lines)
    result = transcript.lemur.task(prompt)
    filtered_output = filter_timestamps(result.response.strip())

    # verify the LLM did not modify the timestamps
    verify_timestamps(timestamp_lines, filtered_output)

    # print the results
    print("TIMESTAMPS:", filtered_output, sep="\n")