forked from ololoshka2871/Voice-2-txt-faster-whisper
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
92 lines (65 loc) · 3.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
from io import BytesIO
import os
import logging
import functools
import argparse
from aiohttp import web
from faster_whisper import WhisperModel
logger = logging.getLogger(__name__)
async def index(request: web.Request) -> web.FileResponse:
    """Serve the API documentation page.

    The page is the ``index.html`` file located in the same directory as
    this module.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        A file response streaming ``index.html``.
    """
    # Kept as a function-local import so this handler does not depend on a
    # module-level ``pathlib`` import.
    import pathlib

    module_dir = pathlib.Path(__file__).parent.resolve()
    return web.FileResponse(module_dir / 'index.html')
def _collect_transcription(model: WhisperModel, wav_data: bytes) -> tuple:
    """Transcribe *wav_data* and return ``(segments, language)``.

    This is the blocking part of the request and is meant to be run in a
    worker thread. ``segments`` is a list of dicts with the keys ``text``,
    ``start`` and ``end``; ``language`` is the language reported by the model.
    """
    segments, info = model.transcribe(audio=BytesIO(wav_data), vad_filter=True)
    logger.debug("Detected language '%s' with probability %s",
                 info.language, info.language_probability)
    # NOTE(review): per the faster-whisper docs ``segments`` is lazy, so the
    # heavy decoding happens while it is consumed here - confirm for the
    # installed version.
    transcribed = [
        {
            'text': segment.text,
            'start': segment.start,
            'end': segment.end,
        }
        for segment in segments
    ]
    return transcribed, info.language


async def transcribe_post(model: WhisperModel, request: web.Request) -> web.StreamResponse:
    """Transcribe a WAV file sent in the request body.

    Args:
        model: The loaded speech-to-text model used for transcription.
        request: A POST request whose body is the audio data and whose
            ``Content-Type`` is ``audio/wav``.

    Returns:
        A JSON response with ``transcribed_segments`` (a list of
        ``{'text', 'start', 'end'}`` objects) and ``language``, or a
        415 response when the content type is not ``audio/wav``.
    """
    import asyncio

    # ``content_type`` is the parsed MIME type: it tolerates a missing header
    # (no KeyError -> HTTP 500) and ignores parameters such as "; charset=...".
    if request.content_type != "audio/wav":
        return web.Response(status=415, text="Unsupported Input Media Type")

    wav_data = await request.read()

    # Transcription is CPU-bound and blocking; run it in the default executor
    # so the event loop stays free to serve other requests meanwhile.
    loop = asyncio.get_running_loop()
    segments_result, language = await loop.run_in_executor(
        None, _collect_transcription, model, wav_data)

    # Return the transcribed segments and the detected language as JSON.
    return web.json_response({'transcribed_segments': segments_result,
                              'language': language, })
async def start_server(model: str, compute_type: str = 'default', cache_dir: str = None, device: str = 'cpu') -> web.Application:
    """Load the transcription model and build the web application.

    Args:
        model: Model name, or the name of a model directory inside
            ``cache_dir``.
        compute_type: Compute type forwarded to ``WhisperModel``.
        cache_dir: Directory that holds (or will hold) model files. When
            ``None`` no download root is passed and ``model`` itself is
            treated as the candidate local path.
        device: Device forwarded to ``WhisperModel``.

    Returns:
        An application with ``GET /`` (documentation page) and
        ``POST /transcribe`` routes registered.
    """
    # Avoid building the literal path "None/<model>" when no cache dir is set.
    if cache_dir is None:
        model_path = model
        download_root = None
    else:
        model_path = f'{cache_dir}/{model}'
        download_root = model_path
    logger.info('Loading AI model %s to %s...', model_path, device)

    # A bare isdir() check is not enough: ``download_root`` is created by the
    # first download, so the directory can exist without being a loadable
    # model. NOTE(review): this assumes a converted model directory contains
    # "model.bin" (CTranslate2 layout) - confirm for the models in use.
    has_local_weights = os.path.isfile(os.path.join(model_path, 'model.bin'))
    model_source = model_path if has_local_weights else model
    whisper_model = WhisperModel(model_source, device=device,
                                 compute_type=compute_type,
                                 download_root=download_root)

    app = web.Application()
    # Bind the loaded model as the first argument of the transcribe handler.
    app.add_routes([
        web.get('/', handler=index),
        web.post('/transcribe',
                 handler=functools.partial(transcribe_post, whisper_model)),
    ])
    return app
if __name__ == '__main__':
    # Command-line entry point: parse options, then serve until interrupted.
    parser = argparse.ArgumentParser(
        description='An AI voice to text transcription server')
    parser.add_argument('-p', '--port', type=int,
                        default=3157, help='Port to listen on')
    parser.add_argument('-m', '--model',
                        help='Model name, see https://github.com/openai/whisper#available-models-and-languages',
                        default='medium')
    parser.add_argument('-t', '--compute-type', type=str,
                        help='default, float16, int8', default='default')
    parser.add_argument('-d', '--model-dir',
                        type=str,
                        help='Path to model directory',
                        default='models')
    # The value is handed to WhisperModel(device=...), not to torch.
    parser.add_argument('-c', '--device', type=str, default='cpu',
                        help='Device to use for computation (cpu, cuda, auto)')
    args = parser.parse_args()

    # Without a configured handler the INFO messages below and in
    # start_server() are silently discarded (the default threshold is WARNING).
    logging.basicConfig(level=logging.INFO)

    logger.info(f'Starting server at http://localhost:{args.port}/')
    # run_app accepts the coroutine and awaits it to obtain the Application.
    web.run_app(start_server(args.model, args.compute_type,
                             args.model_dir, args.device), port=args.port)