From fa2191104878fed468cb16cc8782c345c98ff7aa Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 3 Aug 2020 18:15:38 +0200 Subject: [PATCH 01/12] Rename packages, modules, headers, shared libraries to Mozilla Voice STT --- .gitignore | 3 + Dockerfile.build.tmpl | 6 +- doc/{DeepSpeech.rst => AcousticModel.rst} | 14 +- doc/BUILDING.rst | 48 ++-- doc/C-API.rst | 39 ++-- doc/Decoder.rst | 12 +- doc/Error-Codes.rst | 2 +- doc/Makefile | 2 +- doc/Scorer.rst | 4 +- doc/TRAINING.rst | 31 ++- doc/USING.rst | 26 +-- doc/conf.py | 12 +- doc/doxygen-c.conf | 2 +- doc/index.rst | 14 +- evaluate_tflite.py | 6 +- native_client/Android.mk | 8 +- native_client/BUILD | 14 +- native_client/Makefile | 18 +- native_client/args.h | 16 +- native_client/client.cc | 42 ++-- native_client/ctcdecode/__init__.py | 2 +- native_client/ctcdecode/scorer.cpp | 14 +- native_client/ctcdecode/scorer.h | 2 +- native_client/ctcdecode/swigwrapper.i | 6 +- native_client/deepspeech.cc | 88 +++---- native_client/deepspeech_errors.cc | 6 +- native_client/definitions.mk | 8 +- .../dotnet/DeepSpeechClient/DeepSpeech.cs | 40 ++-- .../DeepSpeechClient/Enums/ErrorCodes.cs | 41 ++-- .../Extensions/NativeExtensions.cs | 4 +- .../dotnet/DeepSpeechClient/NativeImp.cs | 90 ++++---- native_client/dotnet/README.rst | 8 +- .../dotnet/nupkg/deepspeech.nuspec.in | 2 +- native_client/generate_scorer_package.cpp | 6 +- native_client/java/Makefile | 8 +- native_client/java/jni/deepspeech.i | 20 +- .../java/libdeepspeech/CMakeLists.txt | 6 +- .../libdeepspeech/DeepSpeechModel.java | 6 +- native_client/javascript/Makefile | 4 +- native_client/javascript/client.ts | 4 +- native_client/javascript/deepspeech.i | 22 +- native_client/javascript/package.json.in | 2 +- native_client/modelstate.cc | 2 +- native_client/modelstate.h | 4 +- .../{deepspeech.h => mozilla_voice_stt.h} | 214 +++++++++--------- native_client/python/Makefile | 4 +- native_client/python/__init__.py | 46 ++-- native_client/python/client.py | 6 +- native_client/python/impl.i | 26 +-- native_client/python/setup.py | 10 +- .../deepspeech_ios.xcodeproj/project.pbxproj | 16 +- .../swift/deepspeech_ios/DeepSpeech.swift | 98 ++++---- .../deepspeech_ios/deepspeech_ios.modulemap | 4 +- .../project.pbxproj | 12 +- native_client/test/concurrent_streams.py | 2 +- native_client/tflitemodelstate.cc | 24 +- native_client/tfmodelstate.cc | 24 +- taskcluster/android-build.sh | 2 +- taskcluster/arm64-build.sh | 2 +- taskcluster/cuda-build.sh | 2 +- taskcluster/examples-base.tyml | 2 +- taskcluster/host-build.sh | 2 +- taskcluster/ios-build.sh | 2 +- taskcluster/ios-package.sh | 2 +- taskcluster/node-package.sh | 2 +- taskcluster/package.sh | 4 +- taskcluster/rpi3-build.sh | 2 +- taskcluster/tc-all-utils.sh | 2 +- taskcluster/tc-android-utils.sh | 2 +- taskcluster/tc-asserts.sh | 72 +++--- taskcluster/tc-build-utils.sh | 18 +- taskcluster/tc-dotnet-utils.sh | 2 +- taskcluster/tc-evaluate_tflite.sh | 4 +- taskcluster/tc-netframework-ds-tests.sh | 6 +- taskcluster/tc-node-utils.sh | 4 +- taskcluster/tc-package.sh | 16 +- taskcluster/tc-py-utils.sh | 10 +- taskcluster/tc-python-tests.sh | 6 +- taskcluster/tc-python_tflite-tests-prod.sh | 4 +- taskcluster/tc-python_tflite-tests.sh | 4 +- taskcluster/win-build.sh | 10 +- taskcluster/win-opt-base.tyml | 4 - taskcluster/win-package.sh | 4 +- training/deepspeech_training/train.py | 2 +- 84 files changed, 693 insertions(+), 699 deletions(-) rename doc/{DeepSpeech.rst => AcousticModel.rst} (88%) rename native_client/{deepspeech.h => mozilla_voice_stt.h} (56%) 
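For API consumers the rename is mechanical: the header moves from deepspeech.h to mozilla_voice_stt.h, the shared library from libdeepspeech.so to libmozilla_voice_stt.so, and every DS_* symbol and DS_ERR_* error code becomes STT_*/STT_ERR_*. Below is a minimal sketch of the batch inference path against the renamed C API, mirroring the calls this patch renames in native_client/client.cc; the model path and the silent audio buffer are illustrative placeholders, not part of the change.

    #include <stdio.h>
    #include <stdlib.h>
    #include "mozilla_voice_stt.h"   /* was: #include "deepspeech.h" */

    int main(void)
    {
        ModelState* ctx = NULL;

        /* was: DS_CreateModel(); "output_graph.tflite" is a placeholder model path */
        int status = STT_CreateModel("output_graph.tflite", &ctx);
        if (status != STT_ERR_OK) {  /* was: DS_ERR_OK */
            char* error = STT_ErrorCodeToErrorMessage(status);
            fprintf(stderr, "Could not create model: %s\n", error);
            free(error);
            return 1;
        }

        /* placeholder input: one second of silence at 16kHz; real callers feed
           mono 16-bit PCM at STT_GetModelSampleRate(ctx) */
        static short buffer[16000];
        char* text = STT_SpeechToText(ctx, buffer, 16000);  /* was: DS_SpeechToText */
        printf("%s\n", text);

        STT_FreeString(text);  /* was: DS_FreeString */
        STT_FreeModel(ctx);    /* was: DS_FreeModel */
        return 0;
    }

The streaming path renames the same way (DS_CreateStream becomes STT_CreateStream, DS_FeedAudioContent becomes STT_FeedAudioContent, DS_IntermediateDecode becomes STT_IntermediateDecode, DS_FinishStream becomes STT_FinishStream), as the client.cc hunks below show.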
diff --git a/.gitignore b/.gitignore index 5a9e6d8adb..19eaf5e55d 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,6 @@ /doc/xml-java/ Dockerfile.build Dockerfile.train +doc/xml-c +doc/xml-java +doc/xml-dotnet diff --git a/Dockerfile.build.tmpl b/Dockerfile.build.tmpl index 58bea15027..a3982312e3 100644 --- a/Dockerfile.build.tmpl +++ b/Dockerfile.build.tmpl @@ -149,12 +149,12 @@ RUN bazel build \ --copt=-msse4.2 \ --copt=-mavx \ --copt=-fvisibility=hidden \ - //native_client:libdeepspeech.so \ + //native_client:libmozilla_voice_stt.so \ --verbose_failures \ --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} # Copy built libs to /DeepSpeech/native_client -RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ +RUN cp bazel-bin/native_client/libmozilla_voice_stt.so /DeepSpeech/native_client/ # Build client.cc and install Python client and decoder bindings ENV TFDIR /DeepSpeech/tensorflow @@ -162,7 +162,7 @@ ENV TFDIR /DeepSpeech/tensorflow RUN nproc WORKDIR /DeepSpeech/native_client -RUN make NUM_PROCESSES=$(nproc) deepspeech +RUN make NUM_PROCESSES=$(nproc) mozilla_voice_stt WORKDIR /DeepSpeech RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings diff --git a/doc/DeepSpeech.rst b/doc/AcousticModel.rst similarity index 88% rename from doc/DeepSpeech.rst rename to doc/AcousticModel.rst index 3d74d22ec0..cf70af2ebc 100644 --- a/doc/DeepSpeech.rst +++ b/doc/AcousticModel.rst @@ -1,11 +1,5 @@ -DeepSpeech Model -================ - -The aim of this project is to create a simple, open, and ubiquitous speech -recognition engine. Simple, in that the engine should not require server-class -hardware to execute. Open, in that the code and models are released under the -Mozilla Public License. Ubiquitous, in that the engine should run on many -platforms and have bindings to many different languages. +Mozilla Voice STT Acoustic Model +================================ The architecture of the engine was originally motivated by that presented in `Deep Speech: Scaling up end-to-end speech recognition `_. @@ -77,7 +71,7 @@ with respect to all of the model parameters may be done via back-propagation through the rest of the network. We use the Adam method for training `[3] `_. -The complete RNN model is illustrated in the figure below. +The complete LSTM model is illustrated in the figure below. .. image:: ../images/rnn_fig-624x598.png - :alt: DeepSpeech BRNN + :alt: Mozilla Voice STT LSTM diff --git a/doc/BUILDING.rst b/doc/BUILDING.rst index 4d25359ad2..0bc598bdab 100644 --- a/doc/BUILDING.rst +++ b/doc/BUILDING.rst @@ -1,12 +1,12 @@ .. _build-native-client: -Building DeepSpeech Binaries -============================ +Building Mozilla Voice STT Binaries +=================================== This section describes how to rebuild binaries. We have already several prebuilt binaries for all the supported platform, it is highly advised to use them except if you know what you are doing. -If you'd like to build the DeepSpeech binaries yourself, you'll need the following pre-requisites downloaded and installed: +If you'd like to build the Mozilla Voice STT binaries yourself, you'll need the following pre-requisites downloaded and installed: * `Bazel 2.0.0 `_ * `General TensorFlow r2.2 requirements `_ @@ -26,14 +26,14 @@ If you'd like to build the language bindings or the decoder package, you'll also Dependencies ------------ -If you follow these instructions, you should compile your own binaries of DeepSpeech (built on TensorFlow using Bazel). 
+If you follow these instructions, you should compile your own binaries of Mozilla Voice STT (built on TensorFlow using Bazel). For more information on configuring TensorFlow, read the docs up to the end of `"Configure the Build" `_. Checkout source code ^^^^^^^^^^^^^^^^^^^^ -Clone DeepSpeech source code (TensorFlow will come as a submdule): +Clone Mozilla Voice STT source code (TensorFlow will come as a submodule): .. code-block:: @@ -56,24 +56,24 @@ After you have installed the correct version of Bazel, configure TensorFlow: cd tensorflow ./configure -Compile DeepSpeech ------------------- +Compile Mozilla Voice STT +------------------------- -Compile ``libdeepspeech.so`` +Compile ``libmozilla_voice_stt.so`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Within your TensorFlow directory, there should be a symbolic link to the DeepSpeech ``native_client`` directory. If it is not present, create it with the follow command: +Within your TensorFlow directory, there should be a symbolic link to the Mozilla Voice STT ``native_client`` directory. If it is not present, create it with the following command: .. code-block:: cd tensorflow ln -s ../native_client -You can now use Bazel to build the main DeepSpeech library, ``libdeepspeech.so``. Add ``--config=cuda`` if you want a CUDA build. +You can now use Bazel to build the main Mozilla Voice STT library, ``libmozilla_voice_stt.so``. Add ``--config=cuda`` if you want a CUDA build. .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libmozilla_voice_stt.so The generated binaries will be saved to ``bazel-bin/native_client/``. @@ -82,12 +82,12 @@ The generated binaries will be saved to ``bazel-bin/native_client/``. Compile ``generate_scorer_package`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Following the same setup as for ``libdeepspeech.so`` above, you can rebuild the ``generate_scorer_package`` binary by adding its target to the command line: ``//native_client:generate_scorer_package``. +Following the same setup as for ``libmozilla_voice_stt.so`` above, you can rebuild the ``generate_scorer_package`` binary by adding its target to the command line: ``//native_client:generate_scorer_package``. Using the example from above you can build the library and that binary at the same time: .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_scorer_package + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package The generated binaries will be saved to ``bazel-bin/native_client/``. @@ -123,7 +123,7 @@ Included are a set of generated Python bindings. After following the above build make bindings pip install dist/deepspeech* -The API mirrors the C++ API and is demonstrated in `client.py `_. Refer to `deepspeech.h `_ for documentation.
+The API mirrors the C++ API and is demonstrated in `client.py `_. Refer to the `C API ` for documentation. Install NodeJS / ElectronJS bindings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -136,7 +136,7 @@ After following the above build and installation instructions, the Node.JS bindi make build make npm-pack -This will create the package ``deepspeech-VERSION.tgz`` in ``native_client/javascript``. +This will create the package ``mozilla_voice_stt-VERSION.tgz`` in ``native_client/javascript``. Install the CTC decoder package ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -165,13 +165,13 @@ So your command line for ``RPi3`` and ``ARMv7`` should look like: .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libmozilla_voice_stt.so And your command line for ``LePotato`` and ``ARM64`` should look like: .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libmozilla_voice_stt.so While we test only on RPi3 Raspbian Buster and LePotato ARMBian Buster, anything compatible with ``armv7-a cortex-a53`` or ``armv8-a cortex-a53`` should be fine. @@ -205,27 +205,27 @@ You can then include the library by just adding this line to your implementation 'deepspeech.mozilla.org:libdeepspeech:VERSION@aar' -Building ``libdeepspeech.so`` +Building ``libmozilla_voice_stt.so`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can build the ``libdeepspeech.so`` using (ARMv7): +You can build the ``libmozilla_voice_stt.so`` using (ARMv7): .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libmozilla_voice_stt.so Or (ARM64): .. 
code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libmozilla_voice_stt.so Building ``libdeepspeech.aar`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the unlikely event you have to rebuild the JNI bindings, source code is available under the ``libdeepspeech`` subdirectory. Building depends on shared -object: please ensure to place ``libdeepspeech.so`` into the +object: please ensure to place ``libmozilla_voice_stt.so`` into the ``libdeepspeech/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` matching subdirectories. Building the bindings is managed by ``gradle`` and should be limited to issuing @@ -278,7 +278,7 @@ Running ``deepspeech`` via adb You should use ``adb push`` to send data to device, please refer to Android documentation on how to use that. -Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including: +Please push Mozilla Voice STT data to ``/sdcard/deepspeech/``\ , including: * ``output_graph.tflite`` which is the TF Lite model @@ -289,7 +289,7 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including: Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ : * ``deepspeech`` -* ``libdeepspeech.so`` +* ``libmozilla_voice_stt.so`` * ``libc++_shared.so`` You should then be able to run as usual, using a shell from ``adb shell``\ : diff --git a/doc/C-API.rst b/doc/C-API.rst index e96f3e12a6..bddc7d491c 100644 --- a/doc/C-API.rst +++ b/doc/C-API.rst @@ -10,56 +10,59 @@ C API See also the list of error codes including descriptions for each error in :ref:`error-codes`. -.. doxygenfunction:: DS_CreateModel +.. doxygenfunction:: STT_CreateModel :project: deepspeech-c -.. doxygenfunction:: DS_FreeModel +.. doxygenfunction:: STT_FreeModel :project: deepspeech-c -.. doxygenfunction:: DS_EnableExternalScorer +.. doxygenfunction:: STT_EnableExternalScorer :project: deepspeech-c -.. doxygenfunction:: DS_DisableExternalScorer +.. doxygenfunction:: STT_DisableExternalScorer :project: deepspeech-c -.. doxygenfunction:: DS_SetScorerAlphaBeta +.. doxygenfunction:: STT_SetScorerAlphaBeta :project: deepspeech-c -.. doxygenfunction:: DS_GetModelSampleRate +.. doxygenfunction:: STT_GetModelSampleRate :project: deepspeech-c -.. doxygenfunction:: DS_SpeechToText +.. doxygenfunction:: STT_SpeechToText :project: deepspeech-c -.. doxygenfunction:: DS_SpeechToTextWithMetadata +.. doxygenfunction:: STT_SpeechToTextWithMetadata :project: deepspeech-c -.. doxygenfunction:: DS_CreateStream +.. doxygenfunction:: STT_CreateStream :project: deepspeech-c -.. doxygenfunction:: DS_FeedAudioContent +.. doxygenfunction:: STT_FeedAudioContent :project: deepspeech-c -.. doxygenfunction:: DS_IntermediateDecode +.. doxygenfunction:: STT_IntermediateDecode :project: deepspeech-c -.. doxygenfunction:: DS_IntermediateDecodeWithMetadata +.. doxygenfunction:: STT_IntermediateDecodeWithMetadata :project: deepspeech-c -.. doxygenfunction:: DS_FinishStream +.. doxygenfunction:: STT_FinishStream :project: deepspeech-c -.. doxygenfunction:: DS_FinishStreamWithMetadata +.. 
doxygenfunction:: STT_FinishStreamWithMetadata :project: deepspeech-c -.. doxygenfunction:: DS_FreeStream +.. doxygenfunction:: STT_FreeStream :project: deepspeech-c -.. doxygenfunction:: DS_FreeMetadata +.. doxygenfunction:: STT_FreeMetadata :project: deepspeech-c -.. doxygenfunction:: DS_FreeString +.. doxygenfunction:: STT_FreeString :project: deepspeech-c -.. doxygenfunction:: DS_Version +.. doxygenfunction:: STT_Version + :project: deepspeech-c + +.. doxygenfunction:: STT_ErrorCodeToErrorMessage :project: deepspeech-c diff --git a/doc/Decoder.rst b/doc/Decoder.rst index c335c3173e..9f2381976c 100644 --- a/doc/Decoder.rst +++ b/doc/Decoder.rst @@ -6,7 +6,7 @@ CTC beam search decoder Introduction ^^^^^^^^^^^^ -DeepSpeech uses the `Connectionist Temporal Classification `_ loss function. For an excellent explanation of CTC and its usage, see this Distill article: `Sequence Modeling with CTC `_. This document assumes the reader is familiar with the concepts described in that article, and describes DeepSpeech specific behaviors that developers building systems with DeepSpeech should know to avoid problems. +Mozilla Voice STT uses the `Connectionist Temporal Classification `_ loss function. For an excellent explanation of CTC and its usage, see this Distill article: `Sequence Modeling with CTC `_. This document assumes the reader is familiar with the concepts described in that article, and describes Mozilla Voice STT specific behaviors that developers building systems with Mozilla Voice STT should know to avoid problems. Note: Documentation for the tooling for creating custom scorer packages is available in :ref:`scorer-scripts`. @@ -16,19 +16,19 @@ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "S External scorer ^^^^^^^^^^^^^^^ -DeepSpeech clients support OPTIONAL use of an external language model to improve the accuracy of the predicted transcripts. In the code, command line parameters, and documentation, this is referred to as a "scorer". The scorer is used to compute the likelihood (also called a score, hence the name "scorer") of sequences of words or characters in the output, to guide the decoder towards more likely results. This improves accuracy significantly. +Mozilla Voice STT clients support OPTIONAL use of an external language model to improve the accuracy of the predicted transcripts. In the code, command line parameters, and documentation, this is referred to as a "scorer". The scorer is used to compute the likelihood (also called a score, hence the name "scorer") of sequences of words or characters in the output, to guide the decoder towards more likely results. This improves accuracy significantly. -The use of an external scorer is fully optional. When an external scorer is not specified, DeepSpeech still uses a beam search decoding algorithm, but without any outside scoring. +The use of an external scorer is fully optional. When an external scorer is not specified, Mozilla Voice STT still uses a beam search decoding algorithm, but without any outside scoring. -Currently, the DeepSpeech external scorer is implemented with `KenLM `_, plus some tooling to package the necessary files and metadata into a single ``.scorer`` package. The tooling lives in ``data/lm/``. The scripts included in ``data/lm/`` can be used and modified to build your own language model based on your particular use case or language. See :ref:`scorer-scripts` for more details on how to reproduce our scorer file as well as create your own. 
+Currently, the Mozilla Voice STT external scorer is implemented with `KenLM `_, plus some tooling to package the necessary files and metadata into a single ``.scorer`` package. The tooling lives in ``data/lm/``. The scripts included in ``data/lm/`` can be used and modified to build your own language model based on your particular use case or language. See :ref:`scorer-scripts` for more details on how to reproduce our scorer file as well as create your own. -The scripts are geared towards replicating the language model files we release as part of `DeepSpeech model releases `_, but modifying them to use different datasets or language model construction parameters should be simple. +The scripts are geared towards replicating the language model files we release as part of `Mozilla Voice STT model releases `_, but modifying them to use different datasets or language model construction parameters should be simple. Decoding modes ^^^^^^^^^^^^^^ -DeepSpeech currently supports two modes of operation with significant differences at both training and decoding time. Note that Bytes output mode is experimental and has not been tested for languages other than Chinese Mandarin. +Mozilla Voice STT currently supports two modes of operation with significant differences at both training and decoding time. Note that Bytes output mode is experimental and has not been tested for languages other than Chinese Mandarin. Default mode (alphabet based) diff --git a/doc/Error-Codes.rst b/doc/Error-Codes.rst index 361ca025b9..60090c9da9 100644 --- a/doc/Error-Codes.rst +++ b/doc/Error-Codes.rst @@ -5,7 +5,7 @@ Error codes Below is the definition for all error codes used in the API, their numerical values, and a human readable description. -.. literalinclude:: ../native_client/deepspeech.h +.. literalinclude:: ../native_client/mozilla_voice_stt.h :language: c :start-after: sphinx-doc: error_code_listing_start :end-before: sphinx-doc: error_code_listing_end diff --git a/doc/Makefile b/doc/Makefile index 0980ab242c..1b8aa39c69 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -4,7 +4,7 @@ # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build -SPHINXPROJ = DeepSpeech +SPHINXPROJ = Mozilla Voice STT SOURCEDIR = . BUILDDIR = .build diff --git a/doc/Scorer.rst b/doc/Scorer.rst index 1f37460448..841c857761 100644 --- a/doc/Scorer.rst +++ b/doc/Scorer.rst @@ -3,7 +3,7 @@ External scorer scripts ======================= -DeepSpeech pre-trained models include an external scorer. This document explains how to reproduce our external scorer, as well as adapt the scripts to create your own. +Mozilla Voice STT pre-trained models include an external scorer. This document explains how to reproduce our external scorer, as well as adapt the scripts to create your own. The scorer is composed of two sub-components, a KenLM language model and a trie data structure containing all words in the vocabulary. In order to create the scorer package, first we must create a KenLM language model (using ``data/lm/generate_lm.py``, and then use ``generate_scorer_package`` to create the final package file including the trie data structure. @@ -59,6 +59,6 @@ Building your own scorer can be useful if you're using models in a narrow usage The LibriSpeech LM training text used by our scorer is around 4GB uncompressed, which should give an idea of the size of a corpus needed for a reasonable language model for general speech recognition. 
For more constrained use cases with smaller vocabularies, you don't need as much data, but you should still try to gather as much as you can. -With a text corpus in hand, you can then re-use ``generate_lm.py`` and ``generate_scorer_package`` to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit `_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior. +With a text corpus in hand, you can then re-use ``generate_lm.py`` and ``generate_scorer_package`` to create your own scorer that is compatible with Mozilla Voice STT clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit `_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior. After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_scorer_package`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script ` which can be used to find good default values for alpha and beta. To use it, you must first generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py`` later. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_scorer_package`` again, this time with the new values. diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 7de40e6a64..1976e578fa 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -12,7 +12,7 @@ Prerequisites for training a model Getting the training code ^^^^^^^^^^^^^^^^^^^^^^^^^ -Clone the DeepSpeech repository: +Clone the Mozilla Voice STT repository: .. code-block:: bash @@ -32,14 +32,14 @@ Once this command completes successfully, the environment will be ready to be ac Activating the environment ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Each time you need to work with DeepSpeech, you have to *activate* this virtual environment. This is done with this simple command: +Each time you need to work with Mozilla Voice STT, you have to *activate* this virtual environment. This is done with this simple command: .. code-block:: $ source $HOME/tmp/deepspeech-train-venv/bin/activate -Installing DeepSpeech Training Code and its dependencies -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Installing Mozilla Voice STT Training Code and its dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Install the required dependencies using ``pip3``\ : @@ -88,7 +88,7 @@ This should ensure that you'll re-use the upstream Python 3 TensorFlow GPU-enabl make Dockerfile.train -If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: +If you want to specify a different Mozilla Voice STT repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: .. 
code-block:: bash @@ -105,7 +105,7 @@ After extraction of such a data set, you'll find the following contents: * the ``*.tsv`` files output by CorporaCreator for the downloaded language * the mp3 audio files they reference in a ``clips`` sub-directory. -For bringing this data into a form that DeepSpeech understands, you have to run the CommonVoice v2.0 importer (\ ``bin/import_cv2.py``\ ): +For bringing this data into a form that Mozilla Voice STT understands, you have to run the CommonVoice v2.0 importer (\ ``bin/import_cv2.py``\ ): .. code-block:: bash @@ -147,7 +147,7 @@ For executing pre-configured training scenarios, there is a collection of conven **If you experience GPU OOM errors while training, try reducing the batch size with the ``--train_batch_size``\ , ``--dev_batch_size`` and ``--test_batch_size`` parameters.** -As a simple first example you can open a terminal, change to the directory of the DeepSpeech checkout, activate the virtualenv created above, and run: +As a simple first example you can open a terminal, change to the directory of the Mozilla Voice STT checkout, activate the virtualenv created above, and run: .. code-block:: bash @@ -157,7 +157,7 @@ This script will train on a small sample dataset composed of just a single audio Feel also free to pass additional (or overriding) ``DeepSpeech.py`` parameters to these scripts. Then, just run the script to train the modified network. -Each dataset has a corresponding importer script in ``bin/`` that can be used to download (if it's freely available) and preprocess the dataset. See ``bin/import_librivox.py`` for an example of how to import and preprocess a large dataset for training with DeepSpeech. +Each dataset has a corresponding importer script in ``bin/`` that can be used to download (if it's freely available) and preprocess the dataset. See ``bin/import_librivox.py`` for an example of how to import and preprocess a large dataset for training with Mozilla Voice STT. Some importers might require additional code to properly handled your locale-specific requirements. Such handling is dealt with ``--validate_label_locale`` flag that allows you to source out-of-tree Python script that defines a ``validate_label`` function. Please refer to ``util/importers.py`` for implementation example of that function. If you don't provide this argument, the default ``validate_label`` function will be used. This one is only intended for English language, so you might have consistency issues in your data for other languages. @@ -184,7 +184,7 @@ Mixed precision training makes use of both FP32 and FP16 precisions where approp python3 DeepSpeech.py --train_files ./train.csv --dev_files ./dev.csv --test_files ./test.csv --automatic_mixed_precision ``` -On a Volta generation V100 GPU, automatic mixed precision speeds up DeepSpeech training and evaluation by ~30%-40%. +On a Volta generation V100 GPU, automatic mixed precision speeds up Mozilla Voice STT training and evaluation by ~30%-40%. Checkpointing ^^^^^^^^^^^^^ @@ -226,9 +226,9 @@ Upon sucessfull run, it should report about conversion of a non-zero number of n Continuing training from a release model ---------------------------------------- -There are currently two supported approaches to make use of a pre-trained DeepSpeech model: fine-tuning or transfer-learning. Choosing which one to use is a simple decision, and it depends on your target dataset. Does your data use the same alphabet as the release model? If "Yes": fine-tune. If "No" use transfer-learning. 
+There are currently two supported approaches to make use of a pre-trained Mozilla Voice STT model: fine-tuning or transfer-learning. Choosing which one to use is a simple decision, and it depends on your target dataset. Does your data use the same alphabet as the release model? If "Yes": fine-tune. If "No": use transfer-learning. -If your own data uses the *extact* same alphabet as the English release model (i.e. `a-z` plus `'`) then the release model's output layer will match your data, and you can just fine-tune the existing parameters. However, if you want to use a new alphabet (e.g. Cyrillic `а`, `б`, `д`), the output layer of a release DeepSpeech model will *not* match your data. In this case, you should use transfer-learning (i.e. remove the trained model's output layer, and reinitialize a new output layer that matches your target character set. +If your own data uses the *exact* same alphabet as the English release model (i.e. `a-z` plus `'`) then the release model's output layer will match your data, and you can just fine-tune the existing parameters. However, if you want to use a new alphabet (e.g. Cyrillic `а`, `б`, `д`), the output layer of a release Mozilla Voice STT model will *not* match your data. In this case, you should use transfer-learning (i.e. remove the trained model's output layer, and reinitialize a new output layer that matches your target character set). N.B. - If you have access to a pre-trained model which uses UTF-8 bytes at the output layer you can always fine-tune, because any alphabet should be encodable as UTF-8. @@ -260,11 +260,11 @@ If you try to load a release model without following these steps, you'll get an Transfer-Learning (new alphabet) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you want to continue training an alphabet-based DeepSpeech model (i.e. not a UTF-8 model) on a new language, or if you just want to add new characters to your custom alphabet, you will probably want to use transfer-learning instead of fine-tuning. If you're starting with a pre-trained UTF-8 model -- even if your data comes from a different language or uses a different alphabet -- the model will be able to predict your new transcripts, and you should use fine-tuning instead. +If you want to continue training an alphabet-based Mozilla Voice STT model (i.e. not a UTF-8 model) on a new language, or if you just want to add new characters to your custom alphabet, you will probably want to use transfer-learning instead of fine-tuning. If you're starting with a pre-trained UTF-8 model -- even if your data comes from a different language or uses a different alphabet -- the model will be able to predict your new transcripts, and you should use fine-tuning instead. -In a nutshell, DeepSpeech's transfer-learning allows you to remove certain layers from a pre-trained model, initialize new layers for your target data, stitch together the old and new layers, and update all layers via gradient descent. You will remove the pre-trained output layer (and optionally more layers) and reinitialize parameters to fit your target alphabet. The simplest case of transfer-learning is when you remove just the output layer. +In a nutshell, Mozilla Voice STT's transfer-learning allows you to remove certain layers from a pre-trained model, initialize new layers for your target data, stitch together the old and new layers, and update all layers via gradient descent. You will remove the pre-trained output layer (and optionally more layers) and reinitialize parameters to fit your target alphabet.
The simplest case of transfer-learning is when you remove just the output layer. -In DeepSpeech's implementation of transfer-learning, all removed layers will be contiguous, starting from the output layer. The key flag you will want to experiment with is ``--drop_source_layers``. This flag accepts an integer from ``1`` to ``5`` and allows you to specify how many layers you want to remove from the pre-trained model. For example, if you supplied ``--drop_source_layers 3``, you will drop the last three layers of the pre-trained model: the output layer, penultimate layer, and LSTM layer. All dropped layers will be reinintialized, and (crucially) the output layer will be defined to match your supplied target alphabet. +In Mozilla Voice STT's implementation of transfer-learning, all removed layers will be contiguous, starting from the output layer. The key flag you will want to experiment with is ``--drop_source_layers``. This flag accepts an integer from ``1`` to ``5`` and allows you to specify how many layers you want to remove from the pre-trained model. For example, if you supplied ``--drop_source_layers 3``, you will drop the last three layers of the pre-trained model: the output layer, penultimate layer, and LSTM layer. All dropped layers will be reinitialized, and (crucially) the output layer will be defined to match your supplied target alphabet. You need to specify the location of the pre-trained model with ``--load_checkpoint_dir`` and define where your new model checkpoints will be saved with ``--save_checkpoint_dir``. You need to specify how many layers to remove (aka "drop") from the pre-trained model: ``--drop_source_layers``. You also need to supply your new alphabet file using the standard ``--alphabet_config_path`` (remember, using a new alphabet is the whole reason you want to use transfer-learning). @@ -282,8 +282,7 @@ You need to specify the location of the pre-trained model with ``--load_checkpoi UTF-8 mode ^^^^^^^^^^ -DeepSpeech includes a UTF-8 operating mode which can be useful to model languages with very large alphabets, such as Chinese Mandarin. For details on how it works and how to use it, see :ref:`decoder-docs`. - +Mozilla Voice STT includes a UTF-8 operating mode which can be useful to model languages with very large alphabets, such as Chinese Mandarin. For details on how it works and how to use it, see :ref:`decoder-docs`. .. _training-data-augmentation: diff --git a/doc/USING.rst b/doc/USING.rst index 12519980a9..c81c8fd1fd 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -3,7 +3,7 @@ Using a Pre-trained Model ========================= -Inference using a DeepSpeech pre-trained model can be done with a client/language binding package. We have four clients/language bindings in this repository, listed below, and also a few community-maintained clients/language bindings in other repositories, listed `further down in this README <#third-party-bindings>`_. +Inference using a Mozilla Voice STT pre-trained model can be done with a client/language binding package. We have four clients/language bindings in this repository, listed below, and also a few community-maintained clients/language bindings in other repositories, listed `further down in this README <#third-party-bindings>`_. * :ref:`The C API `.
* :ref:`The Python package/language binding ` @@ -33,7 +33,7 @@ The GPU capable builds (Python, NodeJS, C++, etc) depend on CUDA 10.1 and CuDNN Getting the pre-trained model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you want to use the pre-trained English model for performing speech-to-text, you can download it (along with other important inference material) from the DeepSpeech `releases page `_. Alternatively, you can run the following command to download the model files in your current directory: +If you want to use the pre-trained English model for performing speech-to-text, you can download it (along with other important inference material) from the Mozilla Voice STT `releases page `_. Alternatively, you can run the following command to download the model files in your current directory: .. code-block:: bash @@ -61,12 +61,12 @@ The release notes include detailed information on how the released models were t The process for training an acoustic model is described in :ref:`training-docs`. In particular, fine tuning a release model using your own data can be a good way to leverage relatively smaller amounts of data that would not be sufficient for training a new model from scratch. See the :ref:`fine tuning and transfer learning sections ` for more information. :ref:`Data augmentation ` can also be a good way to increase the value of smaller training sets. -Creating your own external scorer from text data is another way that you can adapt the model to your specific needs. The process and tools used to generate an external scorer package are described in :ref:`scorer-scripts` and an overview of how the external scorer is used by DeepSpeech to perform inference is available in :ref:`decoder-docs`. Generating a smaller scorer from a single purpose text dataset is a quick process and can bring significant accuracy improvements, specially for more constrained, limited vocabulary applications. +Creating your own external scorer from text data is another way that you can adapt the model to your specific needs. The process and tools used to generate an external scorer package are described in :ref:`scorer-scripts` and an overview of how the external scorer is used by Mozilla Voice STT to perform inference is available in :ref:`decoder-docs`. Generating a smaller scorer from a single purpose text dataset is a quick process and can bring significant accuracy improvements, especially for more constrained, limited vocabulary applications. Model compatibility ^^^^^^^^^^^^^^^^^^^ -DeepSpeech models are versioned to keep you from trying to use an incompatible graph with a newer client after a breaking change was made to the code. If you get an error saying your model file version is too old for the client, you should either upgrade to a newer model release, re-export your model from the checkpoint using a newer version of the code, or downgrade your client if you need to use the old model and can't re-export it. +Mozilla Voice STT models are versioned to keep you from trying to use an incompatible graph with a newer client after a breaking change was made to the code. If you get an error saying your model file version is too old for the client, you should either upgrade to a newer model release, re-export your model from the checkpoint using a newer version of the code, or downgrade your client if you need to use the old model and can't re-export it. ..
_py-usage: @@ -79,8 +79,8 @@ For the Python bindings, it is highly recommended that you perform the installat We will continue under the assumption that you already have your system properly setup to create new virtual environments. -Create a DeepSpeech virtual environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Create a Mozilla Voice STT virtual environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In creating a virtual environment you will create a directory containing a ``python3`` binary and everything needed to run deepspeech. You can use whatever directory you want. For the purpose of the documentation, we will rely on ``$HOME/tmp/deepspeech-venv``. You can create it using this command: @@ -93,16 +93,16 @@ Once this command completes successfully, the environment will be ready to be ac Activating the environment ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Each time you need to work with DeepSpeech, you have to *activate* this virtual environment. This is done with this simple command: +Each time you need to work with Mozilla Voice STT, you have to *activate* this virtual environment. This is done with this simple command: .. code-block:: $ source $HOME/tmp/deepspeech-venv/bin/activate -Installing DeepSpeech Python bindings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Installing Mozilla Voice STT Python bindings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once your environment has been set-up and loaded, you can use ``pip3`` to manage packages locally. On a fresh setup of the ``virtualenv``\ , you will have to install the DeepSpeech wheel. You can check if ``deepspeech`` is already installed with ``pip3 list``. +Once your environment has been set up and loaded, you can use ``pip3`` to manage packages locally. On a fresh setup of the ``virtualenv``\ , you will have to install the Mozilla Voice STT wheel. You can check if ``deepspeech`` is already installed with ``pip3 list``. To perform the installation, just use ``pip3`` as such: @@ -192,7 +192,7 @@ also, if you need some binaries different than current master, like ``v0.2.0-alp python3 util/taskcluster.py --branch "v0.2.0-alpha.6" --target "." -The script ``taskcluster.py`` will download ``native_client.tar.xz`` (which includes the ``deepspeech`` binary and associated libraries) and extract it into the current folder. Also, ``taskcluster.py`` will download binaries for Linux/x86_64 by default, but you can override that behavior with the ``--arch`` parameter. See the help info with ``python util/taskcluster.py -h`` for more details. Specific branches of DeepSpeech or TensorFlow can be specified as well. +The script ``taskcluster.py`` will download ``native_client.tar.xz`` (which includes the ``deepspeech`` binary and associated libraries) and extract it into the current folder. Also, ``taskcluster.py`` will download binaries for Linux/x86_64 by default, but you can override that behavior with the ``--arch`` parameter. See the help info with ``python util/taskcluster.py -h`` for more details. Specific branches of Mozilla Voice STT or TensorFlow can be specified as well. Alternatively you may manually download the ``native_client.tar.xz`` from the [releases](https://github.com/mozilla/DeepSpeech/releases). @@ -212,14 +212,14 @@ If pre-built binaries aren't available for your system, you'll need to install t Dockerfile for building from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We provide ``Dockerfile.build`` to automatically build ``libdeepspeech.so``, the C++ native client, Python bindings, and KenLM.
+We provide ``Dockerfile.build`` to automatically build ``libmozilla_voice_stt.so``, the C++ native client, Python bindings, and KenLM. You need to generate the Dockerfile from the template using: .. code-block:: bash make Dockerfile.build -If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: +If you want to specify a different Mozilla Voice STT repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: .. code-block:: bash diff --git a/doc/conf.py b/doc/conf.py index bb64d77e28..4a6c769ade 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# DeepSpeech documentation build configuration file, created by +# Mozilla Voice STT documentation build configuration file, created by # sphinx-quickstart on Thu Feb 2 21:20:39 2017. # # This file is execfile()d with the current directory set to its @@ -41,7 +41,7 @@ # -- Project information ----------------------------------------------------- -project = u'DeepSpeech' +project = u'Mozilla Voice STT' copyright = '2019-2020, Mozilla Corporation' author = 'Mozilla Corporation' @@ -170,7 +170,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'DeepSpeech.tex', u'DeepSpeech Documentation', + (master_doc, 'Mozilla_Voice_STT.tex', u'Mozilla Voice STT Documentation', u'Mozilla Research', 'manual'), ] @@ -180,7 +180,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'deepspeech', u'DeepSpeech Documentation', + (master_doc, 'deepspeech', u'Mozilla Voice STT Documentation', [author], 1) ] @@ -191,8 +191,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'DeepSpeech', u'DeepSpeech Documentation', - author, 'DeepSpeech', 'One line description of project.', + (master_doc, 'Mozilla Voice STT', u'Mozilla Voice STT Documentation', + author, 'Mozilla Voice STT', 'One line description of project.', 'Miscellaneous'), ] diff --git a/doc/doxygen-c.conf b/doc/doxygen-c.conf index f36f57b205..daecb5f4cd 100644 --- a/doc/doxygen-c.conf +++ b/doc/doxygen-c.conf @@ -790,7 +790,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = native_client/deepspeech.h +INPUT = native_client/mozilla_voice_stt.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/doc/index.rst b/doc/index.rst index e8991d3f58..8becba91af 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,14 +1,14 @@ -.. DeepSpeech documentation master file, created by +.. Mozilla Voice STT documentation master file, created by sphinx-quickstart on Thu Feb 2 21:20:39 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to DeepSpeech's documentation! +Welcome to Mozilla Voice STT's documentation! ====================================== -DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper `_. Project DeepSpeech uses Google's `TensorFlow `_ to make the implementation easier. 
+Mozilla Voice STT is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper `_. Project Mozilla Voice STT uses Google's `TensorFlow `_ to make the implementation easier. -To install and use DeepSpeech all you have to do is: +To install and use Mozilla Voice STT all you have to do is: .. code-block:: bash @@ -16,7 +16,7 @@ To install and use DeepSpeech all you have to do is: virtualenv -p python3 $HOME/tmp/deepspeech-venv/ source $HOME/tmp/deepspeech-venv/bin/activate - # Install DeepSpeech + # Install Mozilla Voice STT pip3 install deepspeech # Download pre-trained English model files @@ -40,7 +40,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th virtualenv -p python3 $HOME/tmp/deepspeech-gpu-venv/ source $HOME/tmp/deepspeech-gpu-venv/bin/activate - # Install DeepSpeech CUDA enabled package + # Install Mozilla Voice STT CUDA enabled package pip3 install deepspeech-gpu # Transcribe an audio file. @@ -76,7 +76,7 @@ See the output of ``deepspeech -h`` for more information on the use of ``deepspe :maxdepth: 2 :caption: Architecture and training - DeepSpeech + AcousticModel Geometry diff --git a/evaluate_tflite.py b/evaluate_tflite.py index 0d46261551..70e9bd97eb 100644 --- a/evaluate_tflite.py +++ b/evaluate_tflite.py @@ -10,7 +10,7 @@ import os import sys -from deepspeech import Model +from mozilla_voice_stt import Model from deepspeech_training.util.evaluate_tools import calculate_and_print_report from deepspeech_training.util.flags import create_flags from functools import partial @@ -19,8 +19,8 @@ r''' This module should be self-contained: - - build libdeepspeech.so with TFLite: - - bazel build [...] --define=runtime=tflite [...] //native_client:libdeepspeech.so + - build libmozilla_voice_stt.so with TFLite: + - bazel build [...] --define=runtime=tflite [...] //native_client:libmozilla_voice_stt.so - make -C native_client/python/ TFDIR=... bindings - setup a virtualenv - pip install native_client/python/dist/deepspeech*.whl diff --git a/native_client/Android.mk b/native_client/Android.mk index d21551fd1c..9c40d58542 100644 --- a/native_client/Android.mk +++ b/native_client/Android.mk @@ -1,14 +1,14 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -LOCAL_MODULE := deepspeech-prebuilt -LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libdeepspeech.so +LOCAL_MODULE := mozilla_voice_stt-prebuilt +LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libmozilla_voice_stt.so include $(PREBUILT_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_CPP_EXTENSION := .cc .cxx .cpp -LOCAL_MODULE := deepspeech +LOCAL_MODULE := mozilla_voice_stt LOCAL_SRC_FILES := client.cc -LOCAL_SHARED_LIBRARIES := deepspeech-prebuilt +LOCAL_SHARED_LIBRARIES := mozilla_voice_stt-prebuilt LOCAL_LDFLAGS := -Wl,--no-as-needed include $(BUILD_EXECUTABLE) diff --git a/native_client/BUILD b/native_client/BUILD index 92eb788cae..0b8ffed341 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -96,10 +96,10 @@ cc_library( ) tf_cc_shared_object( - name = "libdeepspeech.so", + name = "libmozilla_voice_stt.so", srcs = [ "deepspeech.cc", - "deepspeech.h", + "mozilla_voice_stt.h", "deepspeech_errors.cc", "modelstate.cc", "modelstate.h", @@ -149,7 +149,7 @@ tf_cc_shared_object( #"//tensorflow/core:all_kernels", ### => Trying to be more fine-grained ### Use bin/ops_in_graph.py to list all the ops used by a frozen graph. 
- ### CPU only build, libdeepspeech.so file size reduced by ~50% + ### CPU only build, libmozilla_voice_stt.so file size reduced by ~50% "//tensorflow/core/kernels:spectrogram_op", # AudioSpectrogram "//tensorflow/core/kernels:bias_op", # BiasAdd "//tensorflow/core/kernels:cast_op", # Cast @@ -189,11 +189,11 @@ tf_cc_shared_object( ) genrule( - name = "libdeepspeech_so_dsym", - srcs = [":libdeepspeech.so"], - outs = ["libdeepspeech.so.dSYM"], + name = "libmozilla_voice_stt_so_dsym", + srcs = [":libmozilla_voice_stt.so"], + outs = ["libmozilla_voice_stt.so.dSYM"], output_to_bindir = True, - cmd = "dsymutil $(location :libdeepspeech.so) -o $@" + cmd = "dsymutil $(location :libmozilla_voice_stt.so) -o $@" ) cc_binary( diff --git a/native_client/Makefile b/native_client/Makefile index b645499c28..597adc1265 100644 --- a/native_client/Makefile +++ b/native_client/Makefile @@ -16,32 +16,32 @@ include definitions.mk default: $(DEEPSPEECH_BIN) clean: - rm -f deepspeech + rm -f $(DEEPSPEECH_BIN) $(DEEPSPEECH_BIN): client.cc Makefile $(CXX) $(CFLAGS) $(CFLAGS_DEEPSPEECH) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS) ifeq ($(OS),Darwin) - install_name_tool -change bazel-out/local-opt/bin/native_client/libdeepspeech.so @rpath/libdeepspeech.so deepspeech + install_name_tool -change bazel-out/local-opt/bin/native_client/libmozilla_voice_stt.so @rpath/libmozilla_voice_stt.so $(DEEPSPEECH_BIN) endif run: $(DEEPSPEECH_BIN) - ${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS} + ${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./$(DEEPSPEECH_BIN) ${ARGS} debug: $(DEEPSPEECH_BIN) - ${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} gdb --args ./deepspeech ${ARGS} + ${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} gdb --args ./$(DEEPSPEECH_BIN) ${ARGS} install: $(DEEPSPEECH_BIN) install -d ${PREFIX}/lib - install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/ + install -m 0644 ${TFDIR}/bazel-bin/native_client/libmozilla_voice_stt.so ${PREFIX}/lib/ install -d ${PREFIX}/include - install -m 0644 deepspeech.h ${PREFIX}/include + install -m 0644 mozilla_voice_stt.h ${PREFIX}/include install -d ${PREFIX}/bin - install -m 0755 deepspeech ${PREFIX}/bin/ + install -m 0755 $(DEEPSPEECH_BIN) ${PREFIX}/bin/ uninstall: - rm -f ${PREFIX}/bin/deepspeech + rm -f ${PREFIX}/bin/$(DEEPSPEECH_BIN) rmdir --ignore-fail-on-non-empty ${PREFIX}/bin - rm -f ${PREFIX}/lib/libdeepspeech.so + rm -f ${PREFIX}/lib/libmozilla_voice_stt.so rmdir --ignore-fail-on-non-empty ${PREFIX}/lib print-toolchain: diff --git a/native_client/args.h b/native_client/args.h index baa9b7ffa3..0f26743c3c 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -8,7 +8,7 @@ #endif #include -#include "deepspeech.h" +#include "mozilla_voice_stt.h" char* model = NULL; @@ -43,7 +43,7 @@ void PrintHelp(const char* bin) std::cout << "Usage: " << bin << " --model MODEL [--scorer SCORER] --audio AUDIO [-t] [-e]\n" "\n" - "Running DeepSpeech inference.\n" + "Running Mozilla Voice STT inference.\n" "\n" "\t--model MODEL\t\t\tPath to the model (protocol buffer binary file)\n" "\t--scorer SCORER\t\t\tPath to the external scorer file\n" @@ -58,9 +58,9 @@ void PrintHelp(const char* bin) "\t--stream size\t\t\tRun in stream mode, output intermediate results\n" "\t--help\t\t\t\tShow help\n" "\t--version\t\t\tPrint version and exits\n"; - char* version = DS_Version(); - std::cerr << 
"DeepSpeech " << version << "\n"; - DS_FreeString(version); + char* version = STT_Version(); + std::cerr << "Mozilla Voice STT " << version << "\n"; + STT_FreeString(version); exit(1); } @@ -153,9 +153,9 @@ bool ProcessArgs(int argc, char** argv) } if (has_versions) { - char* version = DS_Version(); - std::cout << "DeepSpeech " << version << "\n"; - DS_FreeString(version); + char* version = STT_Version(); + std::cout << "Mozilla Voice STT " << version << "\n"; + STT_FreeString(version); return false; } diff --git a/native_client/client.cc b/native_client/client.cc index 46a16115c5..4fa167d2d2 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -34,7 +34,7 @@ #endif // NO_DIR #include -#include "deepspeech.h" +#include "mozilla_voice_stt.h" #include "args.h" typedef struct { @@ -168,17 +168,17 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, // sphinx-doc: c_ref_inference_start if (extended_output) { - Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); + Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); res.string = CandidateTranscriptToString(&result->transcripts[0]); - DS_FreeMetadata(result); + STT_FreeMetadata(result); } else if (json_output) { - Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts); + Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts); res.string = MetadataToJSON(result); - DS_FreeMetadata(result); + STT_FreeMetadata(result); } else if (stream_size > 0) { StreamingState* ctx; - int status = DS_CreateStream(aCtx, &ctx); - if (status != DS_ERR_OK) { + int status = STT_CreateStream(aCtx, &ctx); + if (status != STT_ERR_OK) { res.string = strdup(""); return res; } @@ -186,22 +186,22 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, const char *last = nullptr; while (off < aBufferSize) { size_t cur = aBufferSize - off > stream_size ? 
stream_size : aBufferSize - off; - DS_FeedAudioContent(ctx, aBuffer + off, cur); + STT_FeedAudioContent(ctx, aBuffer + off, cur); off += cur; - const char* partial = DS_IntermediateDecode(ctx); + const char* partial = STT_IntermediateDecode(ctx); if (last == nullptr || strcmp(last, partial)) { printf("%s\n", partial); last = partial; } else { - DS_FreeString((char *) partial); + STT_FreeString((char *) partial); } } if (last != nullptr) { - DS_FreeString((char *) last); + STT_FreeString((char *) last); } - res.string = DS_FinishStream(ctx); + res.string = STT_FinishStream(ctx); } else { - res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize); + res.string = STT_SpeechToText(aCtx, aBuffer, aBufferSize); } // sphinx-doc: c_ref_inference_stop @@ -367,7 +367,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) void ProcessFile(ModelState* context, const char* path, bool show_times) { - ds_audio_buffer audio = GetAudioBuffer(path, DS_GetModelSampleRate(context)); + ds_audio_buffer audio = GetAudioBuffer(path, STT_GetModelSampleRate(context)); // Pass audio to DeepSpeech // We take half of buffer_size because buffer is a char* while @@ -381,7 +381,7 @@ ProcessFile(ModelState* context, const char* path, bool show_times) if (result.string) { printf("%s\n", result.string); - DS_FreeString((char*)result.string); + STT_FreeString((char*)result.string); } if (show_times) { @@ -400,16 +400,16 @@ main(int argc, char **argv) // Initialise DeepSpeech ModelState* ctx; // sphinx-doc: c_ref_model_start - int status = DS_CreateModel(model, &ctx); + int status = STT_CreateModel(model, &ctx); if (status != 0) { - char* error = DS_ErrorCodeToErrorMessage(status); + char* error = STT_ErrorCodeToErrorMessage(status); fprintf(stderr, "Could not create model: %s\n", error); free(error); return 1; } if (set_beamwidth) { - status = DS_SetModelBeamWidth(ctx, beam_width); + status = STT_SetModelBeamWidth(ctx, beam_width); if (status != 0) { fprintf(stderr, "Could not set model beam width.\n"); return 1; @@ -417,13 +417,13 @@ main(int argc, char **argv) } if (scorer) { - status = DS_EnableExternalScorer(ctx, scorer); + status = STT_EnableExternalScorer(ctx, scorer); if (status != 0) { fprintf(stderr, "Could not enable external scorer.\n"); return 1; } if (set_alphabeta) { - status = DS_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta); + status = STT_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta); if (status != 0) { fprintf(stderr, "Error setting scorer alpha and beta.\n"); return 1; @@ -485,7 +485,7 @@ main(int argc, char **argv) sox_quit(); #endif // NO_SOX - DS_FreeModel(ctx); + STT_FreeModel(ctx); return 0; } diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 2dc2be560d..c01d671238 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -10,7 +10,7 @@ # Hack: import error codes by matching on their names, as SWIG unfortunately # does not support binding enums to Python in a scoped manner yet. 
for symbol in dir(swigwrapper): - if symbol.startswith('DS_ERR_'): + if symbol.startswith('STT_ERR_'): globals()[symbol] = getattr(swigwrapper, symbol) class Scorer(swigwrapper.Scorer): diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 23982ef33a..ad41dd8e2e 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path) // Check if file is readable to avoid KenLM throwing an exception const char* filename = lm_path.c_str(); if (access(filename, R_OK) != 0) { - return DS_ERR_SCORER_UNREADABLE; + return STT_ERR_SCORER_UNREADABLE; } // Check if the file format is valid to avoid KenLM throwing an exception lm::ngram::ModelType model_type; if (!lm::ngram::RecognizeBinary(filename, model_type)) { - return DS_ERR_SCORER_INVALID_LM; + return STT_ERR_SCORER_INVALID_LM; } // Load the LM @@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path) uint64_t trie_offset = language_model_->GetEndOfSearchOffset(); if (package_size <= trie_offset) { // File ends without a trie structure - return DS_ERR_SCORER_NO_TRIE; + return STT_ERR_SCORER_NO_TRIE; } // Read metadata and trie from file @@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) if (magic != MAGIC) { std::cerr << "Error: Can't parse scorer file, invalid header. Try updating " "your scorer file." << std::endl; - return DS_ERR_SCORER_INVALID_TRIE; + return STT_ERR_SCORER_INVALID_TRIE; } int version; @@ -125,10 +125,10 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) if (version < FILE_VERSION) { std::cerr << "Update your scorer file."; } else { - std::cerr << "Downgrade your scorer file or update your version of DeepSpeech."; + std::cerr << "Downgrade your scorer file or update your version of Mozilla Voice STT."; } std::cerr << std::endl; - return DS_ERR_SCORER_VERSION_MISMATCH; + return STT_ERR_SCORER_VERSION_MISMATCH; } fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); @@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) opt.mode = fst::FstReadOptions::MAP; opt.source = file_path; dictionary.reset(FstType::Read(fin, opt)); - return DS_ERR_OK; + return STT_ERR_OK; } bool Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index 5aee1046ff..ee361d7a60 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -13,7 +13,7 @@ #include "path_trie.h" #include "alphabet.h" -#include "deepspeech.h" +#include "mozilla_voice_stt.h" const double OOV_SCORE = -1000.0; const std::string START_TOKEN = ""; diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index dbe67c689c..9daf7d89d8 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -42,14 +42,14 @@ namespace std { %constant const char* __version__ = ds_version(); %constant const char* __git_version__ = ds_git_version(); -// Import only the error code enum definitions from deepspeech.h +// Import only the error code enum definitions from mozilla_voice_stt.h // We can't just do |%ignore "";| here because it affects this file globally (even // files %include'd above). That causes SWIG to lose destructor information and // leads to leaks of the wrapper objects. 
// Instead we ignore functions and classes (structs), which are the only other -// things in deepspeech.h. If we add some new construct to deepspeech.h we need +// things in mozilla_voice_stt.h. If we add some new construct to mozilla_voice_stt.h we need // to update the ignore rules here to avoid exposing unwanted APIs in the decoder // package. %rename("$ignore", %$isfunction) ""; %rename("$ignore", %$isclass) ""; -%include "../deepspeech.h" +%include "../mozilla_voice_stt.h" diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 38868d4b5f..01a9292b64 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -9,7 +9,7 @@ #include #include -#include "deepspeech.h" +#include "mozilla_voice_stt.h" #include "alphabet.h" #include "modelstate.h" @@ -25,7 +25,7 @@ #ifdef __ANDROID__ #include -#define LOG_TAG "libdeepspeech" +#define LOG_TAG "libmozilla_voice_stt" #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) #else @@ -263,23 +263,23 @@ StreamingState::processBatch(const vector& buf, unsigned int n_steps) } int -DS_CreateModel(const char* aModelPath, +STT_CreateModel(const char* aModelPath, ModelState** retval) { *retval = nullptr; std::cerr << "TensorFlow: " << tf_local_git_version() << std::endl; - std::cerr << "DeepSpeech: " << ds_git_version() << std::endl; + std::cerr << "Mozilla Voice STT: " << ds_git_version() << std::endl; #ifdef __ANDROID__ LOGE("TensorFlow: %s", tf_local_git_version()); LOGD("TensorFlow: %s", tf_local_git_version()); - LOGE("DeepSpeech: %s", ds_git_version()); - LOGD("DeepSpeech: %s", ds_git_version()); + LOGE("Mozilla Voice STT: %s", ds_git_version()); + LOGD("Mozilla Voice STT: %s", ds_git_version()); #endif if (!aModelPath || strlen(aModelPath) < 1) { std::cerr << "No model specified, cannot continue." << std::endl; - return DS_ERR_NO_MODEL; + return STT_ERR_NO_MODEL; } std::unique_ptr model( @@ -292,79 +292,79 @@ DS_CreateModel(const char* aModelPath, if (!model) { std::cerr << "Could not allocate model state." 
<< std::endl; - return DS_ERR_FAIL_CREATE_MODEL; + return STT_ERR_FAIL_CREATE_MODEL; } int err = model->init(aModelPath); - if (err != DS_ERR_OK) { + if (err != STT_ERR_OK) { return err; } *retval = model.release(); - return DS_ERR_OK; + return STT_ERR_OK; } unsigned int -DS_GetModelBeamWidth(const ModelState* aCtx) +STT_GetModelBeamWidth(const ModelState* aCtx) { return aCtx->beam_width_; } int -DS_SetModelBeamWidth(ModelState* aCtx, unsigned int aBeamWidth) +STT_SetModelBeamWidth(ModelState* aCtx, unsigned int aBeamWidth) { aCtx->beam_width_ = aBeamWidth; return 0; } int -DS_GetModelSampleRate(const ModelState* aCtx) +STT_GetModelSampleRate(const ModelState* aCtx) { return aCtx->sample_rate_; } void -DS_FreeModel(ModelState* ctx) +STT_FreeModel(ModelState* ctx) { delete ctx; } int -DS_EnableExternalScorer(ModelState* aCtx, +STT_EnableExternalScorer(ModelState* aCtx, const char* aScorerPath) { std::unique_ptr scorer(new Scorer()); int err = scorer->init(aScorerPath, aCtx->alphabet_); if (err != 0) { - return DS_ERR_INVALID_SCORER; + return STT_ERR_INVALID_SCORER; } aCtx->scorer_ = std::move(scorer); - return DS_ERR_OK; + return STT_ERR_OK; } int -DS_DisableExternalScorer(ModelState* aCtx) +STT_DisableExternalScorer(ModelState* aCtx) { if (aCtx->scorer_) { aCtx->scorer_.reset(); - return DS_ERR_OK; + return STT_ERR_OK; } - return DS_ERR_SCORER_NOT_ENABLED; + return STT_ERR_SCORER_NOT_ENABLED; } -int DS_SetScorerAlphaBeta(ModelState* aCtx, +int STT_SetScorerAlphaBeta(ModelState* aCtx, float aAlpha, float aBeta) { if (aCtx->scorer_) { aCtx->scorer_->reset_params(aAlpha, aBeta); - return DS_ERR_OK; + return STT_ERR_OK; } - return DS_ERR_SCORER_NOT_ENABLED; + return STT_ERR_SCORER_NOT_ENABLED; } int -DS_CreateStream(ModelState* aCtx, +STT_CreateStream(ModelState* aCtx, StreamingState** retval) { *retval = nullptr; @@ -372,7 +372,7 @@ DS_CreateStream(ModelState* aCtx, std::unique_ptr ctx(new StreamingState()); if (!ctx) { std::cerr << "Could not allocate streaming state." 
<< std::endl; - return DS_ERR_FAIL_CREATE_STREAM; + return STT_ERR_FAIL_CREATE_STREAM; } ctx->audio_buffer_.reserve(aCtx->audio_win_len_); @@ -393,11 +393,11 @@ DS_CreateStream(ModelState* aCtx, aCtx->scorer_); *retval = ctx.release(); - return DS_ERR_OK; + return STT_ERR_OK; } void -DS_FeedAudioContent(StreamingState* aSctx, +STT_FeedAudioContent(StreamingState* aSctx, const short* aBuffer, unsigned int aBufferSize) { @@ -405,32 +405,32 @@ DS_FeedAudioContent(StreamingState* aSctx, } char* -DS_IntermediateDecode(const StreamingState* aSctx) +STT_IntermediateDecode(const StreamingState* aSctx) { return aSctx->intermediateDecode(); } Metadata* -DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, +STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx, unsigned int aNumResults) { return aSctx->intermediateDecodeWithMetadata(aNumResults); } char* -DS_FinishStream(StreamingState* aSctx) +STT_FinishStream(StreamingState* aSctx) { char* str = aSctx->finishStream(); - DS_FreeStream(aSctx); + STT_FreeStream(aSctx); return str; } Metadata* -DS_FinishStreamWithMetadata(StreamingState* aSctx, +STT_FinishStreamWithMetadata(StreamingState* aSctx, unsigned int aNumResults) { Metadata* result = aSctx->finishStreamWithMetadata(aNumResults); - DS_FreeStream(aSctx); + STT_FreeStream(aSctx); return result; } @@ -440,41 +440,41 @@ CreateStreamAndFeedAudioContent(ModelState* aCtx, unsigned int aBufferSize) { StreamingState* ctx; - int status = DS_CreateStream(aCtx, &ctx); - if (status != DS_ERR_OK) { + int status = STT_CreateStream(aCtx, &ctx); + if (status != STT_ERR_OK) { return nullptr; } - DS_FeedAudioContent(ctx, aBuffer, aBufferSize); + STT_FeedAudioContent(ctx, aBuffer, aBufferSize); return ctx; } char* -DS_SpeechToText(ModelState* aCtx, +STT_SpeechToText(ModelState* aCtx, const short* aBuffer, unsigned int aBufferSize) { StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize); - return DS_FinishStream(ctx); + return STT_FinishStream(ctx); } Metadata* -DS_SpeechToTextWithMetadata(ModelState* aCtx, +STT_SpeechToTextWithMetadata(ModelState* aCtx, const short* aBuffer, unsigned int aBufferSize, unsigned int aNumResults) { StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize); - return DS_FinishStreamWithMetadata(ctx, aNumResults); + return STT_FinishStreamWithMetadata(ctx, aNumResults); } void -DS_FreeStream(StreamingState* aSctx) +STT_FreeStream(StreamingState* aSctx) { delete aSctx; } void -DS_FreeMetadata(Metadata* m) +STT_FreeMetadata(Metadata* m) { if (m) { for (int i = 0; i < m->num_transcripts; ++i) { @@ -491,13 +491,13 @@ DS_FreeMetadata(Metadata* m) } void -DS_FreeString(char* str) +STT_FreeString(char* str) { free(str); } char* -DS_Version() +STT_Version() { return strdup(ds_version()); } diff --git a/native_client/deepspeech_errors.cc b/native_client/deepspeech_errors.cc index 1f1e4d8d15..69b580f62f 100644 --- a/native_client/deepspeech_errors.cc +++ b/native_client/deepspeech_errors.cc @@ -1,8 +1,8 @@ -#include "deepspeech.h" +#include "mozilla_voice_stt.h" #include char* -DS_ErrorCodeToErrorMessage(int aErrorCode) +STT_ErrorCodeToErrorMessage(int aErrorCode) { #define RETURN_MESSAGE(NAME, VALUE, DESC) \ case NAME: \ @@ -10,7 +10,7 @@ DS_ErrorCodeToErrorMessage(int aErrorCode) switch(aErrorCode) { - DS_FOR_EACH_ERROR(RETURN_MESSAGE) + STT_FOR_EACH_ERROR(RETURN_MESSAGE) default: return strdup("Unknown error, please make sure you are using the correct native binary."); } diff --git a/native_client/definitions.mk 
b/native_client/definitions.mk index 0c8ab656ba..bad584f83a 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -18,9 +18,9 @@ ifeq ($(findstring _NT,$(OS)),_NT) PLATFORM_EXE_SUFFIX := .exe endif -DEEPSPEECH_BIN := deepspeech$(PLATFORM_EXE_SUFFIX) +DEEPSPEECH_BIN := mozilla_voice_stt$(PLATFORM_EXE_SUFFIX) CFLAGS_DEEPSPEECH := -std=c++11 -o $(DEEPSPEECH_BIN) -LINK_DEEPSPEECH := -ldeepspeech +LINK_DEEPSPEECH := -lmozilla_voice_stt LINK_PATH_DEEPSPEECH := -L${TFDIR}/bazel-bin/native_client ifeq ($(TARGET),host) @@ -53,7 +53,7 @@ TOOL_CC := cl.exe TOOL_CXX := cl.exe TOOL_LD := link.exe TOOL_LIBEXE := lib.exe -LINK_DEEPSPEECH := $(TFDIR)\bazel-bin\native_client\libdeepspeech.so.if.lib +LINK_DEEPSPEECH := $(TFDIR)\bazel-bin\native_client\libmozilla_voice_stt.so.if.lib LINK_PATH_DEEPSPEECH := CFLAGS_DEEPSPEECH := -nologo -Fe$(DEEPSPEECH_BIN) SOX_CFLAGS := @@ -174,7 +174,7 @@ define copy_missing_libs new_missing="$$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 | xargs basename -a)"; \ missing_libs="$$missing_libs $$new_missing"; \ elif [ "$(OS)" = "${TC_MSYS_VERSION}" ]; then \ - missing_libs="libdeepspeech.so"; \ + missing_libs="libmozilla_voice_stt.so"; \ else \ missing_libs="$$missing_libs $$($(LDD) $$lib | grep 'not found' | awk '{ print $$1 }')"; \ fi; \ diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs index 08a3808b39..fda061d760 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs @@ -48,7 +48,7 @@ private unsafe void CreateModel(string aModelPath) { throw new FileNotFoundException(exceptionMessage); } - var resultCode = NativeImp.DS_CreateModel(aModelPath, + var resultCode = NativeImp.STT_CreateModel(aModelPath, ref _modelStatePP); EvaluateResultCode(resultCode); } @@ -60,7 +60,7 @@ private unsafe void CreateModel(string aModelPath) /// Beam width value used by the model. public unsafe uint GetModelBeamWidth() { - return NativeImp.DS_GetModelBeamWidth(_modelStatePP); + return NativeImp.STT_GetModelBeamWidth(_modelStatePP); } /// @@ -70,7 +70,7 @@ public unsafe uint GetModelBeamWidth() /// Thrown on failure. public unsafe void SetModelBeamWidth(uint aBeamWidth) { - var resultCode = NativeImp.DS_SetModelBeamWidth(_modelStatePP, aBeamWidth); + var resultCode = NativeImp.STT_SetModelBeamWidth(_modelStatePP, aBeamWidth); EvaluateResultCode(resultCode); } @@ -80,7 +80,7 @@ public unsafe void SetModelBeamWidth(uint aBeamWidth) /// Sample rate. public unsafe int GetModelSampleRate() { - return NativeImp.DS_GetModelSampleRate(_modelStatePP); + return NativeImp.STT_GetModelSampleRate(_modelStatePP); } /// @@ -89,9 +89,9 @@ public unsafe int GetModelSampleRate() /// Native result code. 
private void EvaluateResultCode(ErrorCodes resultCode) { - if (resultCode != ErrorCodes.DS_ERR_OK) + if (resultCode != ErrorCodes.STT_ERR_OK) { - throw new ArgumentException(NativeImp.DS_ErrorCodeToErrorMessage((int)resultCode).PtrToString()); + throw new ArgumentException(NativeImp.STT_ErrorCodeToErrorMessage((int)resultCode).PtrToString()); } } @@ -100,7 +100,7 @@ private void EvaluateResultCode(ErrorCodes resultCode) /// public unsafe void Dispose() { - NativeImp.DS_FreeModel(_modelStatePP); + NativeImp.STT_FreeModel(_modelStatePP); } /// @@ -120,7 +120,7 @@ public unsafe void EnableExternalScorer(string aScorerPath) throw new FileNotFoundException($"Cannot find the scorer file: {aScorerPath}"); } - var resultCode = NativeImp.DS_EnableExternalScorer(_modelStatePP, aScorerPath); + var resultCode = NativeImp.STT_EnableExternalScorer(_modelStatePP, aScorerPath); EvaluateResultCode(resultCode); } @@ -130,7 +130,7 @@ public unsafe void EnableExternalScorer(string aScorerPath) /// Thrown when an external scorer is not enabled. public unsafe void DisableExternalScorer() { - var resultCode = NativeImp.DS_DisableExternalScorer(_modelStatePP); + var resultCode = NativeImp.STT_DisableExternalScorer(_modelStatePP); EvaluateResultCode(resultCode); } @@ -142,7 +142,7 @@ public unsafe void DisableExternalScorer() /// Thrown when an external scorer is not enabled. public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta) { - var resultCode = NativeImp.DS_SetScorerAlphaBeta(_modelStatePP, + var resultCode = NativeImp.STT_SetScorerAlphaBeta(_modelStatePP, aAlpha, aBeta); EvaluateResultCode(resultCode); @@ -155,7 +155,7 @@ public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta) /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize) { - NativeImp.DS_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); + NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); } /// @@ -165,7 +165,7 @@ public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, ui /// The STT result. public unsafe string FinishStream(DeepSpeechStream stream) { - return NativeImp.DS_FinishStream(stream.GetNativePointer()).PtrToString(); + return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString(); } /// @@ -176,7 +176,7 @@ public unsafe string FinishStream(DeepSpeechStream stream) /// The extended metadata result. public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults) { - return NativeImp.DS_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); + return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } /// @@ -186,7 +186,7 @@ public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aN /// The STT intermediate result. public unsafe string IntermediateDecode(DeepSpeechStream stream) { - return NativeImp.DS_IntermediateDecode(stream.GetNativePointer()).PtrToString(); + return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString(); } /// @@ -197,7 +197,7 @@ public unsafe string IntermediateDecode(DeepSpeechStream stream) /// The STT intermediate result. 
public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults) { - return NativeImp.DS_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); + return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } /// @@ -206,7 +206,7 @@ public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, u /// public unsafe string Version() { - return NativeImp.DS_Version().PtrToString(); + return NativeImp.STT_Version().PtrToString(); } /// @@ -215,7 +215,7 @@ public unsafe string Version() public unsafe DeepSpeechStream CreateStream() { IntPtr** streamingStatePointer = null; - var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref streamingStatePointer); + var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer); EvaluateResultCode(resultCode); return new DeepSpeechStream(streamingStatePointer); } @@ -227,7 +227,7 @@ public unsafe DeepSpeechStream CreateStream() /// public unsafe void FreeStream(DeepSpeechStream stream) { - NativeImp.DS_FreeStream(stream.GetNativePointer()); + NativeImp.STT_FreeStream(stream.GetNativePointer()); stream.Dispose(); } @@ -239,7 +239,7 @@ public unsafe void FreeStream(DeepSpeechStream stream) /// The STT result. Returns NULL on error. public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize) { - return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString(); + return NativeImp.STT_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString(); } /// @@ -251,7 +251,7 @@ public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize) /// The extended metadata. Returns NULL on error. public unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aNumResults) { - return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata(); + return NativeImp.STT_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata(); } #endregion diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs index 30660add2a..600c91d30f 100644 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs @@ -5,26 +5,25 @@ /// internal enum ErrorCodes { - // OK - DS_ERR_OK = 0x0000, - - // Missing invormations - DS_ERR_NO_MODEL = 0x1000, - - // Invalid parameters - DS_ERR_INVALID_ALPHABET = 0x2000, - DS_ERR_INVALID_SHAPE = 0x2001, - DS_ERR_INVALID_SCORER = 0x2002, - DS_ERR_MODEL_INCOMPATIBLE = 0x2003, - DS_ERR_SCORER_NOT_ENABLED = 0x2004, - - // Runtime failures - DS_ERR_FAIL_INIT_MMAP = 0x3000, - DS_ERR_FAIL_INIT_SESS = 0x3001, - DS_ERR_FAIL_INTERPRETER = 0x3002, - DS_ERR_FAIL_RUN_SESS = 0x3003, - DS_ERR_FAIL_CREATE_STREAM = 0x3004, - DS_ERR_FAIL_READ_PROTOBUF = 0x3005, - DS_ERR_FAIL_CREATE_SESS = 0x3006, + STT_ERR_OK = 0x0000, + STT_ERR_NO_MODEL = 0x1000, + STT_ERR_INVALID_ALPHABET = 0x2000, + STT_ERR_INVALID_SHAPE = 0x2001, + STT_ERR_INVALID_SCORER = 0x2002, + STT_ERR_MODEL_INCOMPATIBLE = 0x2003, + STT_ERR_SCORER_NOT_ENABLED = 0x2004, + STT_ERR_SCORER_UNREADABLE = 0x2005, + STT_ERR_SCORER_INVALID_LM = 0x2006, + STT_ERR_SCORER_NO_TRIE = 0x2007, + STT_ERR_SCORER_INVALID_TRIE = 0x2008, + STT_ERR_SCORER_VERSION_MISMATCH = 0x2009, + STT_ERR_FAIL_INIT_MMAP = 0x3000, + STT_ERR_FAIL_INIT_SESS = 0x3001, + STT_ERR_FAIL_INTERPRETER = 0x3002, + STT_ERR_FAIL_RUN_SESS = 
0x3003, + STT_ERR_FAIL_CREATE_STREAM = 0x3004, + STT_ERR_FAIL_READ_PROTOBUF = 0x3005, + STT_ERR_FAIL_CREATE_SESS = 0x3006, + STT_ERR_FAIL_CREATE_MODEL = 0x3007, } } diff --git a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs b/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs index 9325f4b82a..3e18f7cb9c 100644 --- a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs +++ b/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs @@ -20,7 +20,7 @@ internal static string PtrToString(this IntPtr intPtr, bool releasePtr = true) byte[] buffer = new byte[len]; Marshal.Copy(intPtr, buffer, 0, buffer.Length); if (releasePtr) - NativeImp.DS_FreeString(intPtr); + NativeImp.STT_FreeString(intPtr); string result = Encoding.UTF8.GetString(buffer); return result; } @@ -86,7 +86,7 @@ internal static Models.Metadata PtrToMetadata(this IntPtr intPtr) metadata.transcripts += sizeOfCandidateTranscript; } - NativeImp.DS_FreeMetadata(intPtr); + NativeImp.STT_FreeMetadata(intPtr); return managedMetadata; } } diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs index bc77cf1b18..566952960a 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs @@ -6,96 +6,96 @@ namespace DeepSpeechClient { /// - /// Wrapper for the native implementation of "libdeepspeech.so" + /// Wrapper for the native implementation of "libmozilla_voice_stt.so" /// internal static class NativeImp { #region Native Implementation - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static extern IntPtr DS_Version(); + internal static extern IntPtr STT_Version(); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath, ref IntPtr** pint); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern IntPtr DS_ErrorCodeToErrorMessage(int aErrorCode); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern IntPtr STT_ErrorCodeToErrorMessage(int aErrorCode); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern uint DS_GetModelBeamWidth(IntPtr** aCtx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern uint STT_GetModelBeamWidth(IntPtr** aCtx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_SetModelBeamWidth(IntPtr** aCtx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern ErrorCodes STT_SetModelBeamWidth(IntPtr** aCtx, uint aBeamWidth); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath, uint 
aBeamWidth, ref IntPtr** pint); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern int DS_GetModelSampleRate(IntPtr** aCtx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal unsafe static extern int STT_GetModelSampleRate(IntPtr** aCtx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_EnableExternalScorer(IntPtr** aCtx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes STT_EnableExternalScorer(IntPtr** aCtx, string aScorerPath); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_DisableExternalScorer(IntPtr** aCtx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes STT_DisableExternalScorer(IntPtr** aCtx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_SetScorerAlphaBeta(IntPtr** aCtx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes STT_SetScorerAlphaBeta(IntPtr** aCtx, float aAlpha, float aBeta); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx, + internal static unsafe extern IntPtr STT_SpeechToText(IntPtr** aCtx, short[] aBuffer, uint aBufferSize); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)] - internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)] + internal static unsafe extern IntPtr STT_SpeechToTextWithMetadata(IntPtr** aCtx, short[] aBuffer, uint aBufferSize, uint aNumResults); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeModel(IntPtr** aCtx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern void STT_FreeModel(IntPtr** aCtx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes STT_CreateStream(IntPtr** aCtx, ref IntPtr** retval); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeStream(IntPtr** aSctx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern void STT_FreeStream(IntPtr** aSctx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeMetadata(IntPtr metadata); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern void STT_FreeMetadata(IntPtr metadata); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern 
void DS_FreeString(IntPtr str); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern void STT_FreeString(IntPtr str); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern void DS_FeedAudioContent(IntPtr** aSctx, + internal static unsafe extern void STT_FeedAudioContent(IntPtr** aSctx, short[] aBuffer, uint aBufferSize); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_IntermediateDecode(IntPtr** aSctx); + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern IntPtr STT_IntermediateDecode(IntPtr** aSctx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_IntermediateDecodeWithMetadata(IntPtr** aSctx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern IntPtr STT_IntermediateDecodeWithMetadata(IntPtr** aSctx, uint aNumResults); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern IntPtr DS_FinishStream(IntPtr** aSctx); + internal static unsafe extern IntPtr STT_FinishStream(IntPtr** aSctx); - [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_FinishStreamWithMetadata(IntPtr** aSctx, + [DllImport("libmozilla_voice_stt.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern IntPtr STT_FinishStreamWithMetadata(IntPtr** aSctx, uint aNumResults); #endregion } diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst index b102557368..f998bfa3c9 100644 --- a/native_client/dotnet/README.rst +++ b/native_client/dotnet/README.rst @@ -126,7 +126,7 @@ We will add AVX/AVX2 support in the command, please make sure that your CPU supp .. code-block:: bash - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libmozilla_voice_stt.so GPU with CUDA ~~~~~~~~~~~~~ @@ -135,11 +135,11 @@ If you enabled CUDA in `configure.py `_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libdeepspeech.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory. +As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ `_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory. 
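Taken together, the .NET changes above track a single underlying rename of the C ABI: every ``DS_``-prefixed symbol exported by ``libdeepspeech.so`` becomes an ``STT_``-prefixed symbol exported by ``libmozilla_voice_stt.so``, with the signatures otherwise unchanged. As a reference for binding authors, here is a minimal sketch of calling the renamed C API directly, mirroring the calls made in ``native_client/client.cc``. The paths ``model.pbmm`` and ``kenlm.scorer``, the ``samples``/``sample_count`` buffer, and the ``transcribe`` function itself are illustrative placeholders, not part of this patch:

.. code-block:: c

   #include <stdio.h>
   #include "mozilla_voice_stt.h"   /* renamed from deepspeech.h */

   int transcribe(const short* samples, unsigned int sample_count)
   {
       ModelState* ctx = NULL;

       /* STT_CreateModel replaces DS_CreateModel */
       int status = STT_CreateModel("model.pbmm", &ctx);
       if (status != STT_ERR_OK) {
           char* error = STT_ErrorCodeToErrorMessage(status);
           fprintf(stderr, "Could not create model: %s\n", error);
           STT_FreeString(error);
           return 1;
       }

       /* Optional external scorer, as in client.cc */
       if (STT_EnableExternalScorer(ctx, "kenlm.scorer") != STT_ERR_OK) {
           fprintf(stderr, "Could not enable external scorer.\n");
       }

       /* One-shot decoding; the result must be freed with STT_FreeString */
       char* text = STT_SpeechToText(ctx, samples, sample_count);
       if (text) {
           printf("%s\n", text);
           STT_FreeString(text);
       }

       /* Streaming variant: STT_FinishStream both decodes and frees the
        * stream state, so no separate STT_FreeStream call is needed here */
       StreamingState* stream = NULL;
       if (STT_CreateStream(ctx, &stream) == STT_ERR_OK) {
           STT_FeedAudioContent(stream, samples, sample_count);
           char* final_text = STT_FinishStream(stream);
           if (final_text) {
               printf("%s\n", final_text);
               STT_FreeString(final_text);
           }
       }

       STT_FreeModel(ctx);   /* replaces DS_FreeModel */
       return 0;
   }

When linking such a client, ``-ldeepspeech`` likewise becomes ``-lmozilla_voice_stt``, matching the ``LINK_DEEPSPEECH`` change in ``definitions.mk`` below.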
diff --git a/native_client/dotnet/nupkg/deepspeech.nuspec.in b/native_client/dotnet/nupkg/deepspeech.nuspec.in index a4797177ce..fd1a169f14 100644 --- a/native_client/dotnet/nupkg/deepspeech.nuspec.in +++ b/native_client/dotnet/nupkg/deepspeech.nuspec.in @@ -3,7 +3,7 @@ $NUPKG_ID $NUPKG_VERSION - DeepSpeech + Mozilla_Voice_STT Mozilla Mozilla MPL-2.0 diff --git a/native_client/generate_scorer_package.cpp b/native_client/generate_scorer_package.cpp index 4486b42cb9..c33c4891cd 100644 --- a/native_client/generate_scorer_package.cpp +++ b/native_client/generate_scorer_package.cpp @@ -11,7 +11,7 @@ using namespace std; #include "ctcdecode/decoder_utils.h" #include "ctcdecode/scorer.h" #include "alphabet.h" -#include "deepspeech.h" +#include "mozilla_voice_stt.h" namespace po = boost::program_options; @@ -66,9 +66,9 @@ create_package(absl::optional alphabet_path, scorer.set_utf8_mode(force_utf8.value()); scorer.reset_params(default_alpha, default_beta); int err = scorer.load_lm(lm_path); - if (err != DS_ERR_SCORER_NO_TRIE) { + if (err != STT_ERR_SCORER_NO_TRIE) { cerr << "Error loading language model file: " - << DS_ErrorCodeToErrorMessage(err) << "\n"; + << STT_ErrorCodeToErrorMessage(err) << "\n"; return 1; } scorer.fill_dictionary(words); diff --git a/native_client/java/Makefile b/native_client/java/Makefile index 191b1013e1..1af3b83c64 100644 --- a/native_client/java/Makefile +++ b/native_client/java/Makefile @@ -14,13 +14,13 @@ apk-clean: $(GRADLE) clean libs-clean: - rm -fr libdeepspeech/libs/*/libdeepspeech.so + rm -fr libdeepspeech/libs/*/libmozilla_voice_stt.so -libdeepspeech/libs/%/libdeepspeech.so: +libdeepspeech/libs/%/libmozilla_voice_stt.so: -mkdir libdeepspeech/libs/$*/ - cp ${TFDIR}/bazel-out/$*-*/bin/native_client/libdeepspeech.so libdeepspeech/libs/$*/ + cp ${TFDIR}/bazel-out/$*-*/bin/native_client/libmozilla_voice_stt.so libdeepspeech/libs/$*/ -apk: apk-clean bindings $(patsubst %,libdeepspeech/libs/%/libdeepspeech.so,$(ARCHS)) +apk: apk-clean bindings $(patsubst %,libdeepspeech/libs/%/libmozilla_voice_stt.so,$(ARCHS)) $(GRADLE) build maven-bundle: apk diff --git a/native_client/java/jni/deepspeech.i b/native_client/java/jni/deepspeech.i index cd5a97a516..fea1ac8d0b 100644 --- a/native_client/java/jni/deepspeech.i +++ b/native_client/java/jni/deepspeech.i @@ -2,7 +2,7 @@ %{ #define SWIG_FILE_WITH_INIT -#include "../../deepspeech.h" +#include "../../mozilla_voice_stt.h" %} %include "typemaps.i" @@ -10,7 +10,7 @@ %javaconst(1); %include "arrays_java.i" -// apply to DS_FeedAudioContent and DS_SpeechToText +// apply to STT_FeedAudioContent and STT_SpeechToText %apply short[] { short* }; %include "cpointer.i" @@ -43,7 +43,7 @@ } ~Metadata() { - DS_FreeMetadata(self); + STT_FreeMetadata(self); } } @@ -54,13 +54,13 @@ %nodefaultctor TokenMetadata; %nodefaultdtor TokenMetadata; -%typemap(newfree) char* "DS_FreeString($1);"; -%newobject DS_SpeechToText; -%newobject DS_IntermediateDecode; -%newobject DS_FinishStream; -%newobject DS_ErrorCodeToErrorMessage; +%typemap(newfree) char* "STT_FreeString($1);"; +%newobject STT_SpeechToText; +%newobject STT_IntermediateDecode; +%newobject STT_FinishStream; +%newobject STT_ErrorCodeToErrorMessage; -%rename ("%(strip:[DS_])s") ""; +%rename ("%(strip:[STT_])s") ""; // make struct members camel case to suit Java conventions %rename ("%(camelcase)s", %$ismember) ""; @@ -71,4 +71,4 @@ %ignore "Metadata::transcripts"; %ignore "CandidateTranscript::tokens"; -%include "../deepspeech.h" +%include "../mozilla_voice_stt.h" diff --git 
a/native_client/java/libdeepspeech/CMakeLists.txt b/native_client/java/libdeepspeech/CMakeLists.txt index c64ea47e61..bd974369ee 100644 --- a/native_client/java/libdeepspeech/CMakeLists.txt +++ b/native_client/java/libdeepspeech/CMakeLists.txt @@ -26,12 +26,12 @@ add_library( deepspeech-lib set_target_properties( deepspeech-lib PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libdeepspeech.so ) + ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libmozilla_voice_stt.so ) add_custom_command( TARGET deepspeech-jni POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libdeepspeech.so - ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libdeepspeech.so ) + ${CMAKE_SOURCE_DIR}/libs/${ANDROID_ABI}/libmozilla_voice_stt.so + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libmozilla_voice_stt.so ) # Searches for a specified prebuilt library and stores the path as a diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index eafa11e230..1f39644f99 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -7,7 +7,7 @@ public class DeepSpeechModel { static { System.loadLibrary("deepspeech-jni"); - System.loadLibrary("deepspeech"); + System.loadLibrary("mozilla_voice_stt"); } // FIXME: We should have something better than those SWIGTYPE_* @@ -15,8 +15,8 @@ public class DeepSpeechModel { private SWIGTYPE_p_ModelState _msp; private void evaluateErrorCode(int errorCode) { - DeepSpeech_Error_Codes code = DeepSpeech_Error_Codes.swigToEnum(errorCode); - if (code != DeepSpeech_Error_Codes.ERR_OK) { + STT_Error_Codes code = STT_Error_Codes.swigToEnum(errorCode); + if (code != STT_Error_Codes.ERR_OK) { throw new RuntimeException("Error: " + impl.ErrorCodeToErrorMessage(errorCode) + " (0x" + Integer.toHexString(errorCode) + ")."); } } diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile index 454bdc4221..5bd33efd33 100644 --- a/native_client/javascript/Makefile +++ b/native_client/javascript/Makefile @@ -2,7 +2,7 @@ NODE_BUILD_TOOL ?= node-pre-gyp NODE_ABI_TARGET ?= NODE_BUILD_VERBOSE ?= --verbose NPM_TOOL ?= npm -PROJECT_NAME ?= deepspeech +PROJECT_NAME ?= mozilla_voice_stt PROJECT_VERSION ?= $(shell cat ../../training/deepspeech_training/VERSION | tr -d '\n') NPM_ROOT ?= $(shell npm root) @@ -28,7 +28,7 @@ clean: rm -rf ./build/ clean-npm-pack: - rm -fr ./deepspeech-*.tgz + rm -fr ./$(PROJECT_NAME)-*.tgz really-clean: clean clean-npm-pack rm -fr ./node_modules/ diff --git a/native_client/javascript/client.ts b/native_client/javascript/client.ts index 53c9eac4b2..617ff9e1fb 100644 --- a/native_client/javascript/client.ts +++ b/native_client/javascript/client.ts @@ -14,7 +14,7 @@ const Duplex = require("stream").Duplex; class VersionAction extends argparse.Action { call(parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: string | string[], optionString: string | null) { - console.log('DeepSpeech ' + Ds.Version()); + console.log('Mozilla Voice STT ' + Ds.Version()); let runtime = 'Node'; if (process.versions.electron) { runtime = 'Electron'; @@ -24,7 +24,7 @@ class VersionAction extends argparse.Action { } } -let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech 
inference.'}); +let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running Mozilla Voice STT inference.'}); parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'}); parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'}); parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'}); diff --git a/native_client/javascript/deepspeech.i b/native_client/javascript/deepspeech.i index e311a41be5..b3363f8acb 100644 --- a/native_client/javascript/deepspeech.i +++ b/native_client/javascript/deepspeech.i @@ -5,7 +5,7 @@ #define SWIG_FILE_WITH_INIT #include #include -#include "deepspeech.h" +#include "mozilla_voice_stt.h" using namespace v8; using namespace node; @@ -26,18 +26,18 @@ using namespace node; $2 = ($2_ltype)(bufferLength / 2); } -// apply to DS_FeedAudioContent and DS_SpeechToText +// apply to STT_FeedAudioContent and STT_SpeechToText %apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)}; // make sure the string returned by SpeechToText is freed -%typemap(newfree) char* "DS_FreeString($1);"; +%typemap(newfree) char* "STT_FreeString($1);"; -%newobject DS_SpeechToText; -%newobject DS_IntermediateDecode; -%newobject DS_FinishStream; -%newobject DS_Version; -%newobject DS_ErrorCodeToErrorMessage; +%newobject STT_SpeechToText; +%newobject STT_IntermediateDecode; +%newobject STT_FinishStream; +%newobject STT_Version; +%newobject STT_ErrorCodeToErrorMessage; // convert double pointer retval in CreateModel to an output %typemap(in, numinputs=0) ModelState **retval (ModelState *ret) { @@ -62,7 +62,7 @@ using namespace node; %typemap(argout) StreamingState **retval { $result = SWIGV8_ARRAY_NEW(); SWIGV8_AppendOutput($result, SWIG_From_int(result)); - // not owned, DS_FinishStream deallocates StreamingState + // not owned, STT_FinishStream deallocates StreamingState %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0)); } @@ -93,6 +93,6 @@ using namespace node; %nodefaultctor TokenMetadata; %nodefaultdtor TokenMetadata; -%rename ("%(strip:[DS_])s") ""; +%rename ("%(strip:[STT_])s") ""; -%include "../deepspeech.h" +%include "../mozilla_voice_stt.h" diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in index 42edc3c161..6cb49b2c92 100644 --- a/native_client/javascript/package.json.in +++ b/native_client/javascript/package.json.in @@ -5,7 +5,7 @@ "main" : "./index.js", "types": "./index.d.ts", "bin": { - "deepspeech": "./client.js" + "mozilla_voice_stt": "./client.js" }, "author" : "Mozilla", "license": "MPL-2.0", diff --git a/native_client/modelstate.cc b/native_client/modelstate.cc index d8637c3656..84fc5ed623 100644 --- a/native_client/modelstate.cc +++ b/native_client/modelstate.cc @@ -26,7 +26,7 @@ ModelState::~ModelState() int ModelState::init(const char* model_path) { - return DS_ERR_OK; + return STT_ERR_OK; } char* diff --git a/native_client/modelstate.h b/native_client/modelstate.h index 0dbe108ae1..035f180f23 100644 --- a/native_client/modelstate.h +++ b/native_client/modelstate.h @@ -3,7 +3,7 @@ #include -#include "deepspeech.h" +#include "mozilla_voice_stt.h" #include "alphabet.h" #include "ctcdecode/scorer.h" @@ -70,7 +70,7 @@ struct ModelState { * * @return A Metadata struct containing CandidateTranscript structs. * Each represents an candidate transcript, with the first ranked most probable. 
- * The user is responsible for freeing Result by calling DS_FreeMetadata(). + * The user is responsible for freeing Result by calling STT_FreeMetadata(). */ virtual Metadata* decode_metadata(const DecoderState& state, size_t num_results); diff --git a/native_client/deepspeech.h b/native_client/mozilla_voice_stt.h similarity index 56% rename from native_client/deepspeech.h rename to native_client/mozilla_voice_stt.h index 1df3cf2e43..42f87d8b2e 100644 --- a/native_client/deepspeech.h +++ b/native_client/mozilla_voice_stt.h @@ -1,5 +1,5 @@ -#ifndef DEEPSPEECH_H -#define DEEPSPEECH_H +#ifndef MOZILLA_VOICE_STT_H +#define MOZILLA_VOICE_STT_H #ifdef __cplusplus extern "C" { @@ -7,12 +7,12 @@ extern "C" { #ifndef SWIG #if defined _MSC_VER - #define DEEPSPEECH_EXPORT __declspec(dllexport) + #define STT_EXPORT __declspec(dllexport) #else - #define DEEPSPEECH_EXPORT __attribute__ ((visibility("default"))) + #define STT_EXPORT __attribute__ ((visibility("default"))) #endif /*End of _MSC_VER*/ #else - #define DEEPSPEECH_EXPORT + #define STT_EXPORT #endif typedef struct ModelState ModelState; @@ -61,34 +61,34 @@ typedef struct Metadata { // sphinx-doc: error_code_listing_start -#define DS_FOR_EACH_ERROR(APPLY) \ - APPLY(DS_ERR_OK, 0x0000, "No error.") \ - APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ - APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \ - APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ - APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ - APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ - APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ - APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ - APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ - APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ - APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ - APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ - APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ - APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ - APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ - APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ - APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ - APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ - APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ - APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") +#define STT_FOR_EACH_ERROR(APPLY) \ + APPLY(STT_ERR_OK, 0x0000, "No error.") \ + APPLY(STT_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(STT_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ + APPLY(STT_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(STT_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(STT_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(STT_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(STT_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(STT_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(STT_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(STT_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(STT_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(STT_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(STT_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(STT_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(STT_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(STT_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(STT_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(STT_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(STT_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") // sphinx-doc: error_code_listing_end -enum DeepSpeech_Error_Codes +enum STT_Error_Codes { #define DEFINE(NAME, VALUE, DESC) NAME = VALUE, -DS_FOR_EACH_ERROR(DEFINE) +STT_FOR_EACH_ERROR(DEFINE) #undef DEFINE }; @@ -100,50 +100,50 @@ DS_FOR_EACH_ERROR(DEFINE) * * @return Zero on success, non-zero on failure. */ -DEEPSPEECH_EXPORT -int DS_CreateModel(const char* aModelPath, - ModelState** retval); +STT_EXPORT +int STT_CreateModel(const char* aModelPath, + ModelState** retval); /** - * @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + * @brief Get beam width value used by the model. If {@link STT_SetModelBeamWidth} * was not called before, will return the default value loaded from the * model file. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * * @return Beam width value used by the model. */ -DEEPSPEECH_EXPORT -unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); +STT_EXPORT +unsigned int STT_GetModelBeamWidth(const ModelState* aCtx); /** * @brief Set beam width value used by the model. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * @param aBeamWidth The beam width used by the model. A larger beam width value * generates better results at the cost of decoding time. * * @return Zero on success, non-zero on failure. */ -DEEPSPEECH_EXPORT -int DS_SetModelBeamWidth(ModelState* aCtx, - unsigned int aBeamWidth); +STT_EXPORT +int STT_SetModelBeamWidth(ModelState* aCtx, + unsigned int aBeamWidth); /** * @brief Return the sample rate expected by a model. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * * @return Sample rate expected by the model for its input. */ -DEEPSPEECH_EXPORT -int DS_GetModelSampleRate(const ModelState* aCtx); +STT_EXPORT +int STT_GetModelSampleRate(const ModelState* aCtx); /** * @brief Frees associated resources and destroys model object. 
*/ -DEEPSPEECH_EXPORT -void DS_FreeModel(ModelState* ctx); +STT_EXPORT +void STT_FreeModel(ModelState* ctx); /** * @brief Enable decoding using an external scorer. @@ -153,9 +153,9 @@ void DS_FreeModel(ModelState* ctx); * * @return Zero on success, non-zero on failure (invalid arguments). */ -DEEPSPEECH_EXPORT -int DS_EnableExternalScorer(ModelState* aCtx, - const char* aScorerPath); +STT_EXPORT +int STT_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath); /** * @brief Disable decoding using an external scorer. @@ -164,8 +164,8 @@ int DS_EnableExternalScorer(ModelState* aCtx, * * @return Zero on success, non-zero on failure. */ -DEEPSPEECH_EXPORT -int DS_DisableExternalScorer(ModelState* aCtx); +STT_EXPORT +int STT_DisableExternalScorer(ModelState* aCtx); /** * @brief Set hyperparameters alpha and beta of the external scorer. @@ -176,10 +176,10 @@ int DS_DisableExternalScorer(ModelState* aCtx); * * @return Zero on success, non-zero on failure. */ -DEEPSPEECH_EXPORT -int DS_SetScorerAlphaBeta(ModelState* aCtx, - float aAlpha, - float aBeta); +STT_EXPORT +int STT_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta); /** * @brief Use the DeepSpeech model to convert speech to text. @@ -190,12 +190,12 @@ int DS_SetScorerAlphaBeta(ModelState* aCtx, * @param aBufferSize The number of samples in the audio signal. * * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. Returns NULL on error. + * {@link STT_FreeString()}. Returns NULL on error. */ -DEEPSPEECH_EXPORT -char* DS_SpeechToText(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize); +STT_EXPORT +char* STT_SpeechToText(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize); /** * @brief Use the DeepSpeech model to convert speech to text and output results @@ -209,19 +209,19 @@ char* DS_SpeechToText(ModelState* aCtx, * * @return Metadata struct containing multiple CandidateTranscript structs. Each * transcript has per-token metadata including timing information. The - * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * user is responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. */ -DEEPSPEECH_EXPORT -Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize, - unsigned int aNumResults); +STT_EXPORT +Metadata* STT_SpeechToTextWithMetadata(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize, + unsigned int aNumResults); /** * @brief Create a new streaming inference state. The streaming state returned - * by this function can then be passed to {@link DS_FeedAudioContent()} - * and {@link DS_FinishStream()}. + * by this function can then be passed to {@link STT_FeedAudioContent()} + * and {@link STT_FinishStream()}. * * @param aCtx The ModelState pointer for the model to use. * @param[out] retval an opaque pointer that represents the streaming state. Can @@ -229,129 +229,129 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, * * @return Zero for success, non-zero on failure. */ -DEEPSPEECH_EXPORT -int DS_CreateStream(ModelState* aCtx, - StreamingState** retval); +STT_EXPORT +int STT_CreateStream(ModelState* aCtx, + StreamingState** retval); /** * @brief Feed audio samples to an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. 
* @param aBuffer An array of 16-bit, mono raw audio samples at the * appropriate sample rate (matching what the model was trained on). * @param aBufferSize The number of samples in @p aBuffer. */ -DEEPSPEECH_EXPORT -void DS_FeedAudioContent(StreamingState* aSctx, - const short* aBuffer, - unsigned int aBufferSize); +STT_EXPORT +void STT_FeedAudioContent(StreamingState* aSctx, + const short* aBuffer, + unsigned int aBufferSize); /** * @brief Compute the intermediate decoding of an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @return The STT intermediate result. The user is responsible for freeing the - * string using {@link DS_FreeString()}. + * string using {@link STT_FreeString()}. */ -DEEPSPEECH_EXPORT -char* DS_IntermediateDecode(const StreamingState* aSctx); +STT_EXPORT +char* STT_IntermediateDecode(const StreamingState* aSctx); /** * @brief Compute the intermediate decoding of an ongoing streaming inference, * return results including metadata. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * @param aNumResults The number of candidate transcripts to return. * * @return Metadata struct containing multiple candidate transcripts. Each transcript * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. */ -DEEPSPEECH_EXPORT -Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, - unsigned int aNumResults); +STT_EXPORT +Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults); /** * @brief Compute the final decoding of an ongoing streaming inference and return * the result. Signals the end of an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. + * {@link STT_FreeString()}. * * @note This method will free the state pointer (@p aSctx). */ -DEEPSPEECH_EXPORT -char* DS_FinishStream(StreamingState* aSctx); +STT_EXPORT +char* STT_FinishStream(StreamingState* aSctx); /** * @brief Compute the final decoding of an ongoing streaming inference and return * results including metadata. Signals the end of an ongoing streaming * inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * @param aNumResults The number of candidate transcripts to return. * * @return Metadata struct containing multiple candidate transcripts. Each transcript * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. * * @note This method will free the state pointer (@p aSctx). 
*/ -DEEPSPEECH_EXPORT -Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, - unsigned int aNumResults); +STT_EXPORT +Metadata* STT_FinishStreamWithMetadata(StreamingState* aSctx, + unsigned int aNumResults); /** * @brief Destroy a streaming state without decoding the computed logits. This * can be used if you no longer need the result of an ongoing streaming * inference and don't want to perform a costly decode operation. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @note This method will free the state pointer (@p aSctx). */ -DEEPSPEECH_EXPORT -void DS_FreeStream(StreamingState* aSctx); +STT_EXPORT +void STT_FreeStream(StreamingState* aSctx); /** * @brief Free memory allocated for metadata information. */ -DEEPSPEECH_EXPORT -void DS_FreeMetadata(Metadata* m); +STT_EXPORT +void STT_FreeMetadata(Metadata* m); /** * @brief Free a char* string returned by the DeepSpeech API. */ -DEEPSPEECH_EXPORT -void DS_FreeString(char* str); +STT_EXPORT +void STT_FreeString(char* str); /** * @brief Returns the version of this library. The returned version is a semantic - * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + * version (SemVer 2.0.0). The string returned must be freed with {@link STT_FreeString()}. * * @return The version string. */ -DEEPSPEECH_EXPORT -char* DS_Version(); +STT_EXPORT +char* STT_Version(); /** * @brief Returns a textual description corresponding to an error code. - * The string returned must be freed with @{link DS_FreeString()}. + * The string returned must be freed with {@link STT_FreeString()}. * * @return The error description. */ -DEEPSPEECH_EXPORT -char* DS_ErrorCodeToErrorMessage(int aErrorCode); +STT_EXPORT +char* STT_ErrorCodeToErrorMessage(int aErrorCode); -#undef DEEPSPEECH_EXPORT +#undef STT_EXPORT #ifdef __cplusplus } #endif -#endif /* DEEPSPEECH_H */ +#endif /* MOZILLA_VOICE_STT_H */ diff --git a/native_client/python/Makefile b/native_client/python/Makefile index 7f948649af..312e5248d9 100644 --- a/native_client/python/Makefile +++ b/native_client/python/Makefile @@ -3,7 +3,7 @@ include ../definitions.mk bindings-clean: - rm -rf dist temp_build deepspeech.egg-info MANIFEST.in temp_lib + rm -rf dist temp_build mozilla_voice_stt.egg-info MANIFEST.in temp_lib rm -f impl_wrap.cpp impl.py # Enforce PATH here because swig calls from build_ext looses track of some @@ -14,7 +14,7 @@ bindings-build: ds-swig MANIFEST.in: bindings-build > $@ - $(call copy_missing_libs,temp_build/deepspeech/*.so,temp_build/deepspeech/lib,$@) + $(call copy_missing_libs,temp_build/mozilla_voice_stt/*.so,temp_build/mozilla_voice_stt/lib,$@) # On Unix, _wrap.o gets generated # On Windows, _wrap.obj it is diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index 8dec3f0c44..c6888499cc 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -17,10 +17,10 @@ # directory for the dynamic linker os.environ['PATH'] = dslib_path + ';' + os.environ['PATH'] -import deepspeech +import mozilla_voice_stt # rename for backwards compatibility -from deepspeech.impl import Version as version +from mozilla_voice_stt.impl import Version as version class Model(object): """ @@ -33,14 +33,14 @@ def __init__(self, model_path): # make sure the attribute is there if CreateModel fails self._impl = None - status, impl = deepspeech.impl.CreateModel(model_path) + status, impl =
mozilla_voice_stt.impl.CreateModel(model_path) if status != 0: - raise RuntimeError("CreateModel failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + raise RuntimeError("CreateModel failed with '{}' (0x{:X})".format(mozilla_voice_stt.impl.ErrorCodeToErrorMessage(status),status)) self._impl = impl def __del__(self): if self._impl: - deepspeech.impl.FreeModel(self._impl) + mozilla_voice_stt.impl.FreeModel(self._impl) self._impl = None def beamWidth(self): @@ -51,7 +51,7 @@ def beamWidth(self): :return: Beam width value used by the model. :type: int """ - return deepspeech.impl.GetModelBeamWidth(self._impl) + return mozilla_voice_stt.impl.GetModelBeamWidth(self._impl) def setBeamWidth(self, beam_width): """ @@ -63,7 +63,7 @@ def setBeamWidth(self, beam_width): :return: Zero on success, non-zero on failure. :type: int """ - return deepspeech.impl.SetModelBeamWidth(self._impl, beam_width) + return mozilla_voice_stt.impl.SetModelBeamWidth(self._impl, beam_width) def sampleRate(self): """ @@ -72,7 +72,7 @@ def sampleRate(self): :return: Sample rate. :type: int """ - return deepspeech.impl.GetModelSampleRate(self._impl) + return mozilla_voice_stt.impl.GetModelSampleRate(self._impl) def enableExternalScorer(self, scorer_path): """ @@ -83,9 +83,9 @@ def enableExternalScorer(self, scorer_path): :throws: RuntimeError on error """ - status = deepspeech.impl.EnableExternalScorer(self._impl, scorer_path) + status = mozilla_voice_stt.impl.EnableExternalScorer(self._impl, scorer_path) if status != 0: - raise RuntimeError("EnableExternalScorer failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + raise RuntimeError("EnableExternalScorer failed with '{}' (0x{:X})".format(mozilla_voice_stt.impl.ErrorCodeToErrorMessage(status),status)) def disableExternalScorer(self): """ @@ -93,7 +93,7 @@ def disableExternalScorer(self): :return: Zero on success, non-zero on failure. """ - return deepspeech.impl.DisableExternalScorer(self._impl) + return mozilla_voice_stt.impl.DisableExternalScorer(self._impl) def setScorerAlphaBeta(self, alpha, beta): """ @@ -108,7 +108,7 @@ def setScorerAlphaBeta(self, alpha, beta): :return: Zero on success, non-zero on failure. :type: int """ - return deepspeech.impl.SetScorerAlphaBeta(self._impl, alpha, beta) + return mozilla_voice_stt.impl.SetScorerAlphaBeta(self._impl, alpha, beta) def stt(self, audio_buffer): """ @@ -120,7 +120,7 @@ def stt(self, audio_buffer): :return: The STT result. :type: str """ - return deepspeech.impl.SpeechToText(self._impl, audio_buffer) + return mozilla_voice_stt.impl.SpeechToText(self._impl, audio_buffer) def sttWithMetadata(self, audio_buffer, num_results=1): """ @@ -135,7 +135,7 @@ def sttWithMetadata(self, audio_buffer, num_results=1): :return: Metadata object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. 
:type: :func:`Metadata` """ - return deepspeech.impl.SpeechToTextWithMetadata(self._impl, audio_buffer, num_results) + return mozilla_voice_stt.impl.SpeechToTextWithMetadata(self._impl, audio_buffer, num_results) def createStream(self): """ @@ -147,9 +147,9 @@ def createStream(self): :throws: RuntimeError on error """ - status, ctx = deepspeech.impl.CreateStream(self._impl) + status, ctx = mozilla_voice_stt.impl.CreateStream(self._impl) if status != 0: - raise RuntimeError("CreateStream failed with '{}' (0x{:X})".format(deepspeech.impl.ErrorCodeToErrorMessage(status),status)) + raise RuntimeError("CreateStream failed with '{}' (0x{:X})".format(mozilla_voice_stt.impl.ErrorCodeToErrorMessage(status),status)) return Stream(ctx) @@ -176,7 +176,7 @@ def feedAudioContent(self, audio_buffer): """ if not self._impl: raise RuntimeError("Stream object is not valid. Trying to feed an already finished stream?") - deepspeech.impl.FeedAudioContent(self._impl, audio_buffer) + mozilla_voice_stt.impl.FeedAudioContent(self._impl, audio_buffer) def intermediateDecode(self): """ @@ -189,7 +189,7 @@ def intermediateDecode(self): """ if not self._impl: raise RuntimeError("Stream object is not valid. Trying to decode an already finished stream?") - return deepspeech.impl.IntermediateDecode(self._impl) + return mozilla_voice_stt.impl.IntermediateDecode(self._impl) def intermediateDecodeWithMetadata(self, num_results=1): """ @@ -205,7 +205,7 @@ def intermediateDecodeWithMetadata(self, num_results=1): """ if not self._impl: raise RuntimeError("Stream object is not valid. Trying to decode an already finished stream?") - return deepspeech.impl.IntermediateDecodeWithMetadata(self._impl, num_results) + return mozilla_voice_stt.impl.IntermediateDecodeWithMetadata(self._impl, num_results) def finishStream(self): """ @@ -220,7 +220,7 @@ def finishStream(self): """ if not self._impl: raise RuntimeError("Stream object is not valid. Trying to finish an already finished stream?") - result = deepspeech.impl.FinishStream(self._impl) + result = mozilla_voice_stt.impl.FinishStream(self._impl) self._impl = None return result @@ -241,7 +241,7 @@ def finishStreamWithMetadata(self, num_results=1): """ if not self._impl: raise RuntimeError("Stream object is not valid. Trying to finish an already finished stream?") - result = deepspeech.impl.FinishStreamWithMetadata(self._impl, num_results) + result = mozilla_voice_stt.impl.FinishStreamWithMetadata(self._impl, num_results) self._impl = None return result @@ -254,12 +254,12 @@ def freeStream(self): """ if not self._impl: raise RuntimeError("Stream object is not valid. 
Trying to free an already finished stream?") - deepspeech.impl.FreeStream(self._impl) + mozilla_voice_stt.impl.FreeStream(self._impl) self._impl = None # This is only for documentation purpose -# Metadata, CandidateTranscript and TokenMetadata should be in sync with native_client/deepspeech.h +# Metadata, CandidateTranscript and TokenMetadata should be in sync with native_client/mozilla_voice_stt.h class TokenMetadata(object): """ Stores each individual character, along with its timing information diff --git a/native_client/python/client.py b/native_client/python/client.py index d4bd562a52..bb86e58add 100644 --- a/native_client/python/client.py +++ b/native_client/python/client.py @@ -10,7 +10,7 @@ import wave import json -from deepspeech import Model, version +from mozilla_voice_stt import Model, version from timeit import default_timer as timer try: @@ -83,12 +83,12 @@ def __init__(self, *args, **kwargs): super(VersionAction, self).__init__(nargs=0, *args, **kwargs) def __call__(self, *args, **kwargs): - print('DeepSpeech ', version()) + print('Mozilla Voice STT ', version()) exit(0) def main(): - parser = argparse.ArgumentParser(description='Running DeepSpeech inference.') + parser = argparse.ArgumentParser(description='Running Mozilla Voice STT inference.') parser.add_argument('--model', required=True, help='Path to the model (protocol buffer binary file)') parser.add_argument('--scorer', required=False, diff --git a/native_client/python/impl.i b/native_client/python/impl.i index 3ee4b51608..97f9ea9389 100644 --- a/native_client/python/impl.i +++ b/native_client/python/impl.i @@ -2,7 +2,7 @@ %{ #define SWIG_FILE_WITH_INIT -#include "deepspeech.h" +#include "mozilla_voice_stt.h" %} %include "numpy.i" @@ -10,7 +10,7 @@ import_array(); %} -// apply NumPy conversion typemap to DS_FeedAudioContent and DS_SpeechToText +// apply NumPy conversion typemap to STT_FeedAudioContent and STT_SpeechToText %apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)}; %typemap(in, numinputs=0) ModelState **retval (ModelState *ret) { @@ -19,7 +19,7 @@ import_array(); } %typemap(argout) ModelState **retval { - // not owned, Python wrapper in __init__.py calls DS_FreeModel + // not owned, Python wrapper in __init__.py calls STT_FreeModel %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0)); } @@ -29,7 +29,7 @@ import_array(); } %typemap(argout) StreamingState **retval { - // not owned, DS_FinishStream deallocates StreamingState + // not owned, STT_FinishStream deallocates StreamingState %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0)); } @@ -104,7 +104,7 @@ static PyObject *parent_reference() { %extend struct Metadata { ~Metadata() { - DS_FreeMetadata($self); + STT_FreeMetadata($self); } } @@ -115,14 +115,14 @@ static PyObject *parent_reference() { %nodefaultctor TokenMetadata; %nodefaultdtor TokenMetadata; -%typemap(newfree) char* "DS_FreeString($1);"; +%typemap(newfree) char* "STT_FreeString($1);"; -%newobject DS_SpeechToText; -%newobject DS_IntermediateDecode; -%newobject DS_FinishStream; -%newobject DS_Version; -%newobject DS_ErrorCodeToErrorMessage; +%newobject STT_SpeechToText; +%newobject STT_IntermediateDecode; +%newobject STT_FinishStream; +%newobject STT_Version; +%newobject STT_ErrorCodeToErrorMessage; -%rename ("%(strip:[DS_])s") ""; +%rename ("%(strip:[STT_])s") ""; -%include "../deepspeech.h" +%include "../mozilla_voice_stt.h" diff --git a/native_client/python/setup.py b/native_client/python/setup.py index 
0e1d0e62b5..1cda94e248 100755 --- a/native_client/python/setup.py +++ b/native_client/python/setup.py @@ -24,7 +24,7 @@ def read(fname): numpy_include = os.getenv('NUMPY_INCLUDE', numpy_include) numpy_min_ver = os.getenv('NUMPY_DEP_VERSION', '') - project_name = 'deepspeech' + project_name = 'mozilla_voice_stt' if '--project_name' in sys.argv: project_name_idx = sys.argv.index('--project_name') project_name = sys.argv[project_name_idx + 1] @@ -59,7 +59,7 @@ def libs_split(a): raise AssertionError('os.name == java not expected') - ds_ext = Extension(name='deepspeech._impl', + ds_ext = Extension(name='mozilla_voice_stt._impl', sources=['impl.i'], include_dirs=[numpy_include, '../'], library_dirs=list(map(lambda x: x.strip(), lib_dirs_split(os.getenv('MODEL_LDFLAGS', '')))), @@ -72,7 +72,7 @@ def libs_split(a): long_description_content_type='text/x-rst; charset=UTF-8', author='Mozilla', version=project_version, - package_dir={'deepspeech': '.'}, + package_dir={'mozilla_voice_stt': '.'}, cmdclass={'build': BuildExtFirst}, license='MPL-2.0', url='https://github.com/mozilla/DeepSpeech', @@ -83,8 +83,8 @@ def libs_split(a): 'Discussions': 'https://discourse.mozilla.org/c/deep-speech', }, ext_modules=[ds_ext], - py_modules=['deepspeech', 'deepspeech.client', 'deepspeech.impl'], - entry_points={'console_scripts':['deepspeech=deepspeech.client:main']}, + py_modules=['mozilla_voice_stt', 'mozilla_voice_stt.client', 'mozilla_voice_stt.impl'], + entry_points={'console_scripts':['mozilla_voice_stt=mozilla_voice_stt.client:main']}, install_requires=['numpy%s' % numpy_min_ver], include_package_data=True, classifiers=[ diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj index 59927e9ed4..d36e452377 100644 --- a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj @@ -9,9 +9,9 @@ /* Begin PBXBuildFile section */ 505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 505B136124960D550007DADA /* deepspeech_ios.framework */; }; 505B137224960D550007DADA /* deepspeech_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* deepspeech_ios.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 505B137D24961AF20007DADA /* deepspeech.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* deepspeech.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 505B137D24961AF20007DADA /* mozilla_voice_stt.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* mozilla_voice_stt.h */; settings = {ATTRIBUTES = (Private, ); }; }; 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* DeepSpeech.swift */; }; - 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD39A24B61FA100409BBB /* libdeepspeech.so */; }; + 507CD39B24B61FA100409BBB /* libmozilla_voice_stt.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD39A24B61FA100409BBB /* libmozilla_voice_stt.so */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -43,9 +43,9 @@ 505B136524960D550007DADA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path =
deepspeech_iosTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 505B137B249619C90007DADA /* deepspeech_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = deepspeech_ios.modulemap; sourceTree = "<group>"; }; - 505B137C24961AF20007DADA /* deepspeech.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = deepspeech.h; path = ../../deepspeech.h; sourceTree = "<group>"; }; + 505B137C24961AF20007DADA /* mozilla_voice_stt.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = mozilla_voice_stt.h; path = ../../mozilla_voice_stt.h; sourceTree = "<group>"; }; 505B137E24961BA70007DADA /* DeepSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepSpeech.swift; sourceTree = "<group>"; }; - 507CD39A24B61FA100409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; }; + 507CD39A24B61FA100409BBB /* libmozilla_voice_stt.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libmozilla_voice_stt.so; sourceTree = "<group>"; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -53,7 +53,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */, + 507CD39B24B61FA100409BBB /* libmozilla_voice_stt.so in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -89,7 +89,7 @@ 505B136324960D550007DADA /* deepspeech_ios */ = { isa = PBXGroup; children = ( - 505B137C24961AF20007DADA /* deepspeech.h */, + 505B137C24961AF20007DADA /* mozilla_voice_stt.h */, 505B136424960D550007DADA /* deepspeech_ios.h */, 505B137E24961BA70007DADA /* DeepSpeech.swift */, 505B137B249619C90007DADA /* deepspeech_ios.modulemap */, @@ -101,7 +101,7 @@ 505B1380249620C60007DADA /* Frameworks */ = { isa = PBXGroup; children = ( - 507CD39A24B61FA100409BBB /* libdeepspeech.so */, + 507CD39A24B61FA100409BBB /* libmozilla_voice_stt.so */, ); name = Frameworks; sourceTree = "<group>"; @@ -114,7 +114,7 @@ buildActionMask = 2147483647; files = ( 505B137224960D550007DADA /* deepspeech_ios.h in Headers */, - 505B137D24961AF20007DADA /* deepspeech.h in Headers */, + 505B137D24961AF20007DADA /* mozilla_voice_stt.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/native_client/swift/deepspeech_ios/DeepSpeech.swift b/native_client/swift/deepspeech_ios/DeepSpeech.swift index 50c325534b..0b42ec814c 100644 --- a/native_client/swift/deepspeech_ios/DeepSpeech.swift +++ b/native_client/swift/deepspeech_ios/DeepSpeech.swift @@ -9,7 +9,7 @@ import deepspeech_ios.libdeepspeech_Private public enum DeepSpeechError: Error { - // Should be kept in sync with deepspeech.h + // Should be kept in sync with mozilla_voice_stt.h case noModel(errorCode: Int32) case invalidAlphabet(errorCode: Int32) case invalidShape(errorCode: Int32) @@ -58,8 +58,8 @@ extension DeepSpeechError : LocalizedError { .failCreateSess(let errorCode), .failCreateModel(let errorCode), .invalidErrorCode(let errorCode): - let result = DS_ErrorCodeToErrorMessage(errorCode) - defer { DS_FreeString(result) } + let result = STT_ErrorCodeToErrorMessage(errorCode) + defer { STT_FreeString(result) } return String(cString: result!)
} } @@ -67,43 +67,43 @@ extension DeepSpeechError : LocalizedError { private func errorCodeToEnum(errorCode: Int32) -> DeepSpeechError { switch Int(errorCode) { - case Int(DS_ERR_NO_MODEL.rawValue): + case Int(STT_ERR_NO_MODEL.rawValue): return DeepSpeechError.noModel(errorCode: errorCode) - case Int(DS_ERR_INVALID_ALPHABET.rawValue): + case Int(STT_ERR_INVALID_ALPHABET.rawValue): return DeepSpeechError.invalidAlphabet(errorCode: errorCode) - case Int(DS_ERR_INVALID_SHAPE.rawValue): + case Int(STT_ERR_INVALID_SHAPE.rawValue): return DeepSpeechError.invalidShape(errorCode: errorCode) - case Int(DS_ERR_INVALID_SCORER.rawValue): + case Int(STT_ERR_INVALID_SCORER.rawValue): return DeepSpeechError.invalidScorer(errorCode: errorCode) - case Int(DS_ERR_MODEL_INCOMPATIBLE.rawValue): + case Int(STT_ERR_MODEL_INCOMPATIBLE.rawValue): return DeepSpeechError.modelIncompatible(errorCode: errorCode) - case Int(DS_ERR_SCORER_NOT_ENABLED.rawValue): + case Int(STT_ERR_SCORER_NOT_ENABLED.rawValue): return DeepSpeechError.scorerNotEnabled(errorCode: errorCode) - case Int(DS_ERR_SCORER_UNREADABLE.rawValue): + case Int(STT_ERR_SCORER_UNREADABLE.rawValue): return DeepSpeechError.scorerUnreadable(errorCode: errorCode) - case Int(DS_ERR_SCORER_INVALID_LM.rawValue): + case Int(STT_ERR_SCORER_INVALID_LM.rawValue): return DeepSpeechError.scorerInvalidLm(errorCode: errorCode) - case Int(DS_ERR_SCORER_NO_TRIE.rawValue): + case Int(STT_ERR_SCORER_NO_TRIE.rawValue): return DeepSpeechError.scorerNoTrie(errorCode: errorCode) - case Int(DS_ERR_SCORER_INVALID_TRIE.rawValue): + case Int(STT_ERR_SCORER_INVALID_TRIE.rawValue): return DeepSpeechError.scorerInvalidTrie(errorCode: errorCode) - case Int(DS_ERR_SCORER_VERSION_MISMATCH.rawValue): + case Int(STT_ERR_SCORER_VERSION_MISMATCH.rawValue): return DeepSpeechError.scorerVersionMismatch(errorCode: errorCode) - case Int(DS_ERR_FAIL_INIT_MMAP.rawValue): + case Int(STT_ERR_FAIL_INIT_MMAP.rawValue): return DeepSpeechError.failInitMmap(errorCode: errorCode) - case Int(DS_ERR_FAIL_INIT_SESS.rawValue): + case Int(STT_ERR_FAIL_INIT_SESS.rawValue): return DeepSpeechError.failInitSess(errorCode: errorCode) - case Int(DS_ERR_FAIL_INTERPRETER.rawValue): + case Int(STT_ERR_FAIL_INTERPRETER.rawValue): return DeepSpeechError.failInterpreter(errorCode: errorCode) - case Int(DS_ERR_FAIL_RUN_SESS.rawValue): + case Int(STT_ERR_FAIL_RUN_SESS.rawValue): return DeepSpeechError.failRunSess(errorCode: errorCode) - case Int(DS_ERR_FAIL_CREATE_STREAM.rawValue): + case Int(STT_ERR_FAIL_CREATE_STREAM.rawValue): return DeepSpeechError.failCreateStream(errorCode: errorCode) - case Int(DS_ERR_FAIL_READ_PROTOBUF.rawValue): + case Int(STT_ERR_FAIL_READ_PROTOBUF.rawValue): return DeepSpeechError.failReadProtobuf(errorCode: errorCode) - case Int(DS_ERR_FAIL_CREATE_SESS.rawValue): + case Int(STT_ERR_FAIL_CREATE_SESS.rawValue): return DeepSpeechError.failCreateSess(errorCode: errorCode) - case Int(DS_ERR_FAIL_CREATE_MODEL.rawValue): + case Int(STT_ERR_FAIL_CREATE_MODEL.rawValue): return DeepSpeechError.failCreateModel(errorCode: errorCode) default: return DeepSpeechError.invalidErrorCode(errorCode: errorCode) @@ -111,7 +111,7 @@ private func errorCodeToEnum(errorCode: Int32) -> DeepSpeechError { } private func evaluateErrorCode(errorCode: Int32) throws { - if errorCode != Int32(DS_ERR_OK.rawValue) { + if errorCode != Int32(STT_ERR_OK.rawValue) { throw errorCodeToEnum(errorCode: errorCode) } } @@ -182,7 +182,7 @@ public class DeepSpeechStream { deinit { if streamCtx != nil { - DS_FreeStream(streamCtx) + 
STT_FreeStream(streamCtx) streamCtx = nil } } @@ -212,7 +212,7 @@ public class DeepSpeechStream { public func feedAudioContent(buffer: UnsafeBufferPointer<Int16>) { precondition(streamCtx != nil, "calling method on invalidated Stream") - DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) + STT_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) } /** Compute the intermediate decoding of an ongoing streaming inference. @@ -224,8 +224,8 @@ public class DeepSpeechStream { public func intermediateDecode() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") - let result = DS_IntermediateDecode(streamCtx) - defer { DS_FreeString(result) } + let result = STT_IntermediateDecode(streamCtx) + defer { STT_FreeString(result) } return String(cString: result!) } @@ -241,8 +241,8 @@ public class DeepSpeechStream { */ public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata { precondition(streamCtx != nil, "calling method on invalidated Stream") - let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! - defer { DS_FreeMetadata(result) } + let result = STT_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! + defer { STT_FreeMetadata(result) } return DeepSpeechMetadata(fromInternal: result) } @@ -258,9 +258,9 @@ public class DeepSpeechStream { public func finishStream() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") - let result = DS_FinishStream(streamCtx) + let result = STT_FinishStream(streamCtx) defer { - DS_FreeString(result) + STT_FreeString(result) streamCtx = nil } return String(cString: result!) @@ -282,8 +282,8 @@ public class DeepSpeechStream { public func finishStreamWithMetadata(numResults: Int) -> DeepSpeechMetadata { precondition(streamCtx != nil, "calling method on invalidated Stream") - let result = DS_FinishStreamWithMetadata(streamCtx, UInt32(numResults))! - defer { DS_FreeMetadata(result) } + let result = STT_FinishStreamWithMetadata(streamCtx, UInt32(numResults))! + defer { STT_FreeMetadata(result) } return DeepSpeechMetadata(fromInternal: result) } } @@ -298,23 +298,23 @@ public class DeepSpeechModel { - Throws: `DeepSpeechError` on failure. */ public init(modelPath: String) throws { - let err = DS_CreateModel(modelPath, &modelCtx) + let err = STT_CreateModel(modelPath, &modelCtx) try evaluateErrorCode(errorCode: err) } deinit { - DS_FreeModel(modelCtx) + STT_FreeModel(modelCtx) modelCtx = nil } - /** Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + /** Get beam width value used by the model. If {@link STT_SetModelBeamWidth} was not called before, will return the default value loaded from the model file. - Returns: Beam width value used by the model. */ public func getBeamWidth() -> Int { - return Int(DS_GetModelBeamWidth(modelCtx)) + return Int(STT_GetModelBeamWidth(modelCtx)) } /** Set beam width value used by the model. @@ -326,14 +326,14 @@ public class DeepSpeechModel { - Throws: `DeepSpeechError` on failure. */ public func setBeamWidth(beamWidth: Int) throws { - let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) + let err = STT_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) try evaluateErrorCode(errorCode: err) } // The sample rate expected by the model. public var sampleRate: Int { get { - return Int(DS_GetModelSampleRate(modelCtx)) + return Int(STT_GetModelSampleRate(modelCtx)) } } @@ -344,7 +344,7 @@ public class DeepSpeechModel { - Throws: `DeepSpeechError` on failure.
*/ public func enableExternalScorer(scorerPath: String) throws { - let err = DS_EnableExternalScorer(modelCtx, scorerPath) + let err = STT_EnableExternalScorer(modelCtx, scorerPath) try evaluateErrorCode(errorCode: err) } @@ -353,7 +353,7 @@ public class DeepSpeechModel { - Throws: `DeepSpeechError` on failure. */ public func disableExternalScorer() throws { - let err = DS_DisableExternalScorer(modelCtx) + let err = STT_DisableExternalScorer(modelCtx) try evaluateErrorCode(errorCode: err) } @@ -365,7 +365,7 @@ public class DeepSpeechModel { - Throws: `DeepSpeechError` on failure. */ public func setScorerAlphaBeta(alpha: Float, beta: Float) throws { - let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta) + let err = STT_SetScorerAlphaBeta(modelCtx, alpha, beta) try evaluateErrorCode(errorCode: err) } @@ -390,8 +390,8 @@ public class DeepSpeechModel { - Returns: The STT result. */ public func speechToText(buffer: UnsafeBufferPointer<Int16>) -> String { - let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) - defer { DS_FreeString(result) } + let result = STT_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) + defer { STT_FreeString(result) } return String(cString: result!) } @@ -424,12 +424,12 @@ public class DeepSpeechModel { Each transcript has per-token metadata including timing information. */ public func speechToTextWithMetadata(buffer: UnsafeBufferPointer<Int16>, numResults: Int) -> DeepSpeechMetadata { - let result = DS_SpeechToTextWithMetadata( + let result = STT_SpeechToTextWithMetadata( modelCtx, buffer.baseAddress, UInt32(buffer.count), UInt32(numResults))! - defer { DS_FreeMetadata(result) } + defer { STT_FreeMetadata(result) } return DeepSpeechMetadata(fromInternal: result) } @@ -441,14 +441,14 @@ public class DeepSpeechModel { */ public func createStream() throws -> DeepSpeechStream { var streamContext: OpaquePointer! - let err = DS_CreateStream(modelCtx, &streamContext) + let err = STT_CreateStream(modelCtx, &streamContext) try evaluateErrorCode(errorCode: err) return DeepSpeechStream(streamContext: streamContext) } } public func DeepSpeechVersion() -> String { - let result = DS_Version() - defer { DS_FreeString(result) } + let result = STT_Version() + defer { STT_FreeString(result) } return String(cString: result!)
} diff --git a/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap index 078ac915b4..18f40cd12f 100644 --- a/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap +++ b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap @@ -5,8 +5,8 @@ framework module deepspeech_ios { module * { export * } explicit module libdeepspeech_Private { - header "deepspeech.h" + header "mozilla_voice_stt.h" export * - link "deepspeech" + link "mozilla_voice_stt" } } diff --git a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj index a57f983c5b..4acf7bb0d3 100644 --- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -11,8 +11,8 @@ 504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; }; 504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; }; - 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; }; - 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; + 507CD3A324B61FEB00409BBB /* libmozilla_voice_stt.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libmozilla_voice_stt.so */; }; + 507CD3A424B61FFC00409BBB /* libmozilla_voice_stt.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libmozilla_voice_stt.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 50F787F32497683900D52237 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F22497683900D52237 /* AppDelegate.swift */; }; 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F42497683900D52237 /* SceneDelegate.swift */; }; 50F787F72497683900D52237 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F62497683900D52237 /* ContentView.swift */; }; @@ -48,7 +48,7 @@ dstSubfolderSpec = 10; files = ( 504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */, - 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */, + 507CD3A424B61FFC00409BBB /* libmozilla_voice_stt.so in Embed Frameworks */, ); name = "Embed Frameworks"; runOnlyForDeploymentPostprocessing = 0; }; @@ -59,7 +59,7 @@ 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; }; 504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; }; 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; -
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; }; + 507CD3A224B61FEA00409BBB /* libmozilla_voice_stt.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libmozilla_voice_stt.so; sourceTree = "<group>"; }; 50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; }; 50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; }; 50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; }; @@ -81,7 +81,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */, + 507CD3A324B61FEB00409BBB /* libmozilla_voice_stt.so in Frameworks */, 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -106,7 +106,7 @@ 50F2B0FC2498D6C7007CD876 /* Frameworks */ = { isa = PBXGroup; children = ( - 507CD3A224B61FEA00409BBB /* libdeepspeech.so */, + 507CD3A224B61FEA00409BBB /* libmozilla_voice_stt.so */, 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */, ); name = Frameworks; diff --git a/native_client/test/concurrent_streams.py b/native_client/test/concurrent_streams.py index e435b43fec..dc330bdaa2 100644 --- a/native_client/test/concurrent_streams.py +++ b/native_client/test/concurrent_streams.py @@ -6,7 +6,7 @@ import numpy as np import wave -from deepspeech import Model +from mozilla_voice_stt import Model def main(): diff --git a/native_client/tflitemodelstate.cc b/native_client/tflitemodelstate.cc index 50a68a4b94..9d8d9ddbac 100644 --- a/native_client/tflitemodelstate.cc +++ b/native_client/tflitemodelstate.cc @@ -4,7 +4,7 @@ #ifdef __ANDROID__ #include <android/log.h> -#define LOG_TAG "libdeepspeech" +#define LOG_TAG "libmozilla_voice_stt" #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) #else @@ -159,21 +159,21 @@ int TFLiteModelState::init(const char* model_path) { int err = ModelState::init(model_path); - if (err != DS_ERR_OK) { + if (err != STT_ERR_OK) { return err; } fbmodel_ = tflite::FlatBufferModel::BuildFromFile(model_path); if (!fbmodel_) { std::cerr << "Error at reading model file " << model_path << std::endl; - return DS_ERR_FAIL_INIT_MMAP; + return STT_ERR_FAIL_INIT_MMAP; } tflite::ops::builtin::BuiltinOpResolver resolver; tflite::InterpreterBuilder(*fbmodel_, resolver)(&interpreter_); if (!interpreter_) { std::cerr << "Error at InterpreterBuilder for model file " << model_path << std::endl; - return DS_ERR_FAIL_INTERPRETER; + return STT_ERR_FAIL_INTERPRETER; } LOGD("Trying to detect delegates ..."); @@ -242,13 +242,13 @@ TFLiteModelState::init(const char* model_path) TfLiteStatus status = interpreter_->Invoke(); if (status != kTfLiteOk) { std::cerr << "Error running session: " << status << "\n"; - return DS_ERR_FAIL_INTERPRETER; + return STT_ERR_FAIL_INTERPRETER; } int* const graph_version = interpreter_->typed_tensor<int>(metadata_version_idx); if (graph_version == nullptr) { std::cerr << "Unable to read model file version."
<< std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } if (*graph_version < ds_graph_version()) { @@ -258,13 +258,13 @@ TFLiteModelState::init(const char* model_path) << "https://github.com/mozilla/DeepSpeech/blob/" << ds_git_version() << "/doc/USING.rst#model-compatibility " << "for more information" << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } int* const model_sample_rate = interpreter_->typed_tensor<int>(metadata_sample_rate_idx); if (model_sample_rate == nullptr) { std::cerr << "Unable to read model sample rate." << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } sample_rate_ = *model_sample_rate; @@ -273,7 +273,7 @@ TFLiteModelState::init(const char* model_path) int* const win_step_ms = interpreter_->typed_tensor<int>(metadata_feature_win_step_idx); if (win_len_ms == nullptr || win_step_ms == nullptr) { std::cerr << "Unable to read model feature window informations." << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } audio_win_len_ = sample_rate_ * (*win_len_ms / 1000.0); @@ -285,7 +285,7 @@ TFLiteModelState::init(const char* model_path) tflite::StringRef serialized_alphabet = tflite::GetString(interpreter_->tensor(metadata_alphabet_idx), 0); err = alphabet_.Deserialize(serialized_alphabet.str, serialized_alphabet.len); if (err != 0) { - return DS_ERR_INVALID_ALPHABET; + return STT_ERR_INVALID_ALPHABET; } assert(sample_rate_ > 0); @@ -310,7 +310,7 @@ TFLiteModelState::init(const char* model_path) << " classes in its output. Make sure you're passing an alphabet " << "file with the same size as the one used for training." << std::endl; - return DS_ERR_INVALID_ALPHABET; + return STT_ERR_INVALID_ALPHABET; } TfLiteIntArray* dims_c = interpreter_->tensor(previous_state_c_idx_)->dims; @@ -319,7 +319,7 @@ TFLiteModelState::init(const char* model_path) assert(state_size_ > 0); state_size_ = dims_c->data[1]; - return DS_ERR_OK; + return STT_ERR_OK; } // Copy contents of vec into the tensor with index tensor_idx.
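The DS_ERR_* to STT_ERR_* rename above propagates through every binding, so callers see the new names wherever an error code is turned into a message. A minimal sketch of how these codes surface through the renamed Python package from this patch (assuming the mozilla_voice_stt wheel is installed; the model path below is a placeholder used only for illustration):

    from mozilla_voice_stt import Model

    try:
        model = Model("output_graph.tflite")  # placeholder model path
    except RuntimeError as err:
        # Model.__init__ wraps STT_ErrorCodeToErrorMessage(), so failures such
        # as STT_ERR_MODEL_INCOMPATIBLE or STT_ERR_FAIL_INIT_MMAP arrive here
        # as readable messages rather than bare integer codes.
        print("CreateModel failed:", err)
    else:
        print("Expected sample rate:", model.sampleRate())
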
diff --git a/native_client/tfmodelstate.cc b/native_client/tfmodelstate.cc index 65328e308a..0d78598640 100644 --- a/native_client/tfmodelstate.cc +++ b/native_client/tfmodelstate.cc @@ -26,7 +26,7 @@ int TFModelState::init(const char* model_path) { int err = ModelState::init(model_path); - if (err != DS_ERR_OK) { + if (err != STT_ERR_OK) { return err; } @@ -42,7 +42,7 @@ TFModelState::init(const char* model_path) status = mmap_env_->InitializeFromFile(model_path); if (!status.ok()) { std::cerr << status << std::endl; - return DS_ERR_FAIL_INIT_MMAP; + return STT_ERR_FAIL_INIT_MMAP; } options.config.mutable_graph_options() @@ -55,7 +55,7 @@ TFModelState::init(const char* model_path) status = NewSession(options, &session); if (!status.ok()) { std::cerr << status << std::endl; - return DS_ERR_FAIL_INIT_SESS; + return STT_ERR_FAIL_INIT_SESS; } session_.reset(session); @@ -68,13 +68,13 @@ TFModelState::init(const char* model_path) } if (!status.ok()) { std::cerr << status << std::endl; - return DS_ERR_FAIL_READ_PROTOBUF; + return STT_ERR_FAIL_READ_PROTOBUF; } status = session_->Create(graph_def_); if (!status.ok()) { std::cerr << status << std::endl; - return DS_ERR_FAIL_CREATE_SESS; + return STT_ERR_FAIL_CREATE_SESS; } std::vector<tensorflow::Tensor> version_output; @@ -83,7 +83,7 @@ TFModelState::init(const char* model_path) }, {}, &version_output); if (!status.ok()) { std::cerr << "Unable to fetch graph version: " << status << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } int graph_version = version_output[0].scalar<int>()(); @@ -94,7 +94,7 @@ TFModelState::init(const char* model_path) << "https://github.com/mozilla/DeepSpeech/blob/" << ds_git_version() << "/doc/USING.rst#model-compatibility " << "for more information" << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } std::vector<tensorflow::Tensor> metadata_outputs; @@ -107,7 +107,7 @@ TFModelState::init(const char* model_path) }, {}, &metadata_outputs); if (!status.ok()) { std::cout << "Unable to fetch metadata: " << status << std::endl; - return DS_ERR_MODEL_INCOMPATIBLE; + return STT_ERR_MODEL_INCOMPATIBLE; } sample_rate_ = metadata_outputs[0].scalar<int>()(); @@ -121,7 +121,7 @@ TFModelState::init(const char* model_path) string serialized_alphabet = metadata_outputs[4].scalar<tensorflow::tstring>()(); err = alphabet_.Deserialize(serialized_alphabet.data(), serialized_alphabet.size()); if (err != 0) { - return DS_ERR_INVALID_ALPHABET; + return STT_ERR_INVALID_ALPHABET; } assert(sample_rate_ > 0); @@ -155,7 +155,7 @@ TFModelState::init(const char* model_path) << " classes in its output. Make sure you're passing an alphabet " << "file with the same size as the one used for training." << std::endl; - return DS_ERR_INVALID_ALPHABET; + return STT_ERR_INVALID_ALPHABET; } } } @@ -165,10 +165,10 @@ TFModelState::init(const char* model_path) << "Make sure input_node is a 4D tensor with shape " << "[batch_size=1, time, window_size, n_features]."
<< std::endl; - return DS_ERR_INVALID_SHAPE; + return STT_ERR_INVALID_SHAPE; } - return DS_ERR_OK; + return STT_ERR_OK; } Tensor diff --git a/taskcluster/android-build.sh b/taskcluster/android-build.sh index 2b9e03937f..361a2d467b 100644 --- a/taskcluster/android-build.sh +++ b/taskcluster/android-build.sh @@ -9,7 +9,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " diff --git a/taskcluster/arm64-build.sh b/taskcluster/arm64-build.sh index 1ca4028ec6..0fae977c14 100644 --- a/taskcluster/arm64-build.sh +++ b/taskcluster/arm64-build.sh @@ -7,7 +7,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " diff --git a/taskcluster/cuda-build.sh b/taskcluster/cuda-build.sh index f8213f8104..8676870994 100755 --- a/taskcluster/cuda-build.sh +++ b/taskcluster/cuda-build.sh @@ -7,7 +7,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " diff --git a/taskcluster/examples-base.tyml b/taskcluster/examples-base.tyml index 42fa54c95d..128e453692 100644 --- a/taskcluster/examples-base.tyml +++ b/taskcluster/examples-base.tyml @@ -26,7 +26,7 @@ then: DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz" PIP_DEFAULT_TIMEOUT: "60" EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "master" + EXAMPLES_CHECKOUT_TARGET: "rename-test" command: - "/bin/bash" diff --git a/taskcluster/host-build.sh b/taskcluster/host-build.sh index 9ff3648c2d..11cfa81c4c 100755 --- a/taskcluster/host-build.sh +++ b/taskcluster/host-build.sh @@ -9,7 +9,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index 282f8c32af..42cbbfa47f 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -9,7 +9,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so " if [ "${arch}" = "--arm64" ]; then diff --git a/taskcluster/ios-package.sh b/taskcluster/ios-package.sh index 16cc9f961b..d46e3ceb00 100755 --- a/taskcluster/ios-package.sh +++ b/taskcluster/ios-package.sh @@ -12,7 +12,7 @@ cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ package_native_client "native_client.tar.xz" -package_libdeepspeech_as_zip "libdeepspeech.zip" +package_libdeepspeech_as_zip "libmozilla_voice_stt.zip" case $arch in "--x86_64") diff --git a/taskcluster/node-package.sh b/taskcluster/node-package.sh index f81324f663..5980542e54 100644 --- a/taskcluster/node-package.sh +++ b/taskcluster/node-package.sh @@ -7,4 +7,4 @@ source $(dirname "$0")/tc-tests-utils.sh mkdir -p ${TASKCLUSTER_ARTIFACTS} || true # NodeJS package -cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/deepspeech-*.tgz ${TASKCLUSTER_ARTIFACTS}/ +cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/mozilla_voice_stt-*.tgz 
${TASKCLUSTER_ARTIFACTS}/ diff --git a/taskcluster/package.sh b/taskcluster/package.sh index 062654fd3b..79b7a9ea79 100755 --- a/taskcluster/package.sh +++ b/taskcluster/package.sh @@ -10,11 +10,11 @@ cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ package_native_client "native_client.tar.xz" -package_libdeepspeech_as_zip "libdeepspeech.zip" +package_libdeepspeech_as_zip "libmozilla_voice_stt.zip" if [ -d ${DS_ROOT_TASK}/DeepSpeech/ds/wheels ]; then cp ${DS_ROOT_TASK}/DeepSpeech/ds/wheels/* ${TASKCLUSTER_ARTIFACTS}/ - cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/deepspeech-*.tgz ${TASKCLUSTER_ARTIFACTS}/ + cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/mozilla_voice_stt-*.tgz ${TASKCLUSTER_ARTIFACTS}/ fi; if [ -f ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/wrapper.tar.gz ]; then diff --git a/taskcluster/rpi3-build.sh b/taskcluster/rpi3-build.sh index eabff73091..e7580272d3 100755 --- a/taskcluster/rpi3-build.sh +++ b/taskcluster/rpi3-build.sh @@ -7,7 +7,7 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " diff --git a/taskcluster/tc-all-utils.sh b/taskcluster/tc-all-utils.sh index 769e932905..3f70e52658 100755 --- a/taskcluster/tc-all-utils.sh +++ b/taskcluster/tc-all-utils.sh @@ -101,7 +101,7 @@ verify_bazel_rebuild() cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ - spurious_rebuilds=$(grep 'Executing action' "${bazel_explain_file}" | grep 'Compiling' | grep -v -E 'no entry in the cache|unconditional execution is requested|Executing genrule //native_client:workspace_status|Compiling native_client/workspace_status.cc|Linking native_client/libdeepspeech.so' | wc -l) + spurious_rebuilds=$(grep 'Executing action' "${bazel_explain_file}" | grep 'Compiling' | grep -v -E 'no entry in the cache|unconditional execution is requested|Executing genrule //native_client:workspace_status|Compiling native_client/workspace_status.cc|Linking native_client/libmozilla_voice_stt.so' | wc -l) if [ "${spurious_rebuilds}" -ne 0 ]; then echo "Bazel rebuilds some file it should not, please check." 
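The taskcluster assertions that follow exercise the renamed `mozilla_voice_stt` CLI end to end (model, optional scorer, WAV input). As a rough Python equivalent of the basic invocation they test, using the renamed package from this patch (the model, scorer, and audio paths below are placeholders, not files shipped with this change):

    import wave
    import numpy as np
    from mozilla_voice_stt import Model

    # Load the acoustic model and attach an external scorer, mirroring
    # `mozilla_voice_stt --model ... --scorer ... --audio ...`.
    model = Model("output_graph.pbmm")          # placeholder model path
    model.enableExternalScorer("kenlm.scorer")  # placeholder scorer path

    # Read a 16-bit mono WAV at the sample rate the model expects.
    with wave.open("LDC93S1.wav", "rb") as fin:  # placeholder audio path
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    print(model.stt(audio))
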
diff --git a/taskcluster/tc-android-utils.sh b/taskcluster/tc-android-utils.sh index 3bf66927f5..cbd37200de 100755 --- a/taskcluster/tc-android-utils.sh +++ b/taskcluster/tc-android-utils.sh @@ -39,7 +39,7 @@ do_deepspeech_java_apk_build() mkdir native_client/java/libdeepspeech/libs/${nc_dir} - curl -L https://community-tc.services.mozilla.com/api/queue/v1/task/${dep}/artifacts/public/native_client.tar.xz | tar -C native_client/java/libdeepspeech/libs/${nc_dir}/ -Jxvf - libdeepspeech.so + curl -L https://community-tc.services.mozilla.com/api/queue/v1/task/${dep}/artifacts/public/native_client.tar.xz | tar -C native_client/java/libdeepspeech/libs/${nc_dir}/ -Jxvf - libmozilla_voice_stt.so fi; done; diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 7a164b07e2..d9c893aa0f 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -253,7 +253,7 @@ assert_tensorflow_version() assert_deepspeech_version() { - assert_not_present "$1" "DeepSpeech: unknown" + assert_not_present "$1" "Mozilla Voice STT: unknown" } # We need to ensure that running on inference really leverages GPU because @@ -261,7 +261,7 @@ assert_deepspeech_version() ensure_cuda_usage() { local _maybe_cuda=$1 - DS_BINARY_FILE=${DS_BINARY_FILE:-"deepspeech"} + DS_BINARY_FILE=${DS_BINARY_FILE:-"mozilla_voice_stt"} if [ "${_maybe_cuda}" = "cuda" ]; then set +e @@ -278,7 +278,7 @@ ensure_cuda_usage() check_versions() { set +e - ds_help=$(${DS_BINARY_PREFIX}deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>&1 1>/dev/null) + ds_help=$(${DS_BINARY_PREFIX}mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>&1 1>/dev/null) set -e assert_tensorflow_version "${ds_help}" @@ -290,7 +290,7 @@ assert_deepspeech_runtime() local expected_runtime=$1 set +e - local ds_version=$(${DS_BINARY_PREFIX}deepspeech --version 2>&1) + local ds_version=$(${DS_BINARY_PREFIX}mozilla_voice_stt --version 2>&1) set -e assert_shows_something "${ds_version}" "${expected_runtime}" @@ -309,12 +309,12 @@ check_runtime_electronjs() run_tflite_basic_inference_tests() { set +e - phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}mozilla_voice_stt --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}mozilla_voice_stt --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?" 
} @@ -345,22 +345,22 @@ run_netframework_inference_tests() run_electronjs_inference_tests() { set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?" } @@ -368,30 +368,30 @@ run_electronjs_inference_tests() run_basic_inference_tests() { set +e - deepspeech --model "" --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr + mozilla_voice_stt --model "" --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr set -e grep "Missing model information" ${TASKCLUSTER_TMP_DIR}/stderr set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status" set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status" set +e - phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" @@ -402,13 +402,13 @@ run_all_inference_tests() run_basic_inference_tests set +e - phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_nolm_stereo_44k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status" @@ -416,12 +416,12 @@ run_all_inference_tests() # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e - phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_nolm_mono_8k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}" set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -452,19 +452,19 @@ run_prod_inference_tests() local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" @@ -472,7 +472,7 @@ run_prod_inference_tests() # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -483,19 +483,19 @@ run_prodtflite_inference_tests() local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_prodtflitemodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" @@ -503,7 +503,7 @@ run_prodtflite_inference_tests() # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -512,13 +512,13 @@ run_prodtflite_inference_tests() run_multi_inference_tests() { set +e -o pipefail - multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') + multi_phrase_pbmodel_nolm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') status=$? set -e +o pipefail assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status" set +e -o pipefail - multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') + multi_phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') status=$? set -e +o pipefail assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status" @@ -527,7 +527,7 @@ run_multi_inference_tests() run_cpp_only_inference_tests() { set +e - phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm_intermediate_decode=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status" @@ -536,13 +536,13 @@ run_cpp_only_inference_tests() run_js_streaming_inference_tests() { set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? 
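The multi-file tests above also toggle pipefail together with errexit, so the client's exit status survives the pipe into tr, which flattens the per-file transcripts into one comparable string. A reduced sketch of that idiom (directory path illustrative):

    set +e -o pipefail                 # tolerate failure, but propagate it through the pipe
    joined=$(mozilla_voice_stt --model /tmp/output_graph.pb --audio /tmp/wavs/ 2>/dev/null | tr '\n' '%')
    status=$?                          # with pipefail, this reflects a failing mozilla_voice_stt
    set -e +o pipefail
    assert_correct_multi_ldc93s1 "${joined}" "${status}"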
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" @@ -552,14 +552,14 @@ run_js_streaming_prod_inference_tests() { local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" @@ -569,14 +569,14 @@ run_js_streaming_prodtflite_inference_tests() { local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm=$(mozilla_voice_stt --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? 
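In streaming mode the client prints intermediate decodes while audio is fed, so these tests keep only the last line of output. A sketch using the flags exercised above (paths illustrative):

    set +e
    final=$(mozilla_voice_stt --model /tmp/output_graph.pbmm \
                              --scorer /tmp/kenlm.scorer \
                              --audio /tmp/LDC93S1.wav \
                              --stream --extended 2>/dev/null | tail -n 1)
    status=$?   # note: without pipefail this is tail's status, mirroring the tests above
    set -e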
set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 4d47877150..56e7c3a826 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -17,9 +17,9 @@ do_deepspeech_python_build() SETUP_FLAGS="" if [ "${package_option}" = "--cuda" ]; then - SETUP_FLAGS="--project_name deepspeech-gpu" + SETUP_FLAGS="--project_name mozilla_voice_stt-gpu" elif [ "${package_option}" = "--tflite" ]; then - SETUP_FLAGS="--project_name deepspeech-tflite" + SETUP_FLAGS="--project_name mozilla_voice_stt-tflite" fi for pyver_conf in ${SUPPORTED_PYTHON_VERSIONS}; do @@ -133,9 +133,9 @@ do_deepspeech_nodejs_build() done; if [ "${rename_to_gpu}" = "--cuda" ]; then - make -C native_client/javascript clean npm-pack PROJECT_NAME=deepspeech-gpu + make -C native_client/javascript clean npm-pack PROJECT_NAME=mozilla_voice_stt-gpu else - make -C native_client/javascript clean npm-pack + make -C native_client/javascript clean npm-pack PROJECT_NAME=mozilla_voice_stt fi tar -czf native_client/javascript/wrapper.tar.gz \ @@ -165,9 +165,9 @@ do_deepspeech_npm_package() done; if [ "${package_option}" = "--cuda" ]; then - make -C native_client/javascript clean npm-pack PROJECT_NAME=deepspeech-gpu + make -C native_client/javascript clean npm-pack PROJECT_NAME=mozilla_voice_stt-gpu elif [ "${package_option}" = "--tflite" ]; then - make -C native_client/javascript clean npm-pack PROJECT_NAME=deepspeech-tflite + make -C native_client/javascript clean npm-pack PROJECT_NAME=mozilla_voice_stt-tflite else make -C native_client/javascript clean npm-pack fi @@ -208,7 +208,7 @@ do_deepspeech_binary_build() EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" \ EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" \ EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \ - deepspeech${PLATFORM_EXE_SUFFIX} + mozilla_voice_stt${PLATFORM_EXE_SUFFIX} } do_deepspeech_ndk_build() @@ -301,7 +301,7 @@ do_nuget_build() cd ${DS_DSDIR}/native_client/dotnet - cp ${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so nupkg/build + cp ${DS_TFDIR}/bazel-bin/native_client/libmozilla_voice_stt.so nupkg/build # We copy the generated clients for .NET into the Nuget framework dirs @@ -329,7 +329,7 @@ do_nuget_build() do_deepspeech_ios_framework_build() { arch=$1 - cp ${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so ${DS_DSDIR}/native_client/swift/libdeepspeech.so + cp ${DS_TFDIR}/bazel-bin/native_client/libmozilla_voice_stt.so ${DS_DSDIR}/native_client/swift/libmozilla_voice_stt.so cd ${DS_DSDIR}/native_client/swift case $arch in "--x86_64") diff --git a/taskcluster/tc-dotnet-utils.sh b/taskcluster/tc-dotnet-utils.sh index 66cbabf15b..f1ccdfad4a 100755 --- a/taskcluster/tc-dotnet-utils.sh +++ b/taskcluster/tc-dotnet-utils.sh @@ -36,7 +36,7 @@ install_nuget() nuget install NAudio cp NAudio*/lib/net35/NAudio.dll ${TASKCLUSTER_TMP_DIR}/ds/ - cp ${PROJECT_NAME}.${DS_VERSION}/build/libdeepspeech.so ${TASKCLUSTER_TMP_DIR}/ds/ + cp ${PROJECT_NAME}.${DS_VERSION}/build/libmozilla_voice_stt.so ${TASKCLUSTER_TMP_DIR}/ds/ cp ${PROJECT_NAME}.${DS_VERSION}/lib/net46/DeepSpeechClient.dll ${TASKCLUSTER_TMP_DIR}/ds/ ls -hal ${TASKCLUSTER_TMP_DIR}/ds/ diff --git a/taskcluster/tc-evaluate_tflite.sh b/taskcluster/tc-evaluate_tflite.sh index 8a4a94500e..8829672b3a 100755 --- a/taskcluster/tc-evaluate_tflite.sh +++ b/taskcluster/tc-evaluate_tflite.sh @@ -22,8 +22,8 @@ pushd ${HOME}/DeepSpeech/ds popd set +o pipefail -which deepspeech -deepspeech --version +which mozilla_voice_stt 
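Each language package picks up the new name through a build-time project-name switch rather than a hard-coded string. Condensed, the mapping these hunks establish looks like the sketch below (illustrative only; the scripts themselves leave the default branch's flags empty and rely on the package metadata defaults):

    case "${package_option}" in
      --cuda)   project_name="mozilla_voice_stt-gpu" ;;
      --tflite) project_name="mozilla_voice_stt-tflite" ;;
      *)        project_name="mozilla_voice_stt" ;;
    esac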
+mozilla_voice_stt --version pushd ${HOME}/DeepSpeech/ds/ python bin/import_ldc93s1.py data/smoke_test diff --git a/taskcluster/tc-netframework-ds-tests.sh b/taskcluster/tc-netframework-ds-tests.sh index e74d0cb86c..ee9f52502c 100644 --- a/taskcluster/tc-netframework-ds-tests.sh +++ b/taskcluster/tc-netframework-ds-tests.sh @@ -11,14 +11,14 @@ bitrate=$1 set_ldc_sample_filename "${bitrate}" if [ "${package_option}" = "cuda" ]; then - PROJECT_NAME="DeepSpeech-GPU" + PROJECT_NAME="Mozilla-Voice-STT-GPU" elif [ "${package_option}" = "--tflite" ]; then - PROJECT_NAME="DeepSpeech-TFLite" + PROJECT_NAME="Mozilla-Voice-STT-TFLite" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") else - PROJECT_NAME="DeepSpeech" + PROJECT_NAME="Mozilla-Voice-STT" fi download_data diff --git a/taskcluster/tc-node-utils.sh b/taskcluster/tc-node-utils.sh index 8c33cb393d..5a10a40dde 100755 --- a/taskcluster/tc-node-utils.sh +++ b/taskcluster/tc-node-utils.sh @@ -7,8 +7,8 @@ get_dep_npm_pkg_url() { local all_deps="$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')" - # We try "deepspeech-tflite" and "deepspeech-gpu" first and if we don't find it we try "deepspeech" - for pkg_basename in "deepspeech-tflite" "deepspeech-gpu" "deepspeech"; do + # We try "mozilla_voice_stt-tflite" and "mozilla_voice_stt-gpu" first and if we don't find it we try "mozilla_voice_stt" + for pkg_basename in "mozilla_voice_stt-tflite" "mozilla_voice_stt-gpu" "mozilla_voice_stt"; do local deepspeech_pkg="${pkg_basename}-${DS_VERSION}.tgz" for dep in ${all_deps}; do local has_artifact=$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${dep}/artifacts | python -c 'import json; import sys; has_artifact = True in [ e["name"].find("'${deepspeech_pkg}'") > 0 for e in json.loads(sys.stdin.read())["artifacts"] ]; print(has_artifact)') diff --git a/taskcluster/tc-package.sh b/taskcluster/tc-package.sh index 652805516e..ce17672482 100755 --- a/taskcluster/tc-package.sh +++ b/taskcluster/tc-package.sh @@ -22,12 +22,12 @@ package_native_client() fi; ${TAR} -cf - \ - -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \ - -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so.if.lib \ + -C ${tensorflow_dir}/bazel-bin/native_client/ libmozilla_voice_stt.so \ + -C ${tensorflow_dir}/bazel-bin/native_client/ libmozilla_voice_stt.so.if.lib \ -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \ -C ${deepspeech_dir}/ LICENSE \ - -C ${deepspeech_dir}/native_client/ deepspeech${PLATFORM_EXE_SUFFIX} \ - -C ${deepspeech_dir}/native_client/ deepspeech.h \ + -C ${deepspeech_dir}/native_client/ mozilla_voice_stt${PLATFORM_EXE_SUFFIX} \ + -C ${deepspeech_dir}/native_client/ mozilla_voice_stt.h \ -C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \ | ${XZ} > "${artifacts_dir}/${artifact_name}" } @@ -56,11 +56,11 @@ package_native_client_ndk() fi; tar -cf - \ - -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ deepspeech \ - -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libdeepspeech.so \ + -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ mozilla_voice_stt \ + -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libmozilla_voice_stt.so \ -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \ -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \ - 
-C ${deepspeech_dir}/native_client/ deepspeech.h \ + -C ${deepspeech_dir}/native_client/ mozilla_voice_stt.h \ -C ${deepspeech_dir}/ LICENSE \ -C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \ | pixz -9 > "${artifacts_dir}/${artifact_name}" @@ -83,5 +83,5 @@ package_libdeepspeech_as_zip() echo "Please specify artifact name." fi; - zip -r9 --junk-paths "${artifacts_dir}/${artifact_name}" ${tensorflow_dir}/bazel-bin/native_client/libdeepspeech.so + zip -r9 --junk-paths "${artifacts_dir}/${artifact_name}" ${tensorflow_dir}/bazel-bin/native_client/libmozilla_voice_stt.so } diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh index 36f8073157..5aa6b98115 100755 --- a/taskcluster/tc-py-utils.sh +++ b/taskcluster/tc-py-utils.sh @@ -268,7 +268,7 @@ get_python_pkg_url() local pkgname=$3 if [ -z "${pkgname}" ]; then - pkgname="deepspeech" + pkgname="mozilla_voice_stt" fi local root=$4 @@ -286,17 +286,17 @@ get_python_pkg_url() get_tflite_python_pkg_name() { # Default to deepspeech package - local _pkgname="deepspeech_tflite" + local _pkgname="mozilla_voice_stt_tflite" ARCH=$(uname -m) case "${OS}:${ARCH}" in Linux:armv7l|Linux:aarch64) - # On linux/arm or linux/aarch64 we don't produce deepspeech_tflite - _pkgname="deepspeech" + # On linux/arm or linux/aarch64 we don't produce mozilla_voice_stt_tflite + _pkgname="mozilla_voice_stt" ;; *) - _pkgname="deepspeech_tflite" + _pkgname="mozilla_voice_stt_tflite" ;; esac diff --git a/taskcluster/tc-python-tests.sh b/taskcluster/tc-python-tests.sh index d55a30978b..ccb8f9488e 100644 --- a/taskcluster/tc-python-tests.sh +++ b/taskcluster/tc-python-tests.sh @@ -14,15 +14,15 @@ download_data virtualenv_activate "${pyalias}" "deepspeech" if [ "$3" = "cuda" ]; then - deepspeech_pkg_url=$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "deepspeech_gpu") + deepspeech_pkg_url=$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "mozilla_voice_stt_gpu") else deepspeech_pkg_url=$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}") fi; LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${deepspeech_pkg_url} | cat -which deepspeech -deepspeech --version +which mozilla_voice_stt +mozilla_voice_stt --version ensure_cuda_usage "$3" diff --git a/taskcluster/tc-python_tflite-tests-prod.sh b/taskcluster/tc-python_tflite-tests-prod.sh index 5f73484c1f..6ea705dbc1 100644 --- a/taskcluster/tc-python_tflite-tests-prod.sh +++ b/taskcluster/tc-python_tflite-tests-prod.sh @@ -28,8 +28,8 @@ pkg_name=$(get_tflite_python_pkg_name) deepspeech_pkg_url=$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "${pkg_name}") LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${deepspeech_pkg_url} | cat -which deepspeech -deepspeech --version +which mozilla_voice_stt +mozilla_voice_stt --version run_prodtflite_inference_tests "${bitrate}" diff --git a/taskcluster/tc-python_tflite-tests.sh b/taskcluster/tc-python_tflite-tests.sh index a95adf4008..b7dc8d65ff 100644 --- a/taskcluster/tc-python_tflite-tests.sh +++ b/taskcluster/tc-python_tflite-tests.sh @@ -28,8 +28,8 @@ pkg_name=$(get_tflite_python_pkg_name) deepspeech_pkg_url=$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "${pkg_name}") LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${deepspeech_pkg_url} | cat -which deepspeech -deepspeech --version +which mozilla_voice_stt +mozilla_voice_stt --version run_all_inference_tests diff --git 
a/taskcluster/win-build.sh b/taskcluster/win-build.sh index 8eb6b0e971..548d698a59 100755 --- a/taskcluster/win-build.sh +++ b/taskcluster/win-build.sh @@ -9,20 +9,20 @@ source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" -//native_client:libdeepspeech.so +//native_client:libmozilla_voice_stt.so //native_client:generate_scorer_package " if [ "${package_option}" = "--cuda" ]; then BAZEL_ENV_FLAGS="TF_NEED_CUDA=1 ${TF_CUDA_FLAGS}" BAZEL_BUILD_FLAGS="${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS}" - PROJECT_NAME="DeepSpeech-GPU" + PROJECT_NAME="Mozilla-Voice-STT-GPU" elif [ "${package_option}" = "--tflite" ]; then - PROJECT_NAME="DeepSpeech-TFLite" + PROJECT_NAME="Mozilla-Voice-STT-TFLite" BAZEL_BUILD_FLAGS="--define=runtime=tflite ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" else - PROJECT_NAME="DeepSpeech" + PROJECT_NAME="Mozilla-Voice-STT" BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" fi @@ -32,7 +32,7 @@ SYSTEM_TARGET=host-win do_bazel_build if [ "${package_option}" = "--cuda" ]; then - cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-bin/native_client/liblibdeepspeech.so.ifso ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-bin/native_client/libdeepspeech.so.if.lib + cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-bin/native_client/liblibmozilla_voice_stt.so.ifso ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-bin/native_client/libmozilla_voice_stt.so.if.lib fi export PATH=$PATH:$(cygpath ${ChocolateyInstall})/bin:'/c/Program Files/nodejs/' diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index db232e2482..872ab12311 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -32,8 +32,6 @@ payload: env: TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' MSYS: 'winsymlinks:nativestrict' - EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "master" command: - >- @@ -50,8 +48,6 @@ payload: env && ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64 && (7z x -txz -so $USERPROFILE/home.tar.xz | 7z x -aoa -ttar -si ) && - git clone --quiet $EXAMPLES_CLONE_URL $TASKCLUSTER_TASK_DIR/DeepSpeech/examples && - cd $TASKCLUSTER_TASK_DIR/DeepSpeech/examples && git checkout --quiet $EXAMPLES_CHECKOUT_TARGET && cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git remote set-url origin ${event.head.repo.url} && git fetch origin && git checkout --quiet ${event.head.sha} && git submodule --quiet sync tensorflow/ && git submodule --quiet update tensorflow/ && (rm $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tensorflow/native_client || true) && diff --git a/taskcluster/win-package.sh b/taskcluster/win-package.sh index 96c1d7fa0f..9ebcc469bc 100755 --- a/taskcluster/win-package.sh +++ b/taskcluster/win-package.sh @@ -10,7 +10,7 @@ cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ package_native_client "native_client.tar.xz" -package_libdeepspeech_as_zip "libdeepspeech.zip" +package_libdeepspeech_as_zip "libmozilla_voice_stt.zip" cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/dotnet/*.nupkg ${TASKCLUSTER_ARTIFACTS}/ @@ -22,5 +22,5 @@ fi; if [ -f ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/wrapper.tar.gz ]; then cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/wrapper.tar.gz ${TASKCLUSTER_ARTIFACTS}/ - cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/deepspeech-*.tgz ${TASKCLUSTER_ARTIFACTS}/ + cp 
${DS_ROOT_TASK}/DeepSpeech/ds/native_client/javascript/mozilla_voice_stt-*.tgz ${TASKCLUSTER_ARTIFACTS}/ fi; diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 47052a0723..d23c88f175 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -677,7 +677,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): mfccs = tf.identity(mfccs, name='mfccs') # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] - # This shape is read by the native_client in DS_CreateModel to know the + # This shape is read by the native_client in STT_CreateModel to know the # value of n_steps, n_context and n_input. Make sure you update the code # there if this shape is changed. input_tensor = tfv1.placeholder(tf.float32, [batch_size, n_steps if n_steps > 0 else None, 2 * Config.n_context + 1, Config.n_input], name='input_node') From ee7bf86460f106ee109e458fea6c0aca1450ebf7 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 4 Aug 2020 11:15:27 +0200 Subject: [PATCH 02/12] .NET rename --- .../{DeepSpeech.sln => MozillaVoiceStt.sln} | 4 +- .../Enums/ErrorCodes.cs | 4 +- .../Extensions/NativeExtensions.cs | 4 +- .../Interfaces/IModel.cs} | 26 ++-- .../Interfaces/IMozillaVoiceSttModel.cs | 130 ++++++++++++++++++ .../Models/CandidateTranscript.cs | 2 +- .../Models/DeepSpeechStream.cs | 8 +- .../Models/Metadata.cs | 2 +- .../Models/TokenMetadata.cs | 2 +- .../MozillaVoiceStt.cs} | 42 +++--- .../MozillaVoiceSttClient.csproj} | 0 .../NativeImp.cs | 4 +- .../Structs/CandidateTranscript.cs | 2 +- .../Structs/Metadata.cs | 2 +- .../Structs/TokenMetadata.cs | 2 +- .../App.config | 0 .../MozillaVoiceSttConsole.csproj} | 8 +- .../Program.cs | 8 +- .../Properties/AssemblyInfo.cs | 2 +- .../arctic_a0024.wav | Bin .../packages.config | 0 .../.gitignore | 0 .../App.config | 0 .../App.xaml | 4 +- .../App.xaml.cs | 18 +-- .../MainWindow.xaml | 4 +- .../MainWindow.xaml.cs | 4 +- .../MozillaVoiceStt.WPF.csproj} | 8 +- .../MozillaVoiceStt.WPF.sln} | 4 +- .../Properties/AssemblyInfo.cs | 4 +- .../Properties/Resources.Designer.cs | 4 +- .../Properties/Resources.resx | 0 .../Properties/Settings.Designer.cs | 2 +- .../Properties/Settings.settings | 0 .../ViewModels/BindableBase.cs | 2 +- .../ViewModels/MainWindowViewModel.cs | 17 +-- .../packages.config | 0 native_client/dotnet/README.rst | 10 +- .../dotnet/nupkg/deepspeech.nuspec.in | 4 +- 39 files changed, 234 insertions(+), 103 deletions(-) rename native_client/dotnet/{DeepSpeech.sln => MozillaVoiceStt.sln} (77%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Enums/ErrorCodes.cs (91%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Extensions/NativeExtensions.cs (97%) rename native_client/dotnet/{DeepSpeechClient/Interfaces/IDeepSpeech.cs => MozillaVoiceSttClient/Interfaces/IModel.cs} (86%) create mode 100644 native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Models/CandidateTranscript.cs (92%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Models/DeepSpeechStream.cs (80%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Models/Metadata.cs (88%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Models/TokenMetadata.cs (92%) rename native_client/dotnet/{DeepSpeechClient/DeepSpeech.cs => MozillaVoiceSttClient/MozillaVoiceStt.cs} (87%) rename 
native_client/dotnet/{DeepSpeechClient/DeepSpeechClient.csproj => MozillaVoiceSttClient/MozillaVoiceSttClient.csproj} (100%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/NativeImp.cs (98%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Structs/CandidateTranscript.cs (93%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Structs/Metadata.cs (91%) rename native_client/dotnet/{DeepSpeechClient => MozillaVoiceSttClient}/Structs/TokenMetadata.cs (93%) rename native_client/dotnet/{DeepSpeechConsole => MozillaVoiceSttConsole}/App.config (100%) rename native_client/dotnet/{DeepSpeechConsole/DeepSpeechConsole.csproj => MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj} (92%) rename native_client/dotnet/{DeepSpeechConsole => MozillaVoiceSttConsole}/Program.cs (94%) rename native_client/dotnet/{DeepSpeechConsole => MozillaVoiceSttConsole}/Properties/AssemblyInfo.cs (96%) rename native_client/dotnet/{DeepSpeechConsole => MozillaVoiceSttConsole}/arctic_a0024.wav (100%) rename native_client/dotnet/{DeepSpeechConsole => MozillaVoiceSttConsole}/packages.config (100%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/.gitignore (100%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/App.config (100%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/App.xaml (71%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/App.xaml.cs (61%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/MainWindow.xaml (97%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/MainWindow.xaml.cs (85%) rename native_client/dotnet/{DeepSpeechWPF/DeepSpeech.WPF.csproj => MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj} (94%) rename native_client/dotnet/{DeepSpeechWPF/DeepSpeech.WPF.sln => MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln} (79%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/Properties/AssemblyInfo.cs (95%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/Properties/Resources.Designer.cs (94%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/Properties/Resources.resx (100%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/Properties/Settings.Designer.cs (96%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/Properties/Settings.settings (100%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/ViewModels/BindableBase.cs (98%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/ViewModels/MainWindowViewModel.cs (96%) rename native_client/dotnet/{DeepSpeechWPF => MozillaVoiceSttWPF}/packages.config (100%) diff --git a/native_client/dotnet/DeepSpeech.sln b/native_client/dotnet/MozillaVoiceStt.sln similarity index 77% rename from native_client/dotnet/DeepSpeech.sln rename to native_client/dotnet/MozillaVoiceStt.sln index 78afe7db06..0bf2b52e93 100644 --- a/native_client/dotnet/DeepSpeech.sln +++ b/native_client/dotnet/MozillaVoiceStt.sln @@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.30204.135 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MozillaVoiceSttClient", "MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" 
EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttConsole", "MozillaVoiceSttConsole\MozillaVoiceSttConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs similarity index 91% rename from native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs rename to native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs index 600c91d30f..aa816f8d7e 100644 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs @@ -1,7 +1,7 @@ -namespace DeepSpeechClient.Enums +namespace MozillaVoiceSttClient.Enums { /// - /// Error codes from the native DeepSpeech binary. + /// Error codes from the native Mozilla Voice STT binary. /// internal enum ErrorCodes { diff --git a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs b/native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs similarity index 97% rename from native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs rename to native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs index 3e18f7cb9c..0d2229f934 100644 --- a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Structs; +using MozillaVoiceSttClient.Structs; using System; using System.Runtime.InteropServices; using System.Text; -namespace DeepSpeechClient.Extensions +namespace MozillaVoiceSttClient.Extensions { internal static class NativeExtensions { diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs similarity index 86% rename from native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs rename to native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs index e1ed9cad7e..bd8a62e127 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs @@ -1,13 +1,13 @@ -using DeepSpeechClient.Models; +using MozillaVoiceSttClient.Models; using System; using System.IO; -namespace DeepSpeechClient.Interfaces +namespace MozillaVoiceSttClient.Interfaces { /// - /// Client interface of Mozilla's DeepSpeech implementation. + /// Client interface of Mozilla Voice STT. /// - public interface IDeepSpeech : IDisposable + public interface IModel : IDisposable { /// /// Return version of this library. The returned version is a semantic version @@ -59,7 +59,7 @@ public interface IDeepSpeech : IDisposable unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); /// - /// Use the DeepSpeech model to perform Speech-To-Text. + /// Use the Mozilla Voice STT model to perform Speech-To-Text. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -68,7 +68,7 @@ unsafe string SpeechToText(short[] aBuffer, uint aBufferSize); /// - /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata. 
+ /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -83,26 +83,26 @@ unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, /// This can be used if you no longer need the result of an ongoing streaming /// inference and don't want to perform a costly decode operation. /// - unsafe void FreeStream(DeepSpeechStream stream); + unsafe void FreeStream(MozillaVoiceSttStream stream); /// /// Creates a new streaming inference state. /// - unsafe DeepSpeechStream CreateStream(); + unsafe MozillaVoiceSttStream CreateStream(); /// /// Feeds audio samples to an ongoing streaming inference. /// /// Instance of the stream to feed the data. /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). - unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize); + unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize); /// /// Computes the intermediate decoding of an ongoing streaming inference. /// /// Instance of the stream to decode. /// The STT intermediate result. - unsafe string IntermediateDecode(DeepSpeechStream stream); + unsafe string IntermediateDecode(MozillaVoiceSttStream stream); /// /// Computes the intermediate decoding of an ongoing streaming inference, including metadata. @@ -110,14 +110,14 @@ unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, /// Instance of the stream to decode. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// /// Instance of the stream to finish. /// The STT result. - unsafe string FinishStream(DeepSpeechStream stream); + unsafe string FinishStream(MozillaVoiceSttStream stream); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata. @@ -125,6 +125,6 @@ unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, /// Instance of the stream to finish. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); } } diff --git a/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs new file mode 100644 index 0000000000..ede8b5f4bb --- /dev/null +++ b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs @@ -0,0 +1,130 @@ +using MozillaVoiceSttClient.Models; +using System; +using System.IO; + +namespace MozillaVoiceSttClient.Interfaces +{ + /// + /// Client interface of Mozilla Voice STT. + /// + public interface IMozillaVoiceSttModel : IDisposable + { + /// + /// Return version of this library. The returned version is a semantic version + /// (SemVer 2.0.0). 
+ /// + unsafe string Version(); + + /// + /// Return the sample rate expected by the model. + /// + /// Sample rate. + unsafe int GetModelSampleRate(); + + /// + /// Get beam width value used by the model. If SetModelBeamWidth was not + /// called before, will return the default value loaded from the model + /// file. + /// + /// Beam width value used by the model. + unsafe uint GetModelBeamWidth(); + + /// + /// Set beam width value used by the model. + /// + /// The beam width used by the decoder. A larger beam width value generates better results at the cost of decoding time. + /// Thrown on failure. + unsafe void SetModelBeamWidth(uint aBeamWidth); + + /// + /// Enable decoding using an external scorer. + /// + /// The path to the external scorer file. + /// Thrown when the native binary failed to enable decoding with an external scorer. + /// Thrown when cannot find the scorer file. + unsafe void EnableExternalScorer(string aScorerPath); + + /// + /// Disable decoding using an external scorer. + /// + /// Thrown when an external scorer is not enabled. + unsafe void DisableExternalScorer(); + + /// + /// Set hyperparameters alpha and beta of the external scorer. + /// + /// The alpha hyperparameter of the decoder. Language model weight. + /// The beta hyperparameter of the decoder. Word insertion weight. + /// Thrown when an external scorer is not enabled. + unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); + + /// + /// Use the Mozilla Voice STT model to perform Speech-To-Text. + /// + /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + /// The number of samples in the audio signal. + /// The STT result. Returns NULL on error. + unsafe string SpeechToText(short[] aBuffer, + uint aBufferSize); + + /// + /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata. + /// + /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + /// The number of samples in the audio signal. + /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. + /// The extended metadata. Returns NULL on error. + unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, + uint aBufferSize, + uint aNumResults); + + /// + /// Destroy a streaming state without decoding the computed logits. + /// This can be used if you no longer need the result of an ongoing streaming + /// inference and don't want to perform a costly decode operation. + /// + unsafe void FreeStream(MozillaVoiceSttStream stream); + + /// + /// Creates a new streaming inference state. + /// + unsafe MozillaVoiceSttStream CreateStream(); + + /// + /// Feeds audio samples to an ongoing streaming inference. + /// + /// Instance of the stream to feed the data. + /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). + unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize); + + /// + /// Computes the intermediate decoding of an ongoing streaming inference. + /// + /// Instance of the stream to decode. + /// The STT intermediate result. + unsafe string IntermediateDecode(MozillaVoiceSttStream stream); + + /// + /// Computes the intermediate decoding of an ongoing streaming inference, including metadata. + /// + /// Instance of the stream to decode. + /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. 
+ /// The extended metadata result. + unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); + + /// + /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. + /// + /// Instance of the stream to finish. + /// The STT result. + unsafe string FinishStream(MozillaVoiceSttStream stream); + + /// + /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata. + /// + /// Instance of the stream to finish. + /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. + /// The extended metadata result. + unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); + } +} diff --git a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs similarity index 92% rename from native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs index cc6b5d2855..abe1aa3025 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. diff --git a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs similarity index 80% rename from native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs index e4605f5ed8..0223a6bd2d 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs @@ -1,19 +1,19 @@ using System; -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Wrapper of the pointer used for the decoding stream. /// - public class DeepSpeechStream : IDisposable + public class MozillaVoiceSttStream : IDisposable { private unsafe IntPtr** _streamingStatePp; /// - /// Initializes a new instance of . + /// Initializes a new instance of . /// /// Native pointer of the native stream. - public unsafe DeepSpeechStream(IntPtr** streamingStatePP) + public unsafe MozillaVoiceSttStream(IntPtr** streamingStatePP) { _streamingStatePp = streamingStatePP; } diff --git a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs similarity index 88% rename from native_client/dotnet/DeepSpeechClient/Models/Metadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs index fb6c613dfd..ea0666bf17 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. 
diff --git a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs
similarity index 92%
rename from native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs
rename to native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs
index 5f2dea562f..86e8bdda1d 100644
--- a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs
+++ b/native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs
@@ -1,4 +1,4 @@
-namespace DeepSpeechClient.Models
+namespace MozillaVoiceSttClient.Models
 {
     /// <summary>
     /// Stores each individual character, along with its timing information.
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs
similarity index 87%
rename from native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
rename to native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs
index fda061d760..f2b67fb7ff 100644
--- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
+++ b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs
@@ -1,34 +1,34 @@
-using DeepSpeechClient.Interfaces;
-using DeepSpeechClient.Extensions;
+using MozillaVoiceSttClient.Interfaces;
+using MozillaVoiceSttClient.Extensions;
 using System;
 using System.IO;
-using DeepSpeechClient.Enums;
-using DeepSpeechClient.Models;
+using MozillaVoiceSttClient.Enums;
+using MozillaVoiceSttClient.Models;
 
-namespace DeepSpeechClient
+namespace MozillaVoiceSttClient
 {
     /// <summary>
-    /// Concrete implementation of <see cref="IDeepSpeech"/>.
+    /// Concrete implementation of <see cref="IMozillaVoiceSttModel"/>.
     /// </summary>
-    public class DeepSpeech : IDeepSpeech
+    public class MozillaVoiceSttModel : IMozillaVoiceSttModel
     {
         private unsafe IntPtr** _modelStatePP;
 
         /// <summary>
-        /// Initializes a new instance of <see cref="DeepSpeech"/> class and creates a new acoustic model.
+        /// Initializes a new instance of <see cref="MozillaVoiceSttModel"/> class and creates a new acoustic model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-        public DeepSpeech(string aModelPath)
+        public MozillaVoiceSttModel(string aModelPath)
         {
             CreateModel(aModelPath);
         }
 
-        #region IDeepSpeech
+        #region IMozillaVoiceSttModel
 
         /// <summary>
-        /// Create an object providing an interface to a trained DeepSpeech model.
+        /// Create an object providing an interface to a trained Mozilla Voice STT model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
@@ -153,7 +153,7 @@ public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta)
         /// </summary>
         /// <param name="stream">Instance of the stream to feed the data.</param>
         /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
-        public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize)
+        public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize)
         {
             NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
         }
@@ -163,7 +163,7 @@ public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, ui
         /// </summary>
         /// <param name="stream">Instance of the stream to finish.</param>
         /// <returns>The STT result.</returns>
-        public unsafe string FinishStream(DeepSpeechStream stream)
+        public unsafe string FinishStream(MozillaVoiceSttStream stream)
         {
             return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString();
         }
@@ -174,7 +174,7 @@ public unsafe string FinishStream(DeepSpeechStream stream)
         /// <param name="stream">Instance of the stream to finish.</param>
         /// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
         /// <returns>The extended metadata result.</returns>
-        public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults)
+        public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
         {
             return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
         }
@@ -184,7 +184,7 @@
         /// </summary>
         /// <param name="stream">Instance of the stream to decode.</param>
         /// <returns>The STT intermediate result.</returns>
-        public unsafe string IntermediateDecode(DeepSpeechStream stream)
+        public unsafe string IntermediateDecode(MozillaVoiceSttStream stream)
         {
             return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString();
         }
@@ -195,7 +195,7 @@ public unsafe string IntermediateDecode(DeepSpeechStream stream)
         /// <param name="stream">Instance of the stream to decode.</param>
         /// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
         /// <returns>The STT intermediate result.</returns>
-        public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults)
+        public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
        {
             return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
         }
@@ -212,12 +212,12 @@ public unsafe string Version()
         /// <summary>
         /// Creates a new streaming inference state.
         /// </summary>
-        public unsafe DeepSpeechStream CreateStream()
+        public unsafe MozillaVoiceSttStream CreateStream()
         {
             IntPtr** streamingStatePointer = null;
             var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer);
             EvaluateResultCode(resultCode);
-            return new DeepSpeechStream(streamingStatePointer);
+            return new MozillaVoiceSttStream(streamingStatePointer);
         }
 
         /// <summary>
@@ -225,14 +225,14 @@ public unsafe DeepSpeechStream CreateStream()
         /// This can be used if you no longer need the result of an ongoing streaming
         /// inference and don't want to perform a costly decode operation.
         /// </summary>
-        public unsafe void FreeStream(DeepSpeechStream stream)
+        public unsafe void FreeStream(MozillaVoiceSttStream stream)
         {
             NativeImp.STT_FreeStream(stream.GetNativePointer());
             stream.Dispose();
         }
 
         /// <summary>
-        /// Use the DeepSpeech model to perform Speech-To-Text.
+        /// Use the Mozilla Voice STT model to perform Speech-To-Text.
         /// </summary>
         /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
         /// <param name="aBufferSize">The number of samples in the audio signal.</param>
@@ -243,7 +243,7 @@ public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
         }
 
         /// <summary>
-        /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
+        /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
         /// </summary>
         /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
         /// <param name="aBufferSize">The number of samples in the audio signal.</param>
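All of these wrappers bottom out in the STT_-prefixed entry points of the renamed shared library. A quick sanity check that a freshly built libmozilla_voice_stt.so exports the symbols the managed code binds to (a sketch assuming Linux with binutils, not part of the patch's CI):

    nm -D libmozilla_voice_stt.so | grep ' STT_'
    nm -D libmozilla_voice_stt.so | grep -E 'STT_(CreateStream|FeedAudioContent|IntermediateDecode|FinishStream)'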
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceSttClient.csproj similarity index 100% rename from native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj rename to native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceSttClient.csproj diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs similarity index 98% rename from native_client/dotnet/DeepSpeechClient/NativeImp.cs rename to native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs index 566952960a..daad79acb6 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Enums; +using MozillaVoiceSttClient.Enums; using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient +namespace MozillaVoiceSttClient { /// /// Wrapper for the native implementation of "libmozilla_voice_stt.so" diff --git a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs index 54581f6f84..9029d0f5cc 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct CandidateTranscript diff --git a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs similarity index 91% rename from native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs index 0a9beddce5..a354759abc 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct Metadata diff --git a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs index 1c660c71cc..1f54e5d48e 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct TokenMetadata diff --git a/native_client/dotnet/DeepSpeechConsole/App.config b/native_client/dotnet/MozillaVoiceSttConsole/App.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/App.config rename to native_client/dotnet/MozillaVoiceSttConsole/App.config diff --git a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj 
b/native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj similarity index 92% rename from native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj rename to native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj index a05fca6141..13a8b3551e 100644 --- a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj +++ b/native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj @@ -6,8 +6,8 @@ AnyCPU {312965E5-C4F6-4D95-BA64-79906B8BC7AC} Exe - DeepSpeechConsole - DeepSpeechConsole + MozillaVoiceSttConsole + MozillaVoiceSttConsole v4.6.2 512 true @@ -56,9 +56,9 @@ - + {56DE4091-BBBE-47E4-852D-7268B33B971F} - DeepSpeechClient + MozillaVoiceSttClient diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/MozillaVoiceSttConsole/Program.cs similarity index 94% rename from native_client/dotnet/DeepSpeechConsole/Program.cs rename to native_client/dotnet/MozillaVoiceSttConsole/Program.cs index 68f3fc54b9..f94f5de16e 100644 --- a/native_client/dotnet/DeepSpeechConsole/Program.cs +++ b/native_client/dotnet/MozillaVoiceSttConsole/Program.cs @@ -1,6 +1,6 @@ -using DeepSpeechClient; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using MozillaVoiceSttClient; +using MozillaVoiceSttClient.Interfaces; +using MozillaVoiceSttClient.Models; using NAudio.Wave; using System; using System.Collections.Generic; @@ -52,7 +52,7 @@ static void Main(string[] args) Console.WriteLine("Loading model..."); stopwatch.Start(); // sphinx-doc: csharp_ref_model_start - using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm")) + using (IMozillaVoiceSttModel sttClient = new MozillaVoiceSttModel(model ?? "output_graph.pbmm")) { // sphinx-doc: csharp_ref_model_stop stopwatch.Stop(); diff --git a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs b/native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs similarity index 96% rename from native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs rename to native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs index 845851a185..f3257c6409 100644 --- a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs @@ -5,7 +5,7 @@ // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. 
-[assembly: AssemblyTitle("DeepSpeechConsole")] +[assembly: AssemblyTitle("MozillaVoiceSttConsole")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] diff --git a/native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav b/native_client/dotnet/MozillaVoiceSttConsole/arctic_a0024.wav similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav rename to native_client/dotnet/MozillaVoiceSttConsole/arctic_a0024.wav diff --git a/native_client/dotnet/DeepSpeechConsole/packages.config b/native_client/dotnet/MozillaVoiceSttConsole/packages.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/packages.config rename to native_client/dotnet/MozillaVoiceSttConsole/packages.config diff --git a/native_client/dotnet/DeepSpeechWPF/.gitignore b/native_client/dotnet/MozillaVoiceSttWPF/.gitignore similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/.gitignore rename to native_client/dotnet/MozillaVoiceSttWPF/.gitignore diff --git a/native_client/dotnet/DeepSpeechWPF/App.config b/native_client/dotnet/MozillaVoiceSttWPF/App.config similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/App.config rename to native_client/dotnet/MozillaVoiceSttWPF/App.config diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml similarity index 71% rename from native_client/dotnet/DeepSpeechWPF/App.xaml rename to native_client/dotnet/MozillaVoiceSttWPF/App.xaml index 16ebb0d435..ca6a0f1369 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml +++ b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml @@ -1,8 +1,8 @@  diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs similarity index 61% rename from native_client/dotnet/DeepSpeechWPF/App.xaml.cs rename to native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs index d4b87d6e60..973046b85e 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs @@ -1,10 +1,10 @@ using CommonServiceLocator; -using DeepSpeech.WPF.ViewModels; -using DeepSpeechClient.Interfaces; +using MozillaVoiceStt.WPF.ViewModels; +using MozillaVoiceStt.Interfaces; using GalaSoft.MvvmLight.Ioc; using System.Windows; -namespace DeepSpeechWPF +namespace MozillaVoiceSttWPF { /// /// Interaction logic for App.xaml @@ -18,11 +18,11 @@ protected override void OnStartup(StartupEventArgs e) try { - //Register instance of DeepSpeech - DeepSpeechClient.DeepSpeech deepSpeechClient = - new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm"); + //Register instance of Mozilla Voice STT + MozillaVoiceSttClient.Model client = + new MozillaVoiceSttClient.Model("deepspeech-0.8.0-models.pbmm"); - SimpleIoc.Default.Register(() => deepSpeechClient); + SimpleIoc.Default.Register(() => client); SimpleIoc.Default.Register(); } catch (System.Exception ex) @@ -35,8 +35,8 @@ protected override void OnStartup(StartupEventArgs e) protected override void OnExit(ExitEventArgs e) { base.OnExit(e); - //Dispose instance of DeepSpeech - ServiceLocator.Current.GetInstance()?.Dispose(); + //Dispose instance of Mozilla Voice STT + ServiceLocator.Current.GetInstance()?.Dispose(); } } } diff --git a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml b/native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml similarity index 97% rename from native_client/dotnet/DeepSpeechWPF/MainWindow.xaml rename to 
native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml index 4fbe5e72e1..5894fae3bc 100644 --- a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml +++ b/native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml @@ -1,10 +1,10 @@  /// Interaction logic for MainWindow.xaml diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj similarity index 94% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj rename to native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj index 7f46a31e1f..d14a02b707 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj +++ b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj @@ -6,8 +6,8 @@ AnyCPU {54BFD766-4305-4F4C-BA59-AF45505DF3C1} WinExe - DeepSpeech.WPF - DeepSpeech.WPF + MozillaVoiceStt.WPF + MozillaVoiceStt.WPF v4.6.2 512 {60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} @@ -131,9 +131,9 @@ - + {56de4091-bbbe-47e4-852d-7268b33b971f} - DeepSpeechClient + MozillaVoiceSttClient diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln similarity index 79% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln rename to native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln index cd29025ea3..003c6d8e6b 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln +++ b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln @@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.28307.421 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs similarity index 95% rename from native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs index f9ae7d76fe..034ac3d6b9 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs @@ -7,11 +7,11 @@ // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. 
-[assembly: AssemblyTitle("DeepSpeech.WPF")] +[assembly: AssemblyTitle("MozillaVoiceStt.WPF")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")] +[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")] [assembly: AssemblyCopyright("Copyright © 2018")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs similarity index 94% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs index 2da2b4b275..b470f9ae3f 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace MozillaVoiceStt.WPF.Properties { using System; @@ -39,7 +39,7 @@ internal Resources() { internal static global::System.Resources.ResourceManager ResourceManager { get { if (object.ReferenceEquals(resourceMan, null)) { - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly); + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly); resourceMan = temp; } return resourceMan; diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.resx similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.resx diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs similarity index 96% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs index 0f464bc46a..a72186946a 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace MozillaVoiceStt.WPF.Properties { [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.settings similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.settings diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs similarity index 98% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs rename to native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs index 909327ee02..92fd2f57ac 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs +++ 
b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs @@ -3,7 +3,7 @@ using System.ComponentModel; using System.Runtime.CompilerServices; -namespace DeepSpeech.WPF.ViewModels +namespace MozillaVoiceStt.WPF.ViewModels { /// /// Implementation of to simplify models. diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs similarity index 96% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs rename to native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs index 230fd42a3e..0d81c2f05e 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs @@ -3,8 +3,8 @@ using CSCore.CoreAudioAPI; using CSCore.SoundIn; using CSCore.Streams; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using MozillaVoiceSttClient.Interfaces; +using MozillaVoiceSttClient.Models; using GalaSoft.MvvmLight.CommandWpf; using Microsoft.Win32; using System; @@ -15,7 +15,7 @@ using System.Threading; using System.Threading.Tasks; -namespace DeepSpeech.WPF.ViewModels +namespace MozillaVoiceStt.WPF.ViewModels { /// /// View model of the MainWindow View. @@ -27,7 +27,7 @@ public class MainWindowViewModel : BindableBase private const string ScorerPath = "kenlm.scorer"; #endregion - private readonly IDeepSpeech _sttClient; + private readonly IMozillaVoiceSttModel _sttClient; #region Commands /// @@ -62,7 +62,7 @@ public class MainWindowViewModel : BindableBase /// /// Stream used to feed data into the acoustic model. /// - private DeepSpeechStream _sttStream; + private MozillaVoiceSttStream _sttStream; /// /// Records the audio of the selected device. 
@@ -75,7 +75,7 @@ public class MainWindowViewModel : BindableBase
         private SoundInSource _soundInSource;

         ///
-        /// Target wave source.(16KHz Mono 16bit for DeepSpeech)
+        /// Target wave source (16KHz Mono 16bit, as required by Mozilla Voice STT)
         ///
         private IWaveSource _convertedSource;
@@ -200,7 +200,7 @@ public ObservableCollection AvailableRecordDevices
         #endregion

         #region Ctors
-        public MainWindowViewModel(IDeepSpeech sttClient)
+        public MainWindowViewModel(IMozillaVoiceSttModel sttClient)
         {
             _sttClient = sttClient;
@@ -290,7 +290,8 @@ private void Capture_DataAvailable(object sender, DataAvailableEventArgs e)
             //read data from the converedSource
             //important: don't use the e.Data here
             //the e.Data contains the raw data provided by the
-            //soundInSource which won't have the deepspeech required audio format
+            //soundInSource, which won't have the audio format
+            //required by Mozilla Voice STT
             byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
             int read;
diff --git a/native_client/dotnet/DeepSpeechWPF/packages.config b/native_client/dotnet/MozillaVoiceSttWPF/packages.config
similarity index 100%
rename from native_client/dotnet/DeepSpeechWPF/packages.config
rename to native_client/dotnet/MozillaVoiceSttWPF/packages.config
diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst
index f998bfa3c9..26db5b96ce 100644
--- a/native_client/dotnet/README.rst
+++ b/native_client/dotnet/README.rst
@@ -1,8 +1,8 @@
-Building DeepSpeech native client for Windows
-=============================================
+Building Mozilla Voice STT native client for Windows
+====================================================

-Now we can build the native client of DeepSpeech and run inference on Windows using the C# client, to do that we need to compile the ``native_client``.
+Now we can build the Mozilla Voice STT native client and run inference on Windows using the C# client. To do that, we need to compile ``native_client``.

 **Table of Contents**
@@ -59,8 +59,8 @@ There should already be a symbolic link, for this example let's suppose that we
 .
 ├── D:\
-│   ├── cloned            # Contains DeepSpeech and tensorflow side by side
-│   │   └── DeepSpeech    # Root of the cloned DeepSpeech
+│   ├── cloned            # Contains Mozilla Voice STT and tensorflow side by side
+│   │   └── DeepSpeech    # Root of the cloned Mozilla Voice STT
 │   │   ├── tensorflow    # Root of the cloned Mozilla's tensorflow
 └── ...
@@ -142,4 +142,4 @@ Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally y
 Using the generated library
 ---------------------------
-As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ `_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.
+For now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients. Go to `native_client/dotnet/ `_ in your Mozilla Voice STT directory and open the Visual Studio solution, build in Debug or Release mode, and copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.
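Taken together, the hunks above rename the public C# surface of the bindings: ``IDeepSpeech`` becomes ``IMozillaVoiceSttModel``, ``DeepSpeech`` becomes ``MozillaVoiceSttModel``, and ``DeepSpeechStream`` becomes ``MozillaVoiceSttStream``. What follows is a minimal usage sketch, not part of the patch: the type and namespace names are taken from the hunks above, while the method names and signatures (``SpeechToText``, ``CreateStream``, ``FeedAudioContent``, ``FinishStream``) are assumed to carry over unchanged from the pre-rename DeepSpeechClient API, and ``SttConsoleExample`` is an illustrative name.

// Hypothetical sketch of the renamed C# bindings after this patch.
// Assumes only type names changed; method names come from the old
// DeepSpeechClient API and are presumed untouched by the rename.
using MozillaVoiceSttClient;
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Models;

class SttConsoleExample
{
    static void Main()
    {
        // DeepSpeech -> MozillaVoiceSttModel, IDeepSpeech -> IMozillaVoiceSttModel
        using (IMozillaVoiceSttModel model = new MozillaVoiceSttModel("output_graph.pbmm"))
        {
            // One second of 16 kHz mono 16-bit PCM (silence, for illustration).
            short[] audio = new short[16000];

            // One-shot decoding of a complete buffer.
            string text = model.SpeechToText(audio, (uint)audio.Length);

            // Streaming decoding; DeepSpeechStream -> MozillaVoiceSttStream.
            MozillaVoiceSttStream stream = model.CreateStream();
            model.FeedAudioContent(stream, audio, (uint)audio.Length);
            string streamed = model.FinishStream(stream);

            System.Console.WriteLine("{0} / {1}", text, streamed);
        }
    }
}

The ``using`` block mirrors what both clients in the patch do: the model owns native resources, so the console client wraps it in ``using`` and the WPF app disposes it explicitly on exit.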
diff --git a/native_client/dotnet/nupkg/deepspeech.nuspec.in b/native_client/dotnet/nupkg/deepspeech.nuspec.in index fd1a169f14..93a6f6ea16 100644 --- a/native_client/dotnet/nupkg/deepspeech.nuspec.in +++ b/native_client/dotnet/nupkg/deepspeech.nuspec.in @@ -3,13 +3,13 @@ $NUPKG_ID $NUPKG_VERSION - Mozilla_Voice_STT + Mozilla.Voice.STT Mozilla Mozilla MPL-2.0 http://github.com/mozilla/DeepSpeech false - A library for running inference with a DeepSpeech model + A library for running inference with a Mozilla Voice STT model Copyright (c) 2019 Mozilla Corporation native speech speech_recognition From 213590b3265ff486eb65bca65e305929d4b9763b Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 4 Aug 2020 11:39:22 +0200 Subject: [PATCH 03/12] Java rename --- native_client/java/Makefile | 14 +++++----- native_client/java/app/build.gradle | 4 +-- .../stt}/ExampleInstrumentedTest.java | 4 +-- .../java/app/src/main/AndroidManifest.xml | 4 +-- .../stt/MozillaVoiceSttActivity.java} | 10 +++---- .../main/res/layout/activity_deep_speech.xml | 2 +- .../java/app/src/main/res/values/strings.xml | 2 +- .../stt}/ExampleUnitTest.java | 2 +- .../DeepSpeechStreamingState.java | 13 --------- .../src/main/res/values/strings.xml | 3 -- .../.gitignore | 0 .../CMakeLists.txt | 0 .../build.gradle | 14 +++++----- .../gradle.properties | 0 .../libs/.gitignore | 0 .../proguard-rules.pro | 0 .../mozilla/voice/stt}/test/BasicTest.java | 20 ++++++------- .../src/main/AndroidManifest.xml | 2 +- .../voice/stt/MozillaVoiceSttModel.java} | 28 +++++++++---------- .../stt/MozillaVoiceSttStreamingState.java | 13 +++++++++ .../voice/stt_doc}/CandidateTranscript.java | 2 +- .../mozilla/voice/stt_doc/Error_Codes.java} | 18 ++++++------ .../org/mozilla/voice/stt_doc}/Metadata.java | 2 +- .../org/mozilla/voice/stt_doc}/README.rst | 2 +- .../mozilla/voice/stt_doc}/TokenMetadata.java | 2 +- .../src/main/res/values/strings.xml | 3 ++ .../mozilla/voice/stt}/ExampleUnitTest.java | 2 +- native_client/java/settings.gradle | 2 +- 28 files changed, 84 insertions(+), 84 deletions(-) rename native_client/java/app/src/androidTest/java/org/mozilla/{deepspeech => voice/stt}/ExampleInstrumentedTest.java (85%) rename native_client/java/app/src/main/java/org/mozilla/{deepspeech/DeepSpeechActivity.java => voice/stt/MozillaVoiceSttActivity.java} (95%) rename native_client/java/app/src/test/java/org/mozilla/{deepspeech => voice/stt}/ExampleUnitTest.java (91%) delete mode 100644 native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechStreamingState.java delete mode 100644 native_client/java/libdeepspeech/src/main/res/values/strings.xml rename native_client/java/{libdeepspeech => libmozillavoicestt}/.gitignore (100%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/CMakeLists.txt (100%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/build.gradle (89%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/gradle.properties (100%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/libs/.gitignore (100%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/proguard-rules.pro (100%) rename native_client/java/{libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech => libmozillavoicestt/src/androidTest/java/org/mozilla/voice/stt}/test/BasicTest.java (86%) rename native_client/java/{libdeepspeech => libmozillavoicestt}/src/main/AndroidManifest.xml (56%) rename 
native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java => libmozillavoicestt/src/main/java/org/mozilla/voice/stt/MozillaVoiceSttModel.java} (87%) create mode 100644 native_client/java/libmozillavoicestt/src/main/java/org/mozilla/voice/stt/MozillaVoiceSttStreamingState.java rename native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc => libmozillavoicestt/src/main/java/org/mozilla/voice/stt_doc}/CandidateTranscript.java (97%) rename native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc/DeepSpeech_Error_Codes.java => libmozillavoicestt/src/main/java/org/mozilla/voice/stt_doc/Error_Codes.java} (74%) rename native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc => libmozillavoicestt/src/main/java/org/mozilla/voice/stt_doc}/Metadata.java (97%) rename native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc => libmozillavoicestt/src/main/java/org/mozilla/voice/stt_doc}/README.rst (51%) rename native_client/java/{libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc => libmozillavoicestt/src/main/java/org/mozilla/voice/stt_doc}/TokenMetadata.java (97%) create mode 100644 native_client/java/libmozillavoicestt/src/main/res/values/strings.xml rename native_client/java/{libdeepspeech/src/test/java/org/mozilla/deepspeech/libdeepspeech => libmozillavoicestt/src/test/java/org/mozilla/voice/stt}/ExampleUnitTest.java (88%) diff --git a/native_client/java/Makefile b/native_client/java/Makefile index 1af3b83c64..22694841c0 100644 --- a/native_client/java/Makefile +++ b/native_client/java/Makefile @@ -2,7 +2,7 @@ include ../definitions.mk -ARCHS := $(shell grep 'ABI_FILTERS' libdeepspeech/gradle.properties | cut -d'=' -f2 | sed -e 's/;/ /g') +ARCHS := $(shell grep 'ABI_FILTERS' libmozillavoicestt/gradle.properties | cut -d'=' -f2 | sed -e 's/;/ /g') GRADLE ?= ./gradlew all: apk @@ -14,13 +14,13 @@ apk-clean: $(GRADLE) clean libs-clean: - rm -fr libdeepspeech/libs/*/libmozilla_voice_stt.so + rm -fr libmozillavoicestt/libs/*/libmozilla_voice_stt.so -libdeepspeech/libs/%/libmozilla_voice_stt.so: - -mkdir libdeepspeech/libs/$*/ - cp ${TFDIR}/bazel-out/$*-*/bin/native_client/libmozilla_voice_stt.so libdeepspeech/libs/$*/ +libmozillavoicestt/libs/%/libmozilla_voice_stt.so: + -mkdir libmozillavoicestt/libs/$*/ + cp ${TFDIR}/bazel-out/$*-*/bin/native_client/libmozilla_voice_stt.so libmozillavoicestt/libs/$*/ -apk: apk-clean bindings $(patsubst %,libdeepspeech/libs/%/libmozilla_voice_stt.so,$(ARCHS)) +apk: apk-clean bindings $(patsubst %,libmozillavoicestt/libs/%/libmozilla_voice_stt.so,$(ARCHS)) $(GRADLE) build maven-bundle: apk @@ -28,4 +28,4 @@ maven-bundle: apk $(GRADLE) zipMavenArtifacts bindings: clean ds-swig - $(DS_SWIG_ENV) swig -c++ -java -package org.mozilla.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i + $(DS_SWIG_ENV) swig -c++ -java -package org.mozilla.voice.stt -outdir libmozillavoicestt/src/main/java/org/mozilla/voice/stt/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i diff --git a/native_client/java/app/build.gradle b/native_client/java/app/build.gradle index c1aed496ad..c6ff276e39 100644 --- a/native_client/java/app/build.gradle +++ b/native_client/java/app/build.gradle @@ -4,7 +4,7 @@ android { compileSdkVersion 27 defaultConfig { - applicationId "org.mozilla.deepspeech" + applicationId 
"org.mozilla.voice.stt" minSdkVersion 21 targetSdkVersion 27 versionName androidGitVersion.name() @@ -28,7 +28,7 @@ android { dependencies { implementation fileTree(dir: 'libs', include: ['*.jar']) - implementation project(':libdeepspeech') + implementation project(':libmozillavoicestt') implementation 'com.android.support:appcompat-v7:27.1.1' implementation 'com.android.support.constraint:constraint-layout:1.1.3' testImplementation 'junit:junit:4.12' diff --git a/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java b/native_client/java/app/src/androidTest/java/org/mozilla/voice/stt/ExampleInstrumentedTest.java similarity index 85% rename from native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java rename to native_client/java/app/src/androidTest/java/org/mozilla/voice/stt/ExampleInstrumentedTest.java index 6c3e7f91f8..a122b24a60 100644 --- a/native_client/java/app/src/androidTest/java/org/mozilla/deepspeech/ExampleInstrumentedTest.java +++ b/native_client/java/app/src/androidTest/java/org/mozilla/voice/stt/ExampleInstrumentedTest.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech; +package org.mozilla.voice.stt; import android.content.Context; import android.support.test.InstrumentationRegistry; @@ -21,6 +21,6 @@ public void useAppContext() { // Context of the app under test. Context appContext = InstrumentationRegistry.getTargetContext(); - assertEquals("org.mozilla.deepspeech", appContext.getPackageName()); + assertEquals("org.mozilla.voice.stt", appContext.getPackageName()); } } diff --git a/native_client/java/app/src/main/AndroidManifest.xml b/native_client/java/app/src/main/AndroidManifest.xml index 0702cc1074..aa5ba8cd32 100644 --- a/native_client/java/app/src/main/AndroidManifest.xml +++ b/native_client/java/app/src/main/AndroidManifest.xml @@ -1,6 +1,6 @@ + package="org.mozilla.voice.stt"> - + diff --git a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/org/mozilla/voice/stt/MozillaVoiceSttActivity.java similarity index 95% rename from native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java rename to native_client/java/app/src/main/java/org/mozilla/voice/stt/MozillaVoiceSttActivity.java index d82de3a121..cf39ce4f4b 100644 --- a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java +++ b/native_client/java/app/src/main/java/org/mozilla/voice/stt/MozillaVoiceSttActivity.java @@ -1,4 +1,4 @@ -package org.mozilla.deepspeech; +package org.mozilla.voice.stt; import android.support.v7.app.AppCompatActivity; import android.os.Bundle; @@ -16,11 +16,11 @@ import java.nio.ByteOrder; import java.nio.ByteBuffer; -import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel; +import org.mozilla.voice.stt.MozillaVoiceSttModel; -public class DeepSpeechActivity extends AppCompatActivity { +public class MozillaVoiceSttActivity extends AppCompatActivity { - DeepSpeechModel _m = null; + MozillaVoiceSttModel _m = null; EditText _tfliteModel; EditText _audioFile; @@ -50,7 +50,7 @@ private void newModel(String tfliteModel) { this._tfliteStatus.setText("Creating model"); if (this._m == null) { // sphinx-doc: java_ref_model_start - this._m = new DeepSpeechModel(tfliteModel); + this._m = new MozillaVoiceSttModel(tfliteModel); this._m.setBeamWidth(BEAM_WIDTH); // sphinx-doc: java_ref_model_stop } diff --git a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml 
b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
index 02c383d431..ffbee61977 100644
--- a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
+++ b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
@@ -4,7 +4,7 @@
     xmlns:tools="http://schemas.android.com/tools"
     android:layout_width="match_parent"
     android:layout_height="match_parent"
-    tools:context=".DeepSpeechActivity">
+    tools:context=".MozillaVoiceSttActivity">
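The activity hunk above gives the renamed Java entry point: ``org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel`` becomes ``org.mozilla.voice.stt.MozillaVoiceSttModel``. Below is a minimal sketch of the renamed binding outside an Android activity, not part of the patch: the constructor and ``setBeamWidth`` appear in the hunk above, while ``stt`` and ``freeModel`` are assumed to survive the rename with their old signatures, and the ``output_graph.tflite`` path and ``SttExample`` name are illustrative.

// Hypothetical sketch of the renamed Java bindings after this patch.
// Assumes the old DeepSpeechModel method set carries over; only the
// package and class were renamed here.
import org.mozilla.voice.stt.MozillaVoiceSttModel;

public class SttExample {
    public static void main(String[] args) {
        // org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel
        //   -> org.mozilla.voice.stt.MozillaVoiceSttModel
        MozillaVoiceSttModel model = new MozillaVoiceSttModel("output_graph.tflite");
        model.setBeamWidth(50);

        // One second of 16 kHz mono 16-bit PCM (silence, for illustration).
        short[] audio = new short[16000];
        String decoded = model.stt(audio, audio.length);
        System.out.println(decoded);

        // The model wraps native memory, so free it explicitly rather
        // than relying on garbage collection.
        model.freeModel();
    }
}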