From d7e81142dcd1ca7b7f039ef7134fe46e5fc0f836 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 20:24:19 -0500 Subject: [PATCH 1/6] docs: update js docs --- js/README.md | 83 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/js/README.md b/js/README.md index cf8ccce..094b58c 100644 --- a/js/README.md +++ b/js/README.md @@ -124,23 +124,29 @@ await decoder.initialize(); The optional `loglevel` and `backtrace` options will make it a bit more verbose, so you can be sure it's actually doing something. Now -we will create the world's stupidest grammar, which recognizes one -sentence: +we will create and enable the world's stupidest grammar, which +recognizes one sentence: ```js -let fsg = decoder.create_fsg("goforward", 0, 4, [ +await decoder.set_fsg("goforward", 0, 4, [ {from: 0, to: 1, prob: 1.0, word: "go"}, {from: 1, to: 2, prob: 1.0, word: "forward"}, {from: 2, to: 3, prob: 1.0, word: "ten"}, {from: 3, to: 4, prob: 1.0, word: "meters"} ]); -await decoder.set_fsg(fsg); ``` -You should `delete()` it, unless of course you intend to create a -bunch of them and swap them in and out. It is also possible to parse -a grammar in [JSGF](https://en.wikipedia.org/wiki/JSGF) format, see -below for an example. +If you actually want to just recognize a single sentence, in order to +get time alignments (this is known as "force-alignment"), we have a +better method for you: + +```js +await decoder.set_align_text("go forward ten meters"); +``` + +It is also possible to parse a grammar in +[JSGF](https://en.wikipedia.org/wiki/JSGF) format, see below for an +example. Okay, let's wreck a nice beach! Record yourself saying something, preferably the sentence "go forward ten meters", using SoX, for @@ -171,6 +177,23 @@ console.log(decoder.get_hyp()); console.log(decoder.get_hypseg()); ``` +If you want even more detailed segmentation (phone and HMM state +level) you can use `get_alignment_json`. 
For more detail on this +format, see [the PocketSphinx +documentation](https://github.com/cmusphinx/pocketsphinx#usage) as it +is borrowed from there. Since this is JSON, you can create an object +from it and iterate over it: + +```js +const result = JSON.parse(await decoder.get_alignment_json()); +for (const word of result.w) { + console.log(`word ${word.t} at ${word.b} has duration ${word.d}`); + for (const phone of word.w) { + console.log(`phone ${phone.t} at ${phone.b} has duration ${phone.d}`); + } +} +``` + Finally, if your program is long-running and you think you might make multiple recognizers, you ought to delete them, because JavaScript is awful: @@ -210,18 +233,6 @@ await require('soundswallower')(ssjs); This is simply concatenated to the model name, so you should make sure to include the trailing slash, e.g. "model/" and not "model"! -Currently, it should also support any Sphinx format acoustic model, many of -which are available for download at [the SourceForge -page](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/). - -To use a module, pass the directory (or base URL) containing its files -(i.e. `means`, `variances`, etc) in the `hmm` property when -initializing the decoder, for example: - -```js -const decoder = ssjs.Decoder({hmm: "https://example.com/excellent-acoustic-model/"}); -``` - Using grammars -------------- @@ -231,7 +242,7 @@ from a JavaScript string and set it in the decoder like this (a hypothetical pizza-ordering grammar): ```js - let fsg = decoder.parse_jsgf(`#JSGF V1.0; + await decoder.set_jsgf(`#JSGF V1.0; grammar pizza; public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>; <greeting> = hi | hello | yo | howdy; @@ -241,7 +252,6 @@ public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>; <toppings> = [with] <topping> ([and] <topping>)*; <topping> = olives | mushrooms | tomatoes | (green | hot) peppers | pineapple; `); - await decoder.set_fsg(fsg); ``` Note that all the words in the grammar must first be defined in the @@ -257,3 +267,32 @@ the internal state. 
await decoder.add_word("supercalifragilisticexpialidocious", "S UW P ER K AE L IH F R AE JH IH L IH S T IH K EH K S P IY AE L IH D OW SH Y UH S"); ``` + +Voice activity detection / Endpointing +-------------------------------------- + +This is a work in progress, but it is also possible to detect the +start and end of speech in an input stream using an `Endpointer` +object. This requires you to pass buffers of a specific size, which +is understandably difficult since WebAudio also only wants to *give* +you buffers of a specific (and entirely different) size. A better +example is forthcoming but it looks a bit like this (copied directly +from [the +documentation](https://soundswallower.readthedocs.io/en/latest/soundswallower.js.html#Endpointer.get_in_speech)): + +```js +let prev_in_speech = ep.get_in_speech(); +let frame_size = ep.get_frame_size(); +// Presume `frame` is a Float32Array of frame_size or less +let speech; +if (frame.length < frame_size) + speech = ep.end_stream(frame); +else + speech = ep.process(frame); +if (speech !== null) { + if (!prev_in_speech) + console.log("Speech started at " + ep.get_speech_start()); + if (!ep.get_in_speech()) + console.log("Speech ended at " + ep.get_speech_end()); +} +``` From 189f3a9e6e8cd509b0531a47a60939a21db5f9af Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 22:02:51 -0500 Subject: [PATCH 2/6] refactor: remove useless const --- tests/test_acmod.c | 2 +- tests/test_acmod_grow.c | 2 +- tests/test_fe.c | 12 ++++++------ tests/test_fe_float32.c | 18 +++++++++--------- tests/test_feat_fe.c | 2 +- tests/test_ps.c | 2 +- tests/test_ptm_mgau.c | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/test_acmod.c b/tests/test_acmod.c index 4fe925a..22444e0 100644 --- a/tests/test_acmod.c +++ b/tests/test_acmod.c @@ -36,7 +36,7 @@ main(int argc, char *argv[]) config_t *config; FILE *rawfh; int16 *buf; - int16 const *bptr; + int16 *bptr; mfcc_t **cepbuf, **cptr; size_t nread, nsamps; 
fe_t *fe; diff --git a/tests/test_acmod_grow.c b/tests/test_acmod_grow.c index 6327e21..6bfe1a6 100644 --- a/tests/test_acmod_grow.c +++ b/tests/test_acmod_grow.c @@ -37,7 +37,7 @@ main(int argc, char *argv[]) feat_t *fcb; FILE *rawfh; int16 *buf; - int16 const *bptr; + int16 *bptr; size_t nread, nsamps; int nfr; int frame_counter; diff --git a/tests/test_fe.c b/tests/test_fe.c index 4b769ea..4a5703f 100644 --- a/tests/test_fe.c +++ b/tests/test_fe.c @@ -125,10 +125,10 @@ create_shifted(fe_t *fe, int16 *data, size_t nsamp) } mfcc_t ** -create_full(fe_t *fe, const int16 *data, size_t nsamp) +create_full(fe_t *fe, int16 *data, size_t nsamp) { mfcc_t **cepbuf; - const int16 *inptr; + int16 *inptr; int rv, nfr, ncep; TEST_EQUAL(0, fe_start(fe)); @@ -155,10 +155,10 @@ create_full(fe_t *fe, const int16 *data, size_t nsamp) } mfcc_t ** -create_process_frames(fe_t *fe, const int16 *data, size_t nsamp) +create_process_frames(fe_t *fe, int16 *data, size_t nsamp) { mfcc_t **cepbuf; - const int16 *inptr; + int16 *inptr; int i, rv, nfr, ncep, frame_shift, frame_size; fe_get_input_size(fe, &frame_shift, &frame_size); @@ -195,10 +195,10 @@ create_process_frames(fe_t *fe, const int16 *data, size_t nsamp) mfcc_t ** -create_fragments(fe_t *fe, const int16 *data, size_t nsamp) +create_fragments(fe_t *fe, int16 *data, size_t nsamp) { mfcc_t **cepbuf, **cepptr; - const int16 *inptr; + int16 *inptr; int i, rv, nfr, ncep, frame_shift, frame_size; /* Should total 1024 :) */ size_t fragments[] = { diff --git a/tests/test_fe_float32.c b/tests/test_fe_float32.c index 17b4134..e786912 100644 --- a/tests/test_fe_float32.c +++ b/tests/test_fe_float32.c @@ -127,10 +127,10 @@ create_shifted(fe_t *fe, float32 *data, size_t nsamp) } mfcc_t ** -create_full(fe_t *fe, const float32 *data, size_t nsamp) +create_full(fe_t *fe, float32 *data, size_t nsamp) { mfcc_t **cepbuf; - const float32 *inptr; + float32 *inptr; int rv, nfr, ncep; TEST_EQUAL(0, fe_start(fe)); @@ -157,10 +157,10 @@ create_full(fe_t 
*fe, const float32 *data, size_t nsamp) } mfcc_t ** -create_process_frames(fe_t *fe, const float32 *data, size_t nsamp) +create_process_frames(fe_t *fe, float32 *data, size_t nsamp) { mfcc_t **cepbuf; - const float32 *inptr; + float32 *inptr; int i, rv, nfr, ncep, frame_shift, frame_size; fe_get_input_size(fe, &frame_shift, &frame_size); @@ -197,10 +197,10 @@ create_process_frames(fe_t *fe, const float32 *data, size_t nsamp) mfcc_t ** -create_fragments(fe_t *fe, const float32 *data, size_t nsamp) +create_fragments(fe_t *fe, float32 *data, size_t nsamp) { mfcc_t **cepbuf, **cepptr; - const float32 *inptr; + float32 *inptr; int i, rv, nfr, ncep, frame_shift, frame_size; /* Should total 1024 :) */ size_t fragments[] = { @@ -238,11 +238,11 @@ create_fragments(fe_t *fe, const float32 *data, size_t nsamp) mfcc_t ** -create_mixed_fragments(fe_t *fe, const float32 *data, const int16 *idata, size_t nsamp, int odd) +create_mixed_fragments(fe_t *fe, float32 *data, int16 *idata, size_t nsamp, int odd) { mfcc_t **cepbuf, **cepptr; - const float32 *inptr; - const int16 *iinptr; + float32 *inptr; + int16 *iinptr; int i, rv, nfr, ncep, frame_shift, frame_size; /* Should total 1024 :) */ size_t fragments[] = { diff --git a/tests/test_feat_fe.c b/tests/test_feat_fe.c index a694cf4..d9c3989 100644 --- a/tests/test_feat_fe.c +++ b/tests/test_feat_fe.c @@ -54,7 +54,7 @@ main(int argc, char *argv[]) cptr = cepbuf; nfr = total_frames; while ((nsamp = fread(buf, sizeof(int16), 2048, raw)) > 0) { - int16 const *bptr = buf; + int16 *bptr = buf; while (nsamp) { int ncep = fe_process_int16(fe, &bptr, &nsamp, cptr, nfr); diff --git a/tests/test_ps.c b/tests/test_ps.c index 66e46ea..6c9393f 100644 --- a/tests/test_ps.c +++ b/tests/test_ps.c @@ -13,7 +13,7 @@ decoder_test(config_t *config, char const *sname, char const *expected) mfcc_t **cepbuf; FILE *rawfh; int16 *buf; - int16 const *bptr; + int16 *bptr; size_t nread; size_t nsamps; int32 nfr, i, score, prob; diff --git a/tests/test_ptm_mgau.c 
b/tests/test_ptm_mgau.c index fc1e603..50182e6 100644 --- a/tests/test_ptm_mgau.c +++ b/tests/test_ptm_mgau.c @@ -32,7 +32,7 @@ run_acmod_test(acmod_t *acmod) { FILE *rawfh; int16 *buf; - int16 const *bptr; + int16 *bptr; size_t nread, nsamps; int nfr; int frame_counter; From e4b99dc8a995f13313fe5bcc2defb6d15d93a978 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 22:03:14 -0500 Subject: [PATCH 3/6] build: remove no longer used files --- README.manylinux.md | 29 ----------------------------- build_wheels.sh | 28 ---------------------------- 2 files changed, 57 deletions(-) delete mode 100644 README.manylinux.md delete mode 100644 build_wheels.sh diff --git a/README.manylinux.md b/README.manylinux.md deleted file mode 100644 index 06f9426..0000000 --- a/README.manylinux.md +++ /dev/null @@ -1,29 +0,0 @@ -Building binary distributions for Linux ---------------------------------------- - -To build distributions that are compatible with all the various Linux -distributions, and can therefore be uploaded to PyPI, we now use -[cibuildwheel](https://pypi.org/project/cibuildwheel/). But also, you -can use the Docker images provided by the [manylinux -project](https://github.com/pypa/manylinux). 
- -The full sequence of commands to create Linux wheels for Python 3.7 -through 3.10 is, presuming you use the latest source distribution from -PyPI: - - docker pull quay.io/pypa/manylinux1_x86_64 - docker run -v $PWD:$PWD -w $PWD/dist -it quay.io/pypa/manylinux1_x86_64 /opt/python/cp39-cp39/bin/pip wheel soundswallower - docker run -v $PWD:$PWD -w $PWD/dist -it quay.io/pypa/manylinux1_x86_64 /opt/python/cp38-cp38/bin/pip wheel soundswallower - docker run -v $PWD:$PWD -w $PWD/dist -it quay.io/pypa/manylinux1_x86_64 /opt/python/cp37-cp37m/bin/pip wheel soundswallower - docker pull quay.io/pypa/manylinux2014_x86_64 - docker run -v $PWD:$PWD -w $PWD/dist -it quay.io/pypa/manylinux2014_x86_64 /opt/python/cp310-cp310/bin/pip wheel soundswallower - for w in dist/*.whl; do docker run -v $PWD:$PWD -w $PWD -it quay.io/pypa/manylinux2014_x86_64 auditwheel repair $w; done - -The script `build_wheels.sh` is included to automate this somewhat. - -If you wish to use the current directory, replace `soundswallower` -with `.` - likewise, it can also be replaced by the path to an -existing source distribution or source tree. - -Note that running auditwheel is necessary to get the platform tags -right, and it will write new wheels in `wheelhouse`. 
diff --git a/build_wheels.sh b/build_wheels.sh deleted file mode 100644 index 3ea4cf5..0000000 --- a/build_wheels.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -set -e -VERSION=0.4.0 -U=$(id -u) -G=$(id -g) - -many1_run() { - docker run -v $PWD:$PWD -v $HOME/.cache/pip:/.cache/pip -u $U:$G -w $PWD -it quay.io/pypa/manylinux1_x86_64 "$@" -} -many2014_run() { - docker run -v $PWD:$PWD -v $HOME/.cache/pip:/.cache/pip -u $U:$G -w $PWD -it quay.io/pypa/manylinux2014_x86_64 "$@" -} - - -python setup.py clean || true -rm -rf *.whl dist/* _skbuild py/soundswallower.egg-info -python -m build --sdist -docker pull quay.io/pypa/manylinux1_x86_64 -for version in cp39-cp39 cp38-cp38 cp37-cp37m; do - many1_run /opt/python/$version/bin/pip wheel dist/soundswallower-$VERSION.tar.gz -done -docker pull quay.io/pypa/manylinux2014_x86_64 -many2014_run /opt/python/cp310-cp310/bin/pip wheel dist/soundswallower-$VERSION.tar.gz -for w in *.whl; do - many2014_run auditwheel repair -w dist $w - rm $w -done From 93ded9e262a9ba21380e47f8022d5b0fdf1e0690 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 22:04:41 -0500 Subject: [PATCH 4/6] build: update MANIFEST.in --- MANIFEST.in | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index a15b9f5..37f51ad 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,7 @@ include CMakeLists.txt include LICENSE -include README.manylinux.md include README.md include TODO.md -include build_wheels.sh include config.h.in include requirements.dev.txt include docs/Makefile @@ -17,10 +15,15 @@ include docs/source/readme.js.rst include docs/source/readme.rst include docs/source/soundswallower.rst include include/soundswallower/CMakeLists.txt -include include/soundswallower/*.h -include js/CMakeLists.txt +recursive-include include *.h include js/README.md +include js/.npmignore +include js/*.txt include js/*.js +include js/*.ts +include js/*.html +include js/*.py +include 
js/*.c include js/*.json recursive-include model * include py/CMakeLists.txt @@ -35,23 +38,27 @@ include py/test/test_fsg.py include pyproject.toml include setup.py include src/CMakeLists.txt -include src/*.c -include src/*.h -include src/*.y -include src/*.l +recursive-include src *.c +recursive-include src *.h +recursive-include src *.y +recursive-include src *.l include tests/CMakeLists.txt include tests/*.test include tests/*.res include tests/*.c +include tests/*.sh +include tests/testfuncs.sh.in +include tests/test_macros.h.in include tests/compare_table.pl recursive-include tests/data * -include tests/test_macros.h.in exclude MANIFEST.in exclude .readthedocs.yml exclude .travis.yml exclude .gitignore recursive-exclude .github * recursive-exclude _skbuild * +recursive-exclude build * +recursive-exclude jsbuild * recursive-exclude * .gitignore recursive-exclude * *.py[co] recursive-exclude * *~ From 3fa8db4a420edd547fed06efb9c8b1a92b993571 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 22:04:56 -0500 Subject: [PATCH 5/6] build: sometimes it is ninja not make --- js/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/package.json b/js/package.json index 9895dfc..5676af5 100644 --- a/js/package.json +++ b/js/package.json @@ -4,9 +4,9 @@ "description": "An even smaller speech recognizer", "main": "soundswallower.js", "scripts": { - "test": "make && mocha test_node", - "tstest": "make && npx tsc && node test_typescript", - "webtest": "make && xdg-open http://localhost:8000/test_web.html && python server.py" + "test": "mocha test_node", + "tstest": "npx tsc && node test_typescript", + "webtest": "xdg-open http://localhost:8000/test_web.html && python server.py" }, "repository": { "type": "git", From e8bd798a7a6ce1d2fce0b70e270829896f1164fe Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 8 Nov 2022 22:05:04 -0500 Subject: [PATCH 6/6] build: update requirements --- requirements.dev.txt | 7 
+++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/requirements.dev.txt b/requirements.dev.txt index 36741d6..aed2586 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,5 +1,4 @@ -scikit-build~=0.13 -Cython~=0.29.21 -pytest~=7.1.2 -build~=0.8.0 +scikit-build +Cython +pytest numpy