Skip to content

Commit

Permalink
Merge pull request #30 from ReadAlongs/release_040
Browse files Browse the repository at this point in the history
Final 0.4.0 release updates
  • Loading branch information
dhdaines authored Nov 9, 2022
2 parents 562024d + e8bd798 commit 9bbfdf3
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 115 deletions.
25 changes: 16 additions & 9 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
include CMakeLists.txt
include LICENSE
include README.manylinux.md
include README.md
include TODO.md
include build_wheels.sh
include config.h.in
include requirements.dev.txt
include docs/Makefile
Expand All @@ -17,10 +15,15 @@ include docs/source/readme.js.rst
include docs/source/readme.rst
include docs/source/soundswallower.rst
include include/soundswallower/CMakeLists.txt
include include/soundswallower/*.h
include js/CMakeLists.txt
recursive-include include *.h
include js/README.md
include js/.npmignore
include js/*.txt
include js/*.js
include js/*.ts
include js/*.html
include js/*.py
include js/*.c
include js/*.json
recursive-include model *
include py/CMakeLists.txt
Expand All @@ -35,23 +38,27 @@ include py/test/test_fsg.py
include pyproject.toml
include setup.py
include src/CMakeLists.txt
include src/*.c
include src/*.h
include src/*.y
include src/*.l
recursive-include src *.c
recursive-include src *.h
recursive-include src *.y
recursive-include src *.l
include tests/CMakeLists.txt
include tests/*.test
include tests/*.res
include tests/*.c
include tests/*.sh
include tests/testfuncs.sh.in
include tests/test_macros.h.in
include tests/compare_table.pl
recursive-include tests/data *
include tests/test_macros.h.in
exclude MANIFEST.in
exclude .readthedocs.yml
exclude .travis.yml
exclude .gitignore
recursive-exclude .github *
recursive-exclude _skbuild *
recursive-exclude build *
recursive-exclude jsbuild *
recursive-exclude * .gitignore
recursive-exclude * *.py[co]
recursive-exclude * *~
Expand Down
29 changes: 0 additions & 29 deletions README.manylinux.md

This file was deleted.

28 changes: 0 additions & 28 deletions build_wheels.sh

This file was deleted.

83 changes: 61 additions & 22 deletions js/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,23 +124,29 @@ await decoder.initialize();

The optional `loglevel` and `backtrace` options will make it a bit
more verbose, so you can be sure it's actually doing something. Now
we will create the world's stupidest grammar, which recognizes one
sentence:
we will create and enable the world's stupidest grammar, which
recognizes one sentence:

```js
let fsg = decoder.create_fsg("goforward", 0, 4, [
await decoder.set_fsg("goforward", 0, 4, [
{from: 0, to: 1, prob: 1.0, word: "go"},
{from: 1, to: 2, prob: 1.0, word: "forward"},
{from: 2, to: 3, prob: 1.0, word: "ten"},
{from: 3, to: 4, prob: 1.0, word: "meters"}
]);
await decoder.set_fsg(fsg);
```

You should `delete()` it, unless of course you intend to create a
bunch of them and swap them in and out. It is also possible to parse
a grammar in [JSGF](https://en.wikipedia.org/wiki/JSGF) format, see
below for an example.
If you actually want to just recognize a single sentence, in order to
get time alignments (this is known as "force-alignment"), we have a
better method for you:

```js
await decoder.set_align_text("go forward ten meters");
```

It is also possible to parse a grammar in
[JSGF](https://en.wikipedia.org/wiki/JSGF) format, see below for an
example.

Okay, let's wreck a nice beach! Record yourself saying something,
preferably the sentence "go forward ten meters", using SoX, for
Expand Down Expand Up @@ -171,6 +177,23 @@ console.log(decoder.get_hyp());
console.log(decoder.get_hypseg());
```

If you want even more detailed segmentation (phone and HMM state
level) you can use `get_alignment_json`. For more detail on this
format, see [the PocketSphinx
documentation](https://github.com/cmusphinx/pocketsphinx#usage) as it
is borrowed from there. Since this is JSON, you can create an object
from it and iterate over it:

```js
const result = JSON.parse(await decoder.get_alignment_json());
for (const word of result.w) {
console.log(`word ${word.t} at ${word.b} has duration ${word.d}`);
for (const phone of word.w) {
console.log(`phone ${phone.t} at ${phone.b} has duration ${phone.d}`);
}
}
```

Finally, if your program is long-running and you think you might make
multiple recognizers, you ought to delete them, because JavaScript is
awful:
Expand Down Expand Up @@ -210,18 +233,6 @@ await require('soundswallower')(ssjs);
This is simply concatenated to the model name, so you should make sure
to include the trailing slash, e.g. "model/" and not "model"!

Currently, it should also support any Sphinx format acoustic model, many of
which are available for download at [the SourceForge
page](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/).

To use a model, pass the directory (or base URL) containing its files
(i.e. `means`, `variances`, etc) in the `hmm` property when
initializing the decoder, for example:

```js
const decoder = ssjs.Decoder({hmm: "https://example.com/excellent-acoustic-model/"});
```


Using grammars
--------------
Expand All @@ -231,7 +242,7 @@ from a JavaScript string and set it in the decoder like this (a
hypothetical pizza-ordering grammar):

```js
let fsg = decoder.parse_jsgf(`#JSGF V1.0;
await decoder.set_jsgf(`#JSGF V1.0;
grammar pizza;
public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>;
<greeting> = hi | hello | yo | howdy;
Expand All @@ -241,7 +252,6 @@ public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [pizza] <toppings>;
<toppings> = [with] <topping> ([and] <topping>)*;
<topping> = olives | mushrooms | tomatoes | (green | hot) peppers | pineapple;
`);
await decoder.set_fsg(fsg);
```

Note that all the words in the grammar must first be defined in the
Expand All @@ -257,3 +267,32 @@ the internal state.
await decoder.add_word("supercalifragilisticexpialidocious",
"S UW P ER K AE L IH F R AE JH IH L IH S T IH K EH K S P IY AE L IH D OW SH Y UH S");
```

Voice activity detection / Endpointing
--------------------------------------

This is a work in progress, but it is also possible to detect the
start and end of speech in an input stream using an `Endpointer`
object. This requires you to pass buffers of a specific size, which
is understandably difficult since WebAudio also only wants to *give*
you buffers of a specific (and entirely different) size. A better
example is forthcoming but it looks a bit like this (copied directly
from [the
documentation](https://soundswallower.readthedocs.io/en/latest/soundswallower.js.html#Endpointer.get_in_speech)):

```js
let prev_in_speech = ep.get_in_speech();
let frame_size = ep.get_frame_size();
// Presume `frame` is a Float32Array of frame_size or less
let speech;
if (frame.length < frame_size)
speech = ep.end_stream(frame);
else
speech = ep.process(frame);
if (speech !== null) {
if (!prev_in_speech)
console.log("Speech started at " + ep.get_speech_start());
if (!ep.get_in_speech())
console.log("Speech ended at " + ep.get_speech_end());
}
```
6 changes: 3 additions & 3 deletions js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
"description": "An even smaller speech recognizer",
"main": "soundswallower.js",
"scripts": {
"test": "make && mocha test_node",
"tstest": "make && npx tsc && node test_typescript",
"webtest": "make && xdg-open http://localhost:8000/test_web.html && python server.py"
"test": "mocha test_node",
"tstest": "npx tsc && node test_typescript",
"webtest": "xdg-open http://localhost:8000/test_web.html && python server.py"
},
"repository": {
"type": "git",
Expand Down
7 changes: 3 additions & 4 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
scikit-build~=0.13
Cython~=0.29.21
pytest~=7.1.2
build~=0.8.0
scikit-build
Cython
pytest
numpy
2 changes: 1 addition & 1 deletion tests/test_acmod.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ main(int argc, char *argv[])
config_t *config;
FILE *rawfh;
int16 *buf;
int16 const *bptr;
int16 *bptr;
mfcc_t **cepbuf, **cptr;
size_t nread, nsamps;
fe_t *fe;
Expand Down
2 changes: 1 addition & 1 deletion tests/test_acmod_grow.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ main(int argc, char *argv[])
feat_t *fcb;
FILE *rawfh;
int16 *buf;
int16 const *bptr;
int16 *bptr;
size_t nread, nsamps;
int nfr;
int frame_counter;
Expand Down
12 changes: 6 additions & 6 deletions tests/test_fe.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ create_shifted(fe_t *fe, int16 *data, size_t nsamp)
}

mfcc_t **
create_full(fe_t *fe, const int16 *data, size_t nsamp)
create_full(fe_t *fe, int16 *data, size_t nsamp)
{
mfcc_t **cepbuf;
const int16 *inptr;
int16 *inptr;
int rv, nfr, ncep;

TEST_EQUAL(0, fe_start(fe));
Expand All @@ -155,10 +155,10 @@ create_full(fe_t *fe, const int16 *data, size_t nsamp)
}

mfcc_t **
create_process_frames(fe_t *fe, const int16 *data, size_t nsamp)
create_process_frames(fe_t *fe, int16 *data, size_t nsamp)
{
mfcc_t **cepbuf;
const int16 *inptr;
int16 *inptr;
int i, rv, nfr, ncep, frame_shift, frame_size;

fe_get_input_size(fe, &frame_shift, &frame_size);
Expand Down Expand Up @@ -195,10 +195,10 @@ create_process_frames(fe_t *fe, const int16 *data, size_t nsamp)


mfcc_t **
create_fragments(fe_t *fe, const int16 *data, size_t nsamp)
create_fragments(fe_t *fe, int16 *data, size_t nsamp)
{
mfcc_t **cepbuf, **cepptr;
const int16 *inptr;
int16 *inptr;
int i, rv, nfr, ncep, frame_shift, frame_size;
/* Should total 1024 :) */
size_t fragments[] = {
Expand Down
18 changes: 9 additions & 9 deletions tests/test_fe_float32.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ create_shifted(fe_t *fe, float32 *data, size_t nsamp)
}

mfcc_t **
create_full(fe_t *fe, const float32 *data, size_t nsamp)
create_full(fe_t *fe, float32 *data, size_t nsamp)
{
mfcc_t **cepbuf;
const float32 *inptr;
float32 *inptr;
int rv, nfr, ncep;

TEST_EQUAL(0, fe_start(fe));
Expand All @@ -157,10 +157,10 @@ create_full(fe_t *fe, const float32 *data, size_t nsamp)
}

mfcc_t **
create_process_frames(fe_t *fe, const float32 *data, size_t nsamp)
create_process_frames(fe_t *fe, float32 *data, size_t nsamp)
{
mfcc_t **cepbuf;
const float32 *inptr;
float32 *inptr;
int i, rv, nfr, ncep, frame_shift, frame_size;

fe_get_input_size(fe, &frame_shift, &frame_size);
Expand Down Expand Up @@ -197,10 +197,10 @@ create_process_frames(fe_t *fe, const float32 *data, size_t nsamp)


mfcc_t **
create_fragments(fe_t *fe, const float32 *data, size_t nsamp)
create_fragments(fe_t *fe, float32 *data, size_t nsamp)
{
mfcc_t **cepbuf, **cepptr;
const float32 *inptr;
float32 *inptr;
int i, rv, nfr, ncep, frame_shift, frame_size;
/* Should total 1024 :) */
size_t fragments[] = {
Expand Down Expand Up @@ -238,11 +238,11 @@ create_fragments(fe_t *fe, const float32 *data, size_t nsamp)


mfcc_t **
create_mixed_fragments(fe_t *fe, const float32 *data, const int16 *idata, size_t nsamp, int odd)
create_mixed_fragments(fe_t *fe, float32 *data, int16 *idata, size_t nsamp, int odd)
{
mfcc_t **cepbuf, **cepptr;
const float32 *inptr;
const int16 *iinptr;
float32 *inptr;
int16 *iinptr;
int i, rv, nfr, ncep, frame_shift, frame_size;
/* Should total 1024 :) */
size_t fragments[] = {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_feat_fe.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ main(int argc, char *argv[])
cptr = cepbuf;
nfr = total_frames;
while ((nsamp = fread(buf, sizeof(int16), 2048, raw)) > 0) {
int16 const *bptr = buf;
int16 *bptr = buf;
while (nsamp) {
int ncep = fe_process_int16(fe, &bptr, &nsamp,
cptr, nfr);
Expand Down
Loading

0 comments on commit 9bbfdf3

Please sign in to comment.