diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
index a4de48c8b..903cafc8e 100644
--- a/.github/workflows/build-docs.yml
+++ b/.github/workflows/build-docs.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Fetch release tags from GitHub
@@ -22,7 +22,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y build-essential libeigen3-dev libyaml-dev libfftw3-dev libavcodec-dev libavformat-dev libavutil-dev libswresample-dev libsamplerate0-dev libtag1-dev libchromaprint-dev python3-dev python3-numpy-dev python3-numpy python3-yaml python3-six
sudo apt-get install -y doxygen python3-pip pandoc
- pip3 install sphinx pyparsing sphinxcontrib-doxylink docutils jupyter sphinx-toolbox
+ pip3 install sphinx pyparsing sphinxcontrib-doxylink docutils jupyter sphinx-toolbox nbformat gitpython sphinx-copybutton
# Install TensorFlow
sudo sh src/3rdparty/tensorflow/setup_from_libtensorflow.sh
# Install Gaia dependencies
@@ -44,7 +44,7 @@ jobs:
run: |
python3 waf doc
- name: Upload built documentation
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: essentia-docs
path: |
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
index cd9af9c89..6f7ddbe57 100644
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -32,7 +32,7 @@ jobs:
PRE_CMD: ${{ matrix.PRE_CMD }}
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Fetch release tags from GitHub
@@ -51,7 +51,7 @@ jobs:
ls wheelhouse/
sudo python setup.py sdist
- name: Upload wheels and sdist
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: essentia-python-wheels
path: |
diff --git a/doc/Doxyfile b/doc/Doxyfile
index 2c3745c77..db786d67a 100644
--- a/doc/Doxyfile
+++ b/doc/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = Essentia
# could be handy for archiving the generated documentation or if some version
# control system is used.
-PROJECT_NUMBER = 2.1-beta6-dev
+PROJECT_NUMBER = 2.1-beta6-dev
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
diff --git a/doc/build_sphinx_doc.sh b/doc/build_sphinx_doc.sh
index 906ab83f4..8172c2f13 100755
--- a/doc/build_sphinx_doc.sh
+++ b/doc/build_sphinx_doc.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-
+set -e
# update Essentia version number in the Doxyfile
cp doc/Doxyfile doc/Doxyfile.tmp
@@ -37,6 +37,7 @@ pandoc ../../FAQ.md -o FAQ.rst
pandoc research_papers.md -o research_papers.rst
jupyter nbconvert ../../src/examples/python/*.ipynb --to rst --output-dir .
+make clean
make html
# remove generated algorithm reference rst and temporary html files
diff --git a/doc/sphinxdoc/_templates/documentation.html b/doc/sphinxdoc/_templates/documentation.html
index b3eed66bb..3f166c605 100644
--- a/doc/sphinxdoc/_templates/documentation.html
+++ b/doc/sphinxdoc/_templates/documentation.html
@@ -7,7 +7,7 @@
Essentia {{version}} Documentation
What is Essentia?
Essentia is an open-source C++ library with Python and JavaScript bindings for audio analysis and audio-based music information retrieval. It is released
- under the Affero GPLv3
+ under the Affero GPLv3
license and is also available under a proprietary license upon request. The library contains an
extensive collection of reusable algorithms that implement audio input/output functionality, standard digital signal processing
blocks, statistical characterization of data, and a large variety of spectral, temporal, tonal, and high-level music
diff --git a/doc/sphinxdoc/conf.py b/doc/sphinxdoc/conf.py
index df3f9b2e6..217458fe3 100644
--- a/doc/sphinxdoc/conf.py
+++ b/doc/sphinxdoc/conf.py
@@ -167,19 +167,22 @@
# We only want a sidebar on the models page.
html_sidebars = {
+ # 'index': [],
'**': [],
+ 'algorithms_reference': ['localtoc.html'],
'models': ['localtoc.html'],
}
# Additional templates that should be rendered to pages, maps page names to
# template names.
html_additional_pages = {'index': 'index.html',
- 'algorithms_reference': 'algorithms_reference.html',
+ # 'algorithms_reference': 'algorithms_reference.html',
'applications': 'applications.html',
'documentation': 'documentation.html'}
-exec(compile(open("essentia_reference.py").read(), "essentia_reference.py", 'exec'))
-html_additional_pages.update(essentia_algorithms)
+# Deprecating this. Generating algorithm docs directly from rst files is better for sidebar.
+# exec(compile(open("essentia_reference.py").read(), "essentia_reference.py", 'exec'))
+# html_additional_pages.update(essentia_algorithms)
# If false, no module index is generated.
#html_domain_indices = True
diff --git a/doc/sphinxdoc/generate_reference.py b/doc/sphinxdoc/generate_reference.py
index 5c94f3777..7217d1c77 100755
--- a/doc/sphinxdoc/generate_reference.py
+++ b/doc/sphinxdoc/generate_reference.py
@@ -24,11 +24,16 @@
import essentia.streaming
import os, re, subprocess
import sys
+from pathlib import Path
+import nbformat
+import git
std_algo_list = [ algo for algo in dir(essentia.standard) if algo[0].isupper() ]
streaming_algo_list = [ algo for algo in dir(essentia.streaming) if algo[0].isupper() and algo not in [ 'CompositeBase'] ]
-
+python_tutorials_list = [tut for tut in Path('../../src/examples/python').glob('*.ipynb')]
+pattern_name = re.compile(r'const char(\s\*|\*\s)\w+::name = "(\w+)";')
+algo_path_dict = {pattern_name.search(algo_path.read_text()).group(2) : algo_path.relative_to('../../') for algo_path in Path('../../src/algorithms').rglob('*/*.cpp')}
def replace_math_symbols(s):
while True:
@@ -76,7 +81,6 @@ def algo_link(algoname, mode):
mode_abbr = 'std' if mode == 'standard' else 'streaming'
return "%s `(%s) <%s_%s.html>`__" % (algoname, mode, mode_abbr, algoname)
-
def related_algos(algo_doc):
lines = []
@@ -100,15 +104,104 @@ def related_algos(algo_doc):
'\n'.join(sorted(lines))]
return []
+def tutorial_link(tutorial_name):
+ """
+ Create link given tutorial file name
+ """
+ return f"{tutorial_name} `(Link) <../{tutorial_name}.html>`__"
+
+ ## NOTE: Possible Improvements
+ ## 1. Check if a more robust way exists to get the file link
+ ## 2. Display the title of tutorial instead of file name
+
+def is_word_in_jupyternb(word, path):
+ """
+ Finds if a word is used in the code cells of given notebook
+ Return True if word found, False if not
+ """
+ with open(path) as f:
+ nbcontent = nbformat.read(f, as_version=nbformat.NO_CONVERT)
+
+ for cell in nbcontent['cells']:
+ if cell['cell_type'] == "code":
+ if(cell['source'].find(word) != -1):
+ return True
+ return False
+
+def related_tutorials(algo_doc):
+ """
+ Get all python tutorials related to the algorithm
+ """
+ lines = []
+
+ lines += [tutorial_link(tut_file.stem) for tut_file in python_tutorials_list
+ if is_word_in_jupyternb(algo_doc['name'], tut_file)]
+
+ if lines:
+ return ['Related tutorials',
+ '-----------------',
+ '',
+ ' | '.join(sorted(lines)),
+ '']
+ return []
+
+ ## NOTE: Possible Optimizations
+ ## 1. This function currently runs once for each algorithm. Each algorithm will go through all the files.
+ ## 2. First Optimization - Create an index from each algorithm to file.
+ ## 3. Use this mapping to go from algorithm to list of tutorial files in O(1) time.
+
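The optimization sketched in the note above would replace the per-algorithm notebook scan with a single pass. A minimal Python sketch of that inverted index (the helper name and arguments are ours, not part of this patch):

    import nbformat

    def build_tutorial_index(tutorial_paths, algo_names):
        """Scan each notebook once, then answer algorithm -> tutorials in O(1)."""
        index = {}
        for path in tutorial_paths:
            nb = nbformat.read(str(path), as_version=nbformat.NO_CONVERT)
            code = '\n'.join(cell['source'] for cell in nb['cells']
                             if cell['cell_type'] == 'code')
            for algo in algo_names:
                if algo in code:
                    index.setdefault(algo, []).append(path.stem)
        return index

    # related_tutorials() would then reduce to: index.get(algo_doc['name'], [])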
+def source_links(algo_doc):
+ """
+ Get the source cpp and header links for the algorithm
+ """
+ # Get build commit
+ repo = git.Repo(search_parent_directories=True)
+ commit_id = repo.git.describe('--long')
+
+ # Source Directory and URL Prefix
+ URL_PREFIX = f'https://github.com/MTG/essentia/blob/{commit_id}/'
+
+ # Get the path of the cpp file
+ cpp_path = algo_path_dict.get(algo_doc['name'], None)
+ if cpp_path:
+ header_path = cpp_path.with_suffix('.h')
+ return [URL_PREFIX + str(cpp_path), URL_PREFIX + str(header_path)]
+ else:
+ return None
+
+ ## NOTE: Future Modifications
+ ## 1. Check if source code path can be added in algo doc itself - DONE
+ ## 2. Make the URL Prefix dynamic to link to the current release version - DONE
+ ## 3. Would be required when we support multiple versions of the documentation
+ ## 4. Can move the URL_PREFIX to a global variable, but it looks cleaner here
def doc2rst(algo_doc, sphinxsearch=False):
+ """
+ Convert the algorithm documentation to RST format
+
+ algo_doc: dict
+ Dictionary containing the algorithm documentation
+ sphinxsearch: bool
+ Flag to indicate if the RST is for sphinx search index or for HTML rendering
+
+ Returns
+ -------
+ str: RST formatted string
+ """
if sphinxsearch:
# dummy rst files used to append algorithms to the sphinx HTML search
- lines = [':orphan:',
- ''
- ]
- header = 'Algorithm reference - ' + algo_doc['name'] + ' (' + algo_doc['mode'] + ')'
- lines += [header, '=' * len(header), '']
+############################################################
+# Deprecated header format.
+############################################################
+ # lines = [':orphan:',
+ # ''
+ # ]
+ # header = 'Algorithm reference - ' + algo_doc['name'] + ' (' + algo_doc['mode'] + ')'
+ # lines += [header, '=' * len(header), '']
+############################################################
+############################################################
+ lines = [ algo_doc['name'], '=' * len(algo_doc['name']), '']
+ lines += [algo_doc['mode'] + ' | ' + algo_doc['category'] + ' category', '']
else:
# actual rst files used to render HTMLs
lines = [ algo_doc['name'], '=' * len(algo_doc['name']), '']
@@ -160,6 +253,16 @@ def doc2rst(algo_doc, sphinxsearch=False):
TR_DESC(algo_doc['description'])
]
+ links = source_links(algo_doc)
+ if links:
+ lines += ['Source code',
+ '-----------',
+ '',
+ ' - `C++ source code <%s>`__' % links[0],
+ ' - `C++ header file <%s>`__' % links[1],
+ '']
+
+ lines += related_tutorials(algo_doc)
lines += related_algos(algo_doc)
return '\n'.join(lines)
@@ -211,7 +314,8 @@ def write_html_doc(filename, algo_doc, layout_type):
def write_algorithms_reference():
- '''Write all files necessary to have a complete algorithms reference in the sphinx doc.
+ '''
+ Write all files necessary to have a complete algorithms reference in the sphinx doc.
That includes:
- write the _templates/algorithms_reference.html template
- write each separate algo doc as an html template in the _templates/reference folder
@@ -252,9 +356,9 @@ def write_algorithms_reference():
print('generating doc for standard algorithm: %s ...' % algoname)
write_doc('reference/std_' + algoname + '.rst', algos[algoname]['standard'])
- write_html_doc('_templates/reference/std_' + algoname + '.html',
- algos[algoname]['standard'],
- layout_type = 'std')
+ # write_html_doc('_templates/reference/std_' + algoname + '.html',
+ # algos[algoname]['standard'],
+ # layout_type = 'std')
for algoname in streaming_algo_list:
algos.setdefault(algoname, {})
@@ -263,10 +367,12 @@ def write_algorithms_reference():
print('generating doc for streaming algorithm: %s ...' % algoname)
write_doc('reference/streaming_' + algoname + '.rst', algos[algoname]['streaming'])
- write_html_doc('_templates/reference/streaming_' + algoname + '.html',
- algos[algoname]['streaming'],
- layout_type = 'streaming')
+ # write_html_doc('_templates/reference/streaming_' + algoname + '.html',
+ # algos[algoname]['streaming'],
+ # layout_type = 'streaming')
+############################################################
+############################################################
# write the template for the std algorithms
html = '''
@@ -317,6 +423,8 @@ def write_algorithms_reference():
'''
open('_templates/algo_description_layout_streaming.html', 'w').write(html)
+############################################################
+############################################################
# write the essentia_reference.py file (to be included in conf.py)
with open('essentia_reference.py', 'w') as algo_ref:
@@ -342,9 +450,11 @@ def write_algorithms_reference():
''')
+############################################################
+############################################################
- # write the algorithms_reference.html file (main ref file)
- algo_categories_html = {}
+ # write the algorithms_reference.rst file (main ref file)
+ algo_categories_rst = {}
for algoname in algos:
std_algo = None
streaming_algo = None
@@ -379,43 +489,70 @@ def write_algorithms_reference():
if len(description):
description = description[0].capitalize() + description[1:]
-        links = []
-        if std_algo:
-            links.append('<a href="reference/std_' + algoname + '.html">standard</a>')
-        if streaming_algo:
-            links.append('<a href="reference/streaming_' + algoname + '.html">streaming</a>')
-        algo_html = '<h4>' + algoname + '</h4>' + '<p>(' + ', '.join(links) + ')</p>' + '<p>' + description + '</p>'
-        algo_categories_html.setdefault(category, [])
-        algo_categories_html[category].append(algo_html)
+############################################################
+# Deprecated code for generating html file.
+############################################################
+#        links = []
+#        if std_algo:
+#            links.append('<a href="reference/std_' + algoname + '.html">standard</a>')
+#        if streaming_algo:
+#            links.append('<a href="reference/streaming_' + algoname + '.html">streaming</a>')
+#        algo_html = '<h4>' + algoname + '</h4>' + '<p>(' + ', '.join(links) + ')</p>' + '<p>' + description + '</p>'
+#        algo_categories_html.setdefault(category, [])
+#        algo_categories_html[category].append(algo_html)
-    html = '''
-{% extends "layout.html" %}
-{% set title = "Algorithms reference" %}
-{% block body %}
-
-<h1>Algorithms reference</h1>
-<p>Here is the complete list of algorithms which you can access from the Python interface.</p>
-<p>The C++ interface allows access to the same algorithms, and also some more which are templated
-and hence are not available in python.</p>
-
-'''
-    for category in algo_categories_html:
-        category_id = re.sub('[^0-9a-zA-Z]+', '', category.lower())
-        html += '<h2 id="' + category_id + '">' + category + '</h2>'
-        html += '\n'.join(sorted(algo_categories_html[category]))
-        html += '<hr>'
-    html += '''
-
-{% endblock %}
-'''
-    open('_templates/algorithms_reference.html', 'w').write(html)
+# html = '''
+# {% extends "layout.html" %}
+# {% set title = "Algorithms reference" %}
+# {% block body %}
+#
+# <h1>Algorithms reference</h1>
+# <p>Here is the complete list of algorithms which you can access from the Python interface.</p>
+# <p>The C++ interface allows access to the same algorithms, and also some more which are templated
+# and hence are not available in python.</p>
+#
+# '''
+# for category in algo_categories_html:
+#     category_id = re.sub('[^0-9a-zA-Z]+', '', category.lower())
+#     html += '<h2 id="' + category_id + '">' + category + '</h2>'
+#     html += '\n'.join(sorted(algo_categories_html[category]))
+#     html += '<hr>'
+# html += '''
+#
+# {% endblock %}
+# '''
+
+# open('_templates/algorithms_reference.html', 'w').write(html)
+############################################################
+############################################################
+
+        links = []
+        if std_algo:
+            links.append(':doc:`standard <reference/std_' + algoname + '>`')
+        if streaming_algo:
+            links.append(':doc:`streaming <reference/streaming_' + algoname + '>`')
+ algo_rst = algoname + '\n' + '^' * len(algoname) + '\n\n' + '(' + ', '.join(links) + ')' + '\n\n' + description + '\n\n'
+ algo_categories_rst.setdefault(category, [])
+ algo_categories_rst[category].append(algo_rst)
+
+ rst = "Algorithms reference" + \
+ "\n=====================\n\n" + \
+ "Here is the complete list of algorithms which you can access from the Python interface.\n\n" + \
+ "The C++ interface allows access to the same algorithms, and also some more which are templated and hence are not available in python.\n\n"
+ for category in algo_categories_rst:
+ rst += category + '\n' + '-' * len(category) + '\n\n'
+ rst += '\n'.join(sorted(algo_categories_rst[category]))
+ rst += '\n'
+
+ open('algorithms_reference.rst', 'w').write(rst)
if __name__ == '__main__':
diff --git a/doc/sphinxdoc/installing.rst b/doc/sphinxdoc/installing.rst
index 86708fa47..d3f9b4a46 100644
--- a/doc/sphinxdoc/installing.rst
+++ b/doc/sphinxdoc/installing.rst
@@ -87,7 +87,7 @@ Install prerequisites::
Install Essentia's dependencies::
- brew install eigen libyaml fftw ffmpeg libsamplerate libtag tensorflow
+ brew install eigen libyaml fftw ffmpeg@2.8 libsamplerate libtag chromaprint tensorflow
`Install Python environment using Homebrew `_ (Note that you are advised to do as described here and there are `good reasons to do so `_. You will most probably encounter installation errors when using Python/NumPy preinstalled with macOS.)::
@@ -189,7 +189,7 @@ Install doxygen and pip3. If you are on Linux::
Install additional dependencies (you might need to run this command with sudo)::
- pip3 install sphinx pyparsing sphinxcontrib-doxylink docutils jupyter sphinx-toolbox
+ pip3 install sphinx pyparsing sphinxcontrib-doxylink docutils jupyter sphinx-toolbox nbformat gitpython
sudo apt-get install pandoc
Make sure to build Essentia with Python 3 bindings and run::
diff --git a/doc/sphinxdoc/models.rst b/doc/sphinxdoc/models.rst
index 4d4780477..2ac080db2 100644
--- a/doc/sphinxdoc/models.rst
+++ b/doc/sphinxdoc/models.rst
@@ -1548,7 +1548,7 @@ Tonal/atonal
Music classification by tonality (2 classes)::
- tonal, atonal
+ atonal, tonal
Models:
diff --git a/doc/sphinxdoc/research_papers.md b/doc/sphinxdoc/research_papers.md
index e6f127bff..bc1bd2ef4 100644
--- a/doc/sphinxdoc/research_papers.md
+++ b/doc/sphinxdoc/research_papers.md
@@ -69,7 +69,7 @@ Indexing music by mood: design and integration of an automatic content-based ann
- K. R. Fricke, D.M. Greenberg, P.J. Rentfrow, and P.Y. Herzberg. Computer-based music feature analysis mirrors human perception and can be used to measure individual music preference. Journal of Research in Personality, 75:94-102, 2018.
-## Music version / cover song identification
+## Music version identification
- Yesiler, F., Miron, M., Serrà, J., & Gómez, E. (2022, February). Assessing algorithmic biases for musical version identification. In Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining (pp. 1284-1290).
- Yesiler, F., Molina, E., Serrà, J., & Gómez, E. (2021, June). Investigating the efficacy of music version retrieval systems for setlist identification. In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 541-545). IEEE.
- C. J. Tralie. Early MFCC And HPCP Fusion for Robust Cover Song Identification. arXiv preprint arXiv:1707.04680, 2017.
@@ -130,6 +130,7 @@ Indexing music by mood: design and integration of an automatic content-based ann
## Sound indexing, music production, and intelligent audio processing
+- A. Wang, Y. F. Cheng, and D. Lindlbauer. (2024). MARingBA: Music-adaptive ringtones for blended audio notification delivery. In Proceedings of the CHI Conference on Human Factors in Computing Systems (CHI '24), article 729, 1–15.
- Ma, A. B., & Lerch, A. (2022). Representation Learning for the Automatic Indexing of Sound Effects Libraries. arXiv preprint arXiv:2208.09096.
- Rashid, U., Saleem, K., & Ahmed, A. (2021). MIRRE approach: nonlinear and multimodal exploration of MIR aggregated search results. Multimedia Tools and Applications, 80, 20217-20253.
- Shier, J., McNally, K., Tzanetakis, G., & Brooks, K. G. (2021). Manifold learning methods for visualization and browsing of drum machine samples. Journal of the Audio Engineering Society, 69(1/2), 40-53.
@@ -163,6 +164,7 @@ Indexing music by mood: design and integration of an automatic content-based ann
- D. Moffat and J. D. Reiss. Objective evaluations of synthesised environmental sounds. In International Conference on Digital Audio Effects (DAFx-18), 2018.
### Singing voice analysis
+- Bruder, C., Poeppel, D., & Larrouy-Maestri, P. (2024). Perceptual (but not acoustic) features predict singing voice preferences. Scientific reports, 14(1), 8977.
- Faghih, B., Chakraborty, S., Yaseen, A., & Timoney, J. (2022). A new method for detecting onset and offset for singing in real-time and offline environments. Applied Sciences, 12(15), 7391.
### Audio analysis tools for assisting music education
@@ -180,6 +182,7 @@ Indexing music by mood: design and integration of an automatic content-based ann
- Alonso-Jiménez, P., Joglar-Ongay, L., Serra, X., & Bogdanov, D. (2019). Automatic detection of audio problems for quality control in digital music distribution. In Audio Engineering Society Convention 146. 146th Convention of the Audio Engineering Society; 2019 Mar 20-23; Dublin, Ireland. New York: AES; 2019.. Audio Engineering Society.
### Generative music, live coding, audio synthesis, style transfer
+- Plitsis, M., Kouzelis, T., Paraskevopoulos, G., Katsouros, V., & Panagakis, Y. (2024). Investigating personalization methods in text to music generation. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 1081-1085). IEEE.
- Singh, N. (2021, April). The Sound Sketchpad: Expressively Combining Large and Diverse Audio Collections. In 26th International Conference on Intelligent User Interfaces (pp. 297-301).
- Cífka, O., Ozerov, A., Şimşekli, U., & Richard, G. (2021, June). Self-supervised vq-vae for one-shot music style transfer. In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 96-100). IEEE.
- Lee, K. J. (2021). Computer evaluation of musical timbre transfer on drum tracks (Master thesis).
@@ -238,6 +241,7 @@ arXiv preprint arXiv:1911.04952, 2019.
- A. Haron. A step towards automatic identification of influence: Lick detection in a musical passage. In 15th International Society for Music Information Retrieval Conference (ISMIR'14) Late-Breaking/Demo Session.
### Melodic analysis
+- Kuriakose, J., Suresh, V., Dutta, S., Murthy, H. A., & Murthy, M. V. N. (2022). On the concept of Raga parentage in Carnatic music. Journal of New Music Research, 51(4-5), 321-345.
- Rengaswamy, P., Reddy, M. K., Rao, K. S., & Dasgupta, P. (2020). Robust f0 extraction from monophonic signals using adaptive sub-band filtering. Speech Communication, 116, 77-85.
- Viraraghavan, V. S., Pal, A., Aravind, R., & Murthy, H. A. (2020). Data-driven measurement of precision of components of pitch curves in Carnatic music. The Journal of the Acoustical Society of America, 147(5), 3657-3666.
- Y. P. Chen, L. Su, and Y. H. Yang. Electric Guitar Playing Technique Detection in Real-World Recording Based on F0 Sequence Pattern Recognition. In 16th International Society for Music Information Retrieval Conference (ISMIR'15), pages 708-714, 2015.
diff --git a/src/algorithms/spectral/hpcp.h b/src/algorithms/spectral/hpcp.h
index 310537a59..d862a5fc9 100644
--- a/src/algorithms/spectral/hpcp.h
+++ b/src/algorithms/spectral/hpcp.h
@@ -48,7 +48,7 @@ class HPCP : public Algorithm {
}
void declareParameters() {
- declareParameter("size", "the size of the output HPCP (must be a positive nonzero multiple of 12)", "[12,inf)", 12);
+ declareParameter("size", "the size of the output HPCP (defines bin resolution, must be a positive nonzero multiple of 12)", "[12,inf)", 12);
declareParameter("referenceFrequency", "the reference frequency for semitone index calculation, corresponding to A3 [Hz]", "(0,inf)", 440.0);
declareParameter("harmonics", "number of harmonics for frequency contribution, 0 indicates exclusive fundamental frequency contribution", "[0,inf)", 0); // 8 for chord estimation
declareParameter("bandPreset", "enables whether to use a band preset", "{true,false}", true);
diff --git a/src/algorithms/standard/framebuffer.cpp b/src/algorithms/standard/framebuffer.cpp
new file mode 100644
index 000000000..ceb1df760
--- /dev/null
+++ b/src/algorithms/standard/framebuffer.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+ *
+ * This file is part of Essentia
+ *
+ * Essentia is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Affero General Public License as published by the Free
+ * Software Foundation (FSF), either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * version 3 along with this program. If not, see http://www.gnu.org/licenses/
+ */
+
+#include "framebuffer.h"
+//#include <iostream>
+#include <algorithm>
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+
+const char* FrameBuffer::name = "FrameBuffer";
+const char* FrameBuffer::category = "Standard";
+const char* FrameBuffer::description = DOC(
+"This algorithm buffers input non-overlapping audio frames into longer overlapping frames with a hop sizes equal to input frame size.\n\n"
+"In standard mode, each compute() call updates and outputs the gathered buffer.\n\n"
+"Input frames can be of variate length. Input frames longer than the buffer size will be cropped. Empty input frames will raise an exception."
+);
+
+
+void FrameBuffer::configure() {
+ _bufferSize = parameter("bufferSize").toInt();
+ _zeroPadding = parameter("zeroPadding").toBool();
+ _buffer.resize(_bufferSize);
+ reset();
+}
+
+void FrameBuffer::reset() {
+ if (_zeroPadding) {
+ std::fill(_buffer.begin(), _buffer.end(), (Real) 0.);
+ _bufferUndefined = 0;
+ }
+ else {
+ _bufferUndefined = _bufferSize;
+ }
+}
+
+void FrameBuffer::compute() {
+  const vector<Real>& frame = _frame.get();
+  vector<Real>& bufferedFrame = _bufferedFrame.get();
+
+ if (frame.empty()) throw EssentiaException("FrameBuffer: the input frame is empty");
+
+ int shift = (int) frame.size();
+
+ if (shift >= _bufferSize) {
+ // Overwrite the entire buffer.
+ std::copy(frame.end() - _bufferSize, frame.end(), _buffer.begin());
+ _bufferUndefined = 0;
+ // TODO E_WARNING for the case of shift > _bufferSize (not all input values fit the buffer)
+ }
+ else {
+ std::copy(_buffer.begin() + shift, _buffer.end(), _buffer.begin());
+ std::copy(frame.begin(), frame.end(), _buffer.begin() + _bufferSize - shift);
+ if (_bufferUndefined) {
+ _bufferUndefined -= shift;
+ if (_bufferUndefined < 0) {
+ _bufferUndefined = 0;
+ }
+ }
+ }
+
+ // output
+ if (!_bufferUndefined) {
+ bufferedFrame.resize(_bufferSize);
+ std::copy(_buffer.begin(), _buffer.end(), bufferedFrame.begin());
+ }
+ else {
+    // Return empty frames until a full buffer is available.
+ bufferedFrame.clear();
+ }
+}
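Assuming the new algorithm is exposed through the Python bindings like the rest of the standard namespace, a small sketch of the buffering behaviour described above (buffer and frame sizes are illustrative):

    import essentia
    import essentia.standard as es

    framebuffer = es.FrameBuffer(bufferSize=8, zeroPadding=False)

    # feed non-overlapping 4-sample frames; the hop size equals the frame size
    for i in range(4):
        frame = essentia.array(range(i * 4, (i + 1) * 4))
        out = framebuffer(frame)
        # with zeroPadding=False the output stays empty until 8 samples have
        # been accumulated; afterwards each call slides the buffer by 4 samples
        print(i, out)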
diff --git a/src/algorithms/standard/framebuffer.h b/src/algorithms/standard/framebuffer.h
new file mode 100644
index 000000000..f8f93efa8
--- /dev/null
+++ b/src/algorithms/standard/framebuffer.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+ *
+ * This file is part of Essentia
+ *
+ * Essentia is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Affero General Public License as published by the Free
+ * Software Foundation (FSF), either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * version 3 along with this program. If not, see http://www.gnu.org/licenses/
+ */
+
+#ifndef ESSENTIA_FRAMEBUFFER_H
+#define ESSENTIA_FRAMEBUFFER_H
+
+#include "algorithm.h"
+
+namespace essentia {
+namespace standard {
+
+class FrameBuffer : public Algorithm {
+
+ private:
+  Input<std::vector<Real> > _frame;
+  Output<std::vector<Real> > _bufferedFrame;
+
+  std::vector<Real> _buffer;
+ int _bufferSize;
+ bool _zeroPadding;
+ int _bufferUndefined; // Number of undefined values in the buffer (= buffer size for the empty buffer on reset).
+
+ public:
+ FrameBuffer() {
+ declareInput(_frame, "frame", "the input audio frame");
+ declareOutput(_bufferedFrame, "frame", "the buffered audio frame");
+ }
+
+ void declareParameters() {
+ declareParameter("bufferSize", "the buffer size", "(0,inf)", 2048);
+ declareParameter("zeroPadding", "initialize the buffer with zeros (output zero-padded buffer frames if `true`, otherwise output empty frames until a full buffer is accumulated)", "{true,false}", true);
+ }
+ void compute();
+ void configure();
+ void reset();
+
+ static const char* name;
+ static const char* category;
+ static const char* description;
+
+};
+
+} // namespace standard
+} // namespace essentia
+
+
+#include "streamingalgorithmwrapper.h"
+
+namespace essentia {
+namespace streaming {
+
+class FrameBuffer : public StreamingAlgorithmWrapper {
+
+ protected:
+
+  Sink<std::vector<Real> > _frame;
+  Source<std::vector<Real> > _bufferedFrame;
+
+ public:
+ FrameBuffer() {
+ declareAlgorithm("FrameBuffer");
+ declareInput(_frame, TOKEN,"frame");
+ declareOutput(_bufferedFrame, TOKEN, "frame");
+ }
+};
+
+} // namespace streaming
+} // namespace essentia
+
+#endif // ESSENTIA_FRAMEBUFFER_H
diff --git a/src/algorithms/tonal/audio2pitch.cpp b/src/algorithms/tonal/audio2pitch.cpp
new file mode 100644
index 000000000..b67fe9196
--- /dev/null
+++ b/src/algorithms/tonal/audio2pitch.cpp
@@ -0,0 +1,113 @@
+#include "audio2pitch.h"
+#include "essentiamath.h"
+
+using namespace essentia;
+using namespace standard;
+
+const char* Audio2Pitch::name = "Audio2Pitch";
+const char* Audio2Pitch::category = "Pitch";
+const char* Audio2Pitch::description = DOC("This algorithm computes pitch from an audio frame, specifically targeting real-time pitch detection on audio signals. Internally, pitch is estimated with either PitchYin (pitchyin) or PitchYinFFT (pitchyinfft).");
+
+bool Audio2Pitch::isAboveThresholds(Real pitchConfidence, Real loudness) {
+ return (pitchConfidence >= _pitchConfidenceThreshold) && (loudness >= _loudnessThresholdGain);
+}
+
+void Audio2Pitch::configure() {
+
+ _sampleRate = parameter("sampleRate").toReal();
+ _frameSize = parameter("frameSize").toInt();
+ _minFrequency = parameter("minFrequency").toReal();
+ _maxFrequency = parameter("maxFrequency").toReal();
+ _pitchAlgorithmName = parameter("pitchAlgorithm").toString();
+ _tolerance = parameter("tolerance").toReal();
+ _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
+ _loudnessThreshold = parameter("loudnessThreshold").toReal();
+ _loudnessThresholdGain = db2amp(_loudnessThreshold);
+
+ if (_maxFrequency > _sampleRate * 0.5) {
+ throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency");
+ }
+ if (_maxFrequency <= _minFrequency) {
+ throw EssentiaException("Audio2Pitch: Max frequency cannot be lower or equal than the minimum frequency");
+ }
+
+ if (_pitchAlgorithmName != "pitchyinfft" && _pitchAlgorithmName != "pitchyin") {
+ throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' =", _pitchAlgorithmName);
+ }
+
+ if (_pitchAlgorithmName == "pitchyinfft") {
+ _windowing = AlgorithmFactory::create("Windowing");
+ _spectrum = AlgorithmFactory::create("Spectrum");
+ _pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT");
+
+ _windowing->configure("type", "hann",
+ "size", _frameSize);
+ _spectrum->configure("size", _frameSize);
+ }
+ else {
+ _pitchAlgorithm = AlgorithmFactory::create("PitchYin");
+ }
+
+ _loudnessAlgorithm = AlgorithmFactory::create("RMS");
+
+  // switch between pitchyin and pitchyinfft to propagate the weighting parameter (only pitchyinfft supports weighting)
+ if (_pitchAlgorithmName == "pitchyin") {
+ _pitchAlgorithm->configure(INHERIT("frameSize"),
+ INHERIT("maxFrequency"),
+ INHERIT("minFrequency"),
+ INHERIT("sampleRate"),
+ INHERIT("tolerance"));
+ }
+ else {
+ _pitchAlgorithm->configure(INHERIT("frameSize"),
+ INHERIT("maxFrequency"),
+ INHERIT("minFrequency"),
+ INHERIT("sampleRate"),
+ INHERIT("weighting"),
+ INHERIT("tolerance"));
+ }
+}
+
+void Audio2Pitch::compute() {
+  const std::vector<Real>& frame = _frame.get();
+ Real& pitch = _pitch.get();
+ Real& pitchConfidence = _pitchConfidence.get();
+ Real& loudness = _loudness.get();
+ int& voiced = _voiced.get();
+
+ if (frame.empty()) {
+ throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame");
+ }
+
+ if (frame.size() == 1) {
+ throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1");
+ }
+
+ _loudnessAlgorithm->input("array").set(frame);
+ _loudnessAlgorithm->output("rms").set(loudness);
+ _loudnessAlgorithm->compute();
+
+  std::vector<Real> windowedFrame, spectrum;
+ if (_pitchAlgorithmName == "pitchyinfft") {
+ _windowing->input("frame").set(frame);
+ _windowing->output("frame").set(windowedFrame);
+ _windowing->compute();
+ _spectrum->input("frame").set(windowedFrame);
+ _spectrum->output("spectrum").set(spectrum);
+ _spectrum->compute();
+ _pitchAlgorithm->input("spectrum").set(spectrum);
+ }
+ else if (_pitchAlgorithmName == "pitchyin") {
+ _pitchAlgorithm->input("signal").set(frame);
+ }
+
+ _pitchAlgorithm->output("pitch").set(pitch);
+ _pitchAlgorithm->output("pitchConfidence").set(pitchConfidence);
+ _pitchAlgorithm->compute();
+
+ // define voiced by thresholding
+ voiced = 0; // initially assumes an unvoiced frame
+ if (isAboveThresholds(pitchConfidence, loudness)) {
+ voiced = 1;
+ }
+}
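A hedged usage sketch (assuming the new algorithm is exposed as `essentia.standard.Audio2Pitch`; the tone and thresholds are illustrative):

    import numpy as np

    import essentia
    import essentia.standard as es

    sr, frame_size = 44100, 1024
    audio2pitch = es.Audio2Pitch(sampleRate=sr, frameSize=frame_size,
                                 pitchAlgorithm='pitchyinfft',
                                 pitchConfidenceThreshold=0.25,
                                 loudnessThreshold=-51.0)

    # one analysis frame of a synthetic 220 Hz tone
    t = np.arange(frame_size) / sr
    frame = essentia.array(0.5 * np.sin(2 * np.pi * 220.0 * t))

    pitch, confidence, loudness, voiced = audio2pitch(frame)
    # voiced is 1 only when both confidence and loudness pass their thresholds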
diff --git a/src/algorithms/tonal/audio2pitch.h b/src/algorithms/tonal/audio2pitch.h
new file mode 100644
index 000000000..550fb0a4f
--- /dev/null
+++ b/src/algorithms/tonal/audio2pitch.h
@@ -0,0 +1,75 @@
+#ifndef ESSENTIA_AUDIO2PITCH_H
+#define ESSENTIA_AUDIO2PITCH_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+class Audio2Pitch : public Algorithm {
+
+ protected:
+  Input<std::vector<Real> > _frame;
+  Output<Real> _pitch;
+  Output<Real> _pitchConfidence;
+  Output<Real> _loudness;
+  Output<int> _voiced;
+
+ Algorithm* _pitchAlgorithm;
+ Algorithm* _loudnessAlgorithm;
+ // auxiliary algorithms for FFT-based pitch
+ Algorithm* _windowing;
+ Algorithm* _spectrum;
+
+ Real _sampleRate;
+ int _frameSize;
+ Real _minFrequency;
+ Real _maxFrequency;
+ std::string _pitchAlgorithmName;
+ Real _tolerance;
+ Real _pitchConfidenceThreshold;
+ Real _loudnessThreshold;
+ Real _loudnessThresholdGain;
+
+ bool isAboveThresholds(Real pitchConfidence, Real loudness);
+
+ public:
+  Audio2Pitch() : _pitchAlgorithm(0), _loudnessAlgorithm(0), _windowing(0), _spectrum(0) {
+ declareInput(_frame, "frame", "the input frame to analyse");
+ declareOutput(_pitch, "pitch", "detected pitch in Hz");
+ declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch (from 0.0 to 1.0)");
+ declareOutput(_loudness, "loudness", "detected loudness in decibels");
+ declareOutput(_voiced, "voiced", "voiced frame categorization, 1 for voiced and 0 for unvoiced frame");
+ }
+
+ ~Audio2Pitch() {
+ if (_pitchAlgorithm) delete _pitchAlgorithm;
+ if (_loudnessAlgorithm) delete _loudnessAlgorithm;
+ if (_windowing) delete _windowing;
+ if (_spectrum) delete _spectrum;
+ }
+
+ void declareParameters() {
+ declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
+ declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024);
+ declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
+ declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
+ declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pitchyin,pitchyinfft}", "pitchyinfft");
+ declareParameter("weighting", "string to assign a weighting function", "{custom,A,B,C,D,Z}", "custom");
+ declareParameter("tolerance", "sets tolerance for peak detection on pitch algorithm", "[0,1]", 1.0);
+ declareParameter("pitchConfidenceThreshold", "level of pitch confidence above/below which note ON/OFF start to be considered", "[0,1]", 0.25);
+ declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
+ }
+
+ void configure();
+ void compute();
+
+ static const char* name;
+ static const char* category;
+ static const char* description;
+};
+
+} // namespace standard
+} // namespace essentia
+
+#endif
diff --git a/src/algorithms/tonal/multipitchmelodia.cpp b/src/algorithms/tonal/multipitchmelodia.cpp
index 484a33596..d21050664 100644
--- a/src/algorithms/tonal/multipitchmelodia.cpp
+++ b/src/algorithms/tonal/multipitchmelodia.cpp
@@ -27,9 +27,9 @@ namespace standard {
const char* MultiPitchMelodia::name = "MultiPitchMelodia";
const char* MultiPitchMelodia::category = "Pitch";
-const char* MultiPitchMelodia::description = DOC("This algorithm estimates multiple fundamental frequency contours from an audio signal. It is a multi pitch version of the MELODIA algorithm described in [1]. While the algorithm is originally designed to extract melody in polyphonic music, this implementation is adapted for multiple sources. The approach is based on the creation and characterization of pitch contours, time continuous sequences of pitch candidates grouped using auditory streaming cues. To this end, PitchSalienceFunction, PitchSalienceFunctionPeaks, PitchContours, and PitchContoursMonoMelody algorithms are employed. It is strongly advised to use the default parameter values which are optimized according to [1] (where further details are provided) except for minFrequency, maxFrequency, and voicingTolerance, which will depend on your application.\n"
+const char* MultiPitchMelodia::description = DOC("This algorithm estimates multiple fundamental frequency contours from an audio signal. It is a multi pitch version of the MELODIA algorithm described in [1]. While the algorithm is originally designed to extract melody in polyphonic music, this implementation is adapted for multiple sources. The approach is based on the creation and characterization of pitch contours, time continuous sequences of pitch candidates grouped using auditory streaming cues. To this end, PitchSalienceFunction, PitchSalienceFunctionPeaks, PitchContours, and PitchContoursMultiMelody algorithms are employed. It is strongly advised to use the default parameter values which are optimized according to [1] (where further details are provided) except for minFrequency, maxFrequency, and voicingTolerance, which will depend on your application.\n"
"\n"
-"The output is a vector of estimated melody pitch values and a vector of confidence values.\n"
+"The output is a vector of vectors of estimated pitch values for each frame.\n"
"\n"
"References:\n"
" [1] J. Salamon and E. Gómez, \"Melody extraction from polyphonic music\n"
diff --git a/src/algorithms/tonal/pitch2midi.cpp b/src/algorithms/tonal/pitch2midi.cpp
new file mode 100644
index 000000000..d40baeb9d
--- /dev/null
+++ b/src/algorithms/tonal/pitch2midi.cpp
@@ -0,0 +1,266 @@
+#include "pitch2midi.h"
+#include "essentiamath.h"
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+const char* Pitch2Midi::name = "Pitch2Midi";
+const char* Pitch2Midi::category = "Pitch";
+const char *Pitch2Midi::description = DOC("This algorithm detects MIDI note ON/OFF events from raw pitch and voiced values, using a MIDI note buffer and onset/offset uncertainty checkers.");
+
+void Pitch2Midi::configure()
+{
+ _sampleRate = parameter("sampleRate").toReal();
+ _hopSize = parameter("hopSize").toInt();
+ _minFrequency = parameter("minFrequency").toReal();
+ _minOcurrenceRate = parameter("minOcurrenceRate").toReal();
+ _bufferDuration = parameter("midiBufferDuration").toReal();
+ _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
+ _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
+ _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
+ _applyCompensation = parameter("applyTimeCompensation").toBool();
+ // former Pitch2Midi only parameters
+ _tuningFreq = parameter("tuningFrequency").toReal();
+ _transposition = parameter("transpositionAmount").toInt();
+
+ _frameTime = _hopSize / _sampleRate;
+ _minOnsetCheckThreshold = _minOnsetCheckPeriod / _frameTime;
+ _minOffsetCheckThreshold = _minOffsetCheckPeriod / _frameTime;
+ _minNoteChangeThreshold = _minNoteChangePeriod / _frameTime;
+
+ _unvoicedFrameCounter = 0;
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+
+ _minOcurrenceRatePeriod = _minOcurrenceRate * _bufferDuration;
+ _minOcurrenceRateThreshold = _minOcurrenceRatePeriod / _frameTime;
+
+ // estimate buffer capacity
+  int c = static_cast<int>( round( _sampleRate / float(_hopSize) * _bufferDuration ) );
+ _capacity = max(_minCapacity, c);
+ _framebuffer = AlgorithmFactory::create("FrameBuffer");
+ _framebuffer->configure("bufferSize", _capacity);
+
+}
+
+// this should NOT be called until framebuffer.compute has been called
+bool Pitch2Midi::hasCoherence()
+{
+ Real sum = accumulate(_buffer.begin(), _buffer.end(), 0.0);
+ if (sum / _capacity == _buffer[0]) {
+ return true;
+ }
+ return false;
+}
+
+// this should NOT be called until framebuffer.compute has been called and _capacity has been set on the configure
+void Pitch2Midi::getMaxVoted()
+{
+ // estimates the max voted MIDI note in the midi note buffer
+  map<Real, int> counts;
+ for (Real value : _buffer) {
+ counts[value]++;
+ }
+
+ Real maxCount = 0;
+ Real maxValue = 0;
+
+ for (auto& pair : counts) {
+ if (pair.second > maxCount) {
+ maxCount = pair.second;
+ maxValue = pair.first;
+ }
+ }
+
+ _maxVoted[0] = maxValue;
+ _maxVoted[1] = maxCount / _capacity;
+}
+
+void Pitch2Midi::setOutputs(Real midiNoteNumberValue, float onsetTimeCompensation, float offsetTimeCompensation) {
+  vector<string>& messageType = _messageType.get();
+  vector<Real>& midiNoteNumber = _midiNoteNumber.get();
+  vector<Real>& timeCompensation = _timeCompensation.get();
+
+ // reuse bins
+ messageType.resize(0);
+ midiNoteNumber.resize(0);
+ timeCompensation.resize(0);
+
+  // TODO: this is not clear because it might remove a note_off message which is defined by dnote.
+  //#! it would be better just to provide some code for midiNoteNumber when this happens
+  if (midiNoteNumberValue <= 0 || midiNoteNumberValue >= 127) {
+ //E_INFO("SCAPE");
+ return;
+ }
+
+ // let's define first the message type
+ if (_noteOff) {
+ messageType.push_back("note_off");
+ }
+
+ if (_noteOn) {
+ messageType.push_back("note_on");
+ }
+
+ if (!_applyCompensation) {
+ onsetTimeCompensation = 0.f;
+ offsetTimeCompensation = 0.f;
+ }
+
+ midiNoteNumber.push_back(dnote);
+ midiNoteNumber.push_back(midiNoteNumberValue);
+ timeCompensation.push_back(offsetTimeCompensation);
+ timeCompensation.push_back(onsetTimeCompensation);
+}
+
+void Pitch2Midi::push(Real midiNoteNumber) {
+ // push new MIDI note number in the MIDI buffer
+ _midiNoteNumberVector[0] = midiNoteNumber;
+ _framebuffer->input("frame").set(_midiNoteNumberVector);
+ _framebuffer->output("frame").set(_buffer);
+ _framebuffer->compute();
+}
+
+void Pitch2Midi::compute()
+{
+ // former MidiPool inputs are now Pitch2Midi internal vars
+ // all we need is to run the conversions:
+ const Real& pitch = _pitch.get();
+ const int& voiced = _voiced.get();
+
+ // do sanity checks
+  if (pitch < 0) {
+    throw EssentiaException("Pitch2Midi: input pitch must be non-negative");
+  }
+
+  _detectedPitch = pitch;
+  if (pitch <= 0) { _detectedPitch = 1e-05; }  // guard hz2midi against a zero pitch
+ _midiNoteNumberTransposed = hz2midi(_detectedPitch, _tuningFreq) + _transposition;
+
+ // refresh note_on and note_off timestamps
+ _noteOn = false;
+ _noteOff = false;
+
+ // unvoiced frame detection
+ if (!voiced) {
+ if ( _NOTED_ON ) {
+ _unvoicedFrameCounter++;
+ if (_unvoicedFrameCounter > _minNoteChangeThreshold) {
+ _NOTED_ON = false;
+ _noteOff = true;
+ updateDnote();
+ setOutputs(dnote, 0.0, _minNoteChangePeriod);
+ //E_INFO("offset(unvoiced frame)");
+ _unvoicedFrameCounter = 0;
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+ }
+ } else {
+ _unvoicedFrameCounter = 0;
+ push(0); // push 0th MIDI note to remove the past
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+ }
+ return;
+ }
+
+ _unvoicedFrameCounter = 0;
+
+ // push new MIDI note number in the MIDI buffer
+ push(_midiNoteNumberTransposed);
+
+ // update max_voting
+ getMaxVoted();
+
+ // analyze pitch buffer
+ if (hasCoherence() && _NOTED_ON) {
+ if (note == _maxVoted[0]) {
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+ }
+ else {
+      // IMPORTANT: this rarely happens; when hasCoherence() holds, the current MIDI note normally equals the max-voted one.
+ _offsetCheckCounter++;
+ if (_offsetCheckCounter > _minOffsetCheckThreshold) {
+ _NOTED_ON = true;
+ if (note != _buffer[0]){ // avoid note slicing effect
+ updateDnote();
+ note = _buffer[0];
+ _noteOff = true;
+ _noteOn = true;
+ }
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+ //E_WARNING("off-onset(" << _buffer[0] << ", coherent & NOTED)");
+ }
+ }
+ // in coherence output the _midiNoteNumberTransposed coincides with _buffer[0] value
+ setOutputs(_midiNoteNumberTransposed, _minOffsetCheckPeriod, _minOffsetCheckPeriod);
+ return;
+ }
+
+ if (hasCoherence() && !_NOTED_ON) {
+
+ _onsetCheckCounter++;
+
+ if (_onsetCheckCounter > _minOnsetCheckThreshold){
+ note = _buffer[0];
+ _noteOn = true;
+ _NOTED_ON = true;
+ //E_INFO("onset(" << _buffer[0] << ", coherent & !NOTED): "<< _onsetCheckCounter <<" - " << _minOnsetCheckThreshold);
+ _onsetCheckCounter = 0;
+ _offsetCheckCounter = 0;
+ }
+ // in coherence output the _midiNoteNumberTransposed coincides with _buffer[0] value
+ setOutputs(_midiNoteNumberTransposed, _minOnsetCheckPeriod, _minOffsetCheckPeriod);
+ return;
+ }
+
+ if (!hasCoherence() && _NOTED_ON) {
+ if (_maxVoted[0] != 0.0) {
+ _onsetCheckCounter++;
+ // combines checker with minOcurrenceRate
+ if ((_onsetCheckCounter > _minOcurrenceRateThreshold)){
+ _NOTED_ON = true;
+ if (note != _maxVoted[0]){ // avoid note slicing effect
+ _noteOff = true;
+ _noteOn = true;
+ updateDnote();
+ note = _maxVoted[0];
+ }
+ //E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOcurrenceRateThreshold);
+ _offsetCheckCounter = 0;
+ _onsetCheckCounter = 0;
+ }
+ }
+    // output the max-voted midi note to avoid unstable midi note numbers
+ setOutputs(_maxVoted[0], _minOcurrenceRatePeriod, _minOcurrenceRatePeriod);
+ return;
+ }
+
+ if (!hasCoherence() && !_NOTED_ON) {
+ if (_maxVoted[1] > _minOcurrenceRate) {
+ _onsetCheckCounter++;
+
+ if (_onsetCheckCounter > _minOnsetCheckThreshold) {
+ if (_maxVoted[0] != 0.0) {
+ note = _maxVoted[0];
+ _NOTED_ON = true;
+ _noteOn = true;
+ //E_INFO("onset(" << _maxVoted[0] << ", uncoherent & unNOTED)");
+ _onsetCheckCounter = 0;
+ _offsetCheckCounter = 0;
+ }
+ }
+ }
+    // output the max-voted midi note to avoid unstable midi note numbers
+ setOutputs(_maxVoted[0], _minOnsetCheckPeriod, _minOffsetCheckPeriod);
+ return;
+ }
+ // E_INFO("Compute() -END");
+}
+
+void Pitch2Midi::updateDnote() {
+ dnote = note;
+}
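The branching in compute() rests on two statistics over the buffer of recent MIDI notes: whether all entries agree (hasCoherence) and which note dominates (getMaxVoted). A standalone Python sketch of the same idea, with our own names:

    from collections import Counter

    def has_coherence(buffer):
        # true when every note in the buffer is identical
        return len(set(buffer)) == 1

    def max_voted(buffer):
        # most frequent note and the fraction of the buffer it fills
        note, count = Counter(buffer).most_common(1)[0]
        return note, count / len(buffer)

    print(has_coherence([60.0, 60.0, 60.0, 60.0]))          # True: stable note
    print(max_voted([60.0, 62.0, 62.0, 62.0, 62.0, 62.0]))  # (62.0, 0.833): note change candidate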
diff --git a/src/algorithms/tonal/pitch2midi.h b/src/algorithms/tonal/pitch2midi.h
new file mode 100644
index 000000000..aa3b4a45b
--- /dev/null
+++ b/src/algorithms/tonal/pitch2midi.h
@@ -0,0 +1,118 @@
+#ifndef ESSENTIA_PITCH2MIDI_H
+#define ESSENTIA_PITCH2MIDI_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+ class Pitch2Midi : public Algorithm {
+ protected:
+ // Inputs
+    Input<Real> _pitch;
+    Input<int> _voiced;
+
+ // Outputs
+    Output<std::vector<std::string> > _messageType;
+    Output<std::vector<Real> > _midiNoteNumber;
+    Output<std::vector<Real> > _timeCompensation;
+
+ bool _noteOn;
+ bool _noteOff;
+
+ Algorithm* _framebuffer;
+
+ // parameters
+ Real _sampleRate;
+ int _hopSize;
+ Real _minFrequency;
+ Real _minOcurrenceRate;
+ Real _minOnsetCheckPeriod;
+ Real _minOffsetCheckPeriod;
+ Real _minNoteChangePeriod;
+ Real _bufferDuration;
+ bool _applyCompensation;
+ // former Pitch2Midi params
+ Real _tuningFreq;
+ int _transposition;
+
+ // other
+ int _capacity;
+ int _minCapacity = 3;
+ bool _NOTED_ON = false;
+    std::vector<Real> _maxVoted;
+ bool _COHERENCE;
+ Real note = 0.0;
+ Real dnote = 0.0;
+ Real _detectedPitch;
+
+ // Containers
+    std::vector<Real> _midiNoteNumberVector; // always size 1, but the FrameBuffer algorithm expects vectors as input
+    std::vector<Real> _buffer;
+
+ int capacity();
+ bool hasCoherence();
+ void getMaxVoted();
+
+ void updateDnote();
+ void setOutputs(Real midiNoteNumber, float onsetTimeCompensation, float offsetTimeCompensation);
+
+ Real _minOnsetCheckThreshold;
+ Real _minOffsetCheckThreshold;
+ Real _minNoteChangeThreshold;
+
+ int _unvoicedFrameCounter;
+ int _offsetCheckCounter;
+ int _onsetCheckCounter;
+
+ Real _frameTime;
+ Real _minOcurrenceRateThreshold;
+ Real _minOcurrenceRatePeriod;
+
+ // former Pitch2Midi outputs, now interal vars
+ Real _midiNoteNumberTransposed;
+
+ public:
+ Pitch2Midi() : _maxVoted(2), _midiNoteNumberVector(1) {
+ declareInput(_pitch, "pitch", "pitch given in Hz for conversion");
+ declareInput(_voiced, "voiced", "whether the frame is voiced or not, (0, 1)");
+      declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {note_off, note_on}; both are output on a note change");
+ declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
+ declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");
+ }
+
+ ~Pitch2Midi() {
+ delete _framebuffer;
+ };
+
+ void declareParameters() {
+ declareParameter("sampleRate", "Audio sample rate", "[8000,inf)", 44100);
+ declareParameter("hopSize", "Pitch Detection analysis hop size in samples, equivalent to I/O buffer size", "[1,inf)", 128);
+ declareParameter("minFrequency", "minimum detectable frequency", "[20,20000]", 60.0);
+ declareParameter("minOcurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
+ declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in the note toggle detection algorithm", "[0.005,0.5]", 0.015); // 15ms
+ declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (s)", "(0,1]", 0.030);
+ declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (s)", "(0,1]", 0.075);
+ declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (s)", "(0,1]", 0.2);
+ declareParameter("applyTimeCompensation", "whether to apply time compensation in the timestamp of the note toggle messages.", "{true,false}", true);
+ // former Pitch2Midi params
+ declareParameter("tuningFrequency", "reference tuning frequency in Hz", "{432,440}", 440);
+ declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
+ }
+
+ void configure();
+ void compute();
+ void inline getMidiNoteNumber(Real pitch);
+
+ void push(Real midiNoteNumber);
+
+ static const char* name;
+ static const char* category;
+ static const char* description;
+ };
+
+
+} // namespace standard
+} // namespace essentia
+
+#endif
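Chaining the two new algorithms gives a minimal pitch-to-MIDI front end; a hedged Python sketch (assuming both are exposed in `essentia.standard`; in practice the two calls run inside a frame-by-frame real-time loop):

    import numpy as np

    import essentia
    import essentia.standard as es

    sr, frame_size = 44100, 1024
    audio2pitch = es.Audio2Pitch(sampleRate=sr, frameSize=frame_size)
    pitch2midi = es.Pitch2Midi(sampleRate=sr, hopSize=frame_size)

    t = np.arange(frame_size) / sr
    frame = essentia.array(0.5 * np.sin(2 * np.pi * 440.0 * t))

    pitch, confidence, loudness, voiced = audio2pitch(frame)
    message_type, midi_note, time_compensation = pitch2midi(pitch, voiced)
    # message_type becomes e.g. ['note_on'] once the internal note buffer agrees;
    # midi_note holds [previous_note, new_note] and time_compensation the
    # onset/offset delays to subtract from the message timestamps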
diff --git a/src/algorithms/tonal/pitchcontoursmultimelody.cpp b/src/algorithms/tonal/pitchcontoursmultimelody.cpp
index c0e7777c5..87bd1ceb2 100644
--- a/src/algorithms/tonal/pitchcontoursmultimelody.cpp
+++ b/src/algorithms/tonal/pitchcontoursmultimelody.cpp
@@ -27,7 +27,7 @@ using namespace standard;
const char* PitchContoursMultiMelody::name = "PitchContoursMultiMelody";
const char* PitchContoursMultiMelody::category = "Pitch";
const char* PitchContoursMultiMelody::description = DOC("This algorithm post-processes a set of pitch contours into a sequence of multiple f0 values in Hz.\n"
-"This algorithm is intended to receive its \"contoursBins\", \"contoursSaliences\", and \"contoursStartTimes\" inputs from the PitchContours algorithm. The \"duration\" input corresponds to the time duration of the input signal. The output is a vector of estimated pitch values\n"
+"This algorithm is intended to receive its \"contoursBins\", \"contoursSaliences\", and \"contoursStartTimes\" inputs from the PitchContours algorithm. The \"duration\" input corresponds to the time duration of the input signal. The output is a vector of vectors of estimated pitch values for each frame.\n"
"\n"
"When input vectors differ in size, or \"numberFrames\" is negative, an exception is thrown. Input vectors must not contain negative start indices nor negative bin and salience values otherwise an exception is thrown.\n"
"\n"
diff --git a/src/algorithms/tonal/pitchyinfft.cpp b/src/algorithms/tonal/pitchyinfft.cpp
index 9be05b8ed..2076b50e5 100644
--- a/src/algorithms/tonal/pitchyinfft.cpp
+++ b/src/algorithms/tonal/pitchyinfft.cpp
@@ -37,7 +37,7 @@ static Real _weightMask[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
static const Real _weights[] = {-75.8, -70.1, -60.8, -52.1, -44.2, -37.5,
-31.3, -25.6, -20.9, -16.5, -12.6, -9.6, -7.0, -4.7, -3.0, -1.8, -0.8,
-0.2, -0.0, 0.5, 1.6, 3.2, 5.4, 7.8, 8.1, 5.3, -2.4, -11.1, -12.8,
- -12.2, -7.4, -17.8, -17.8, -17.8}; // by default the original one is selected
+ -12.2, -7.4, -17.8, -17.8, -17.8}; // by default use custom weights designed specifically for the PitchYinFFT algorithm
static const Real _aWeighting[] = {-148.6, -50.4, -44.8, -39.5, -34.5, -30.3,
-26.2, -22.4, -19.1, -16.2, -13.2, -10.8, -8.7, -6.6, -4.8, -3.2, -1.9,
@@ -91,7 +91,7 @@ void PitchYinFFT::configure() {
// configure algorithms
_fft->configure("size", _frameSize);
- if (_weighting != "default" && _weighting != "A" && _weighting != "B" && _weighting != "C" && _weighting != "D" && _weighting != "Z") {
+ if (_weighting != "custom" && _weighting != "A" && _weighting != "B" && _weighting != "C" && _weighting != "D" && _weighting != "Z") {
E_INFO("PitchYinFFT: 'weighting' = "<<_weighting<<"\n");
throw EssentiaException("PitchYinFFT: Bad 'weighting' parameter");
}
@@ -118,7 +118,7 @@ void PitchYinFFT::spectralWeights(std::string weighting) {
int i = 0, j = 1;
Real freq = 0, a0 = 0, a1 = 0, f0 = 0, f1 = 0;
int _maskSize = 34;
- if (weighting == "default") {
+ if (weighting == "custom") {
for (int n=0; n<_maskSize; n++)
_weightMask[n] = _weights[n];
}
diff --git a/src/algorithms/tonal/pitchyinfft.h b/src/algorithms/tonal/pitchyinfft.h
index 237a65431..015c1b040 100644
--- a/src/algorithms/tonal/pitchyinfft.h
+++ b/src/algorithms/tonal/pitchyinfft.h
@@ -84,7 +84,7 @@ class PitchYinFFT : public Algorithm {
declareParameter("maxFrequency", "the maximum allowed frequency [Hz]", "(0,inf)", 22050.0);
declareParameter("interpolate", "boolean flag to enable interpolation", "{true,false}", true);
declareParameter("tolerance", "tolerance for peak detection", "[0,1]", 1.0);
- declareParameter("weighting", "string to assign a weighting function", "{default,A,B,C,D,Z}", "default");
+ declareParameter("weighting", "string to assign a weighting function", "{custom,A,B,C,D,Z}", "custom");
}
void configure();
diff --git a/src/algorithms/tonal/pitchyinprobabilities.cpp b/src/algorithms/tonal/pitchyinprobabilities.cpp
index 479af15ce..d97ce805c 100644
--- a/src/algorithms/tonal/pitchyinprobabilities.cpp
+++ b/src/algorithms/tonal/pitchyinprobabilities.cpp
@@ -321,7 +321,7 @@ void PitchYinProbabilities::compute() {
bool isLowAmplitude = (RMS < _lowAmp);
for (size_t iCandidate = 0; iCandidate < _freq.size(); ++iCandidate) {
- Real pitchCents = hz2cents(_freq[iCandidate]);
+ Real pitchCents = hz2midi(_freq[iCandidate], 440.0);
_freq[iCandidate] = pitchCents;
if (isLowAmplitude) {
// lower the probabilities of the frequencies by calculating the weighted sum
diff --git a/src/essentia/essentiamath.h b/src/essentia/essentiamath.h
index 2cb47cf03..9e3606859 100644
--- a/src/essentia/essentiamath.h
+++ b/src/essentia/essentiamath.h
@@ -39,6 +39,7 @@
#include "utils/tnt/tnt2essentiautils.h"
#define M_2PI (2 * M_PI)
+#define ALL_NOTES "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
namespace essentia {
@@ -738,8 +739,90 @@ inline Real hz2hz(Real hz){
return hz;
}
-inline Real hz2cents(Real hz) {
- return 12 * std::log(hz/440)/std::log(2.) + 69;
+inline Real cents2hz(Real cents, Real referenceFrequency) {
+ return referenceFrequency * powf(2.0, cents / 1200.0);
+}
+
+inline Real hz2cents(Real hz, Real referenceFrequency) {
+ return 1200 * log2(hz / referenceFrequency);
+}
+
+inline int hz2midi(Real hz, Real tuningFrequency) {
+ return 69 + (int) round(log2(hz / tuningFrequency) * 12);
+}
+
+inline Real midi2hz(int midiNoteNumber, Real tuningFrequency) {
+ return tuningFrequency * powf(2, (midiNoteNumber - 69) / 12.0);
+}
+
+inline std::string note2root(std::string note) {
+ return note.substr(0, note.size()-1);
+}
+
+inline int note2octave(std::string note) {
+ char octaveChar = note.back();
+ return octaveChar - '0';
+}
+
+inline std::string midi2note(int midiNoteNumber) {
+  std::string NOTES[] = {ALL_NOTES};
+  int nNotes = *(&NOTES + 1) - NOTES;  // number of entries in NOTES
+  int CIdx = 3;                        // index of "C" in NOTES; octave numbers increment at C
+  int diffCIdx = nNotes - CIdx;
+  int noteIdx = midiNoteNumber - 69;   // semitone offset from A4 (MIDI 69)
+  int idx = abs(noteIdx) % nNotes;
+  int octave = (CIdx + 1) + floor(float(noteIdx + diffCIdx) / nNotes);
+  if (noteIdx < 0) {
+    idx = abs(idx - nNotes) % nNotes;
+  }
+  std::string closest_note = NOTES[idx] + std::to_string(octave);
+  return closest_note;
+}
+
+inline int note2midi(std::string note) {
+ //const std::vector<std::string> ALL_NOTES { "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" };
+ std::string NOTES[] = {ALL_NOTES};
+ int octave = note2octave(note);
+ std::string root = note2root(note);
+ int nNotes = *(&NOTES + 1) - NOTES; // same pointer-arithmetic length trick as in midi2note
+ //int nNotes = NOTES.size();
+ int CIdx = 3;
+
+ int noteIdx = floor((octave - (CIdx + 1)) * nNotes);
+ int idx = 0;
+ for (int i = 0; i < nNotes; i++) {
+ if (NOTES[i] == root) {
+ idx = i;
+ if (idx >= CIdx) {
+ idx = idx - nNotes;
+ }
+ break;
+ }
+ }
+ int midiNote = noteIdx + 69 + idx;
+ return midiNote;
+}
+
+inline std::string hz2note(Real hz, Real tuningFrequency) {
+ int midiNoteNumber = hz2midi(hz, tuningFrequency);
+ return midi2note(midiNoteNumber);
+}
+
+inline Real note2hz(std::string note, Real tuningFrequency) {
+ int midiNoteNumber = note2midi(note);
+ return midi2hz(midiNoteNumber, tuningFrequency);
+}
+
+inline int db2velocity (Real decibels, Real hearingThreshold) {
+ int velocity = 0;
+ if (decibels > hearingThreshold) {
+ velocity = (int)((hearingThreshold - decibels) * 127 / hearingThreshold); // decibels should be negative
+ }
+ return velocity;
+}
+
+inline Real velocity2db(int velocity, Real hearingThreshold) {
+ return -(hearingThreshold * velocity / 127 - hearingThreshold);
}
inline int argmin(const std::vector<Real>& input) {
diff --git a/src/examples/standard_tempocnn.cpp b/src/examples/standard_tempocnn.cpp
new file mode 100644
index 000000000..0dad10834
--- /dev/null
+++ b/src/examples/standard_tempocnn.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+ *
+ * This file is part of Essentia
+ *
+ * Essentia is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Affero General Public License as published by the Free
+ * Software Foundation (FSF), either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * version 3 along with this program. If not, see http://www.gnu.org/licenses/
+ */
+
+#include <iostream>
+#include <essentia/algorithmfactory.h>
+#include <essentia/essentiamath.h>
+#include <essentia/pool.h>
+#include "credit_libav.h"
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+int main(int argc, char* argv[]) {
+
+ if (argc != 4) {
+ cout << "Error: incorrect number of arguments." << endl;
+ cout << "Usage: " << argv[0] << " audio_input output_file graph_file" << endl;
+ creditLibAV();
+ exit(1);
+ }
+
+ string audioFilename = argv[1];
+ string outputFilename = argv[2];
+
+ // define graphFilePath
+ string graphFilePath = argv[3];
+
+ // register the algorithms in the factory(ies)
+ essentia::init();
+
+ Pool pool;
+
+ /////// PARAMS //////////////
+ Real sampleRate = 11025.0;
+ int resampleQuality = 4;
+
+ AlgorithmFactory& factory = AlgorithmFactory::instance();
+
+ Algorithm* audioLoader = factory.create("MonoLoader",
+ "filename", audioFilename,
+ "sampleRate", sampleRate,
+ "resampleQuality", resampleQuality);
+
+ Algorithm* tempoCNN = factory.create("TempoCNN",
+ "graphFilename", graphFilePath);
+
+ // inputs and outputs
+ vector<Real> audio;
+ Real globalTempo;
+ vector<Real> localTempo;
+ vector<Real> localTempoProbabilities;
+
+ // process
+ audioLoader->output("audio").set(audio);
+ audioLoader->compute();
+
+ tempoCNN->input("audio").set(audio);
+ tempoCNN->output("globalTempo").set(globalTempo);
+ tempoCNN->output("localTempo").set(localTempo);
+ tempoCNN->output("localTempoProbabilities").set(localTempoProbabilities);
+ tempoCNN->compute();
+
+ pool.add("tempoCNN.global_tempo", globalTempo);
+ pool.add("tempoCNN.localTempo", localTempo);
+ pool.add("tempoCNN.localTempoProbabilities", localTempoProbabilities);
+
+ // output results
+ cout << "------------- writing results to file " << outputFilename << " -------------" << endl;
+
+ Algorithm* json = factory.create("YamlOutput",
+ "filename", outputFilename,
+ "format", "json");
+ json->input("pool").set(pool);
+ json->compute();
+
+ // cleanup
+ delete audioLoader;
+ delete tempoCNN;
+ delete json;
+
+ essentia::shutdown();
+
+ return 0;
+}
diff --git a/src/examples/wscript b/src/examples/wscript
index 67c158bd9..ce0b521a5 100644
--- a/src/examples/wscript
+++ b/src/examples/wscript
@@ -43,6 +43,7 @@ example_sources_fileio = [
('standard_loudnessebur128_double_input', ),
('standard_saturationdetector', ),
('standard_snr', ),
+ ('standard_tempocnn',),
('standard_welch', ),
('streaming_humdetector', ),
diff --git a/src/python/essentia/utils.py b/src/python/essentia/utils.py
index ff8678186..d9e01fff8 100644
--- a/src/python/essentia/utils.py
+++ b/src/python/essentia/utils.py
@@ -63,6 +63,50 @@ def mel2hz(arg):
def hz2mel(arg):
return _essentia.hz2mel( _c.convertData(arg, _c.Edt.REAL) )
+def midi2hz(arg1, arg2=440.0):
+ return _essentia.midi2hz( _c.convertData(arg1, _c.Edt.INTEGER),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def hz2midi(arg1, arg2=440.0):
+ return _essentia.hz2midi( _c.convertData(arg1, _c.Edt.REAL),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def cents2hz(arg1, arg2):
+ return _essentia.cents2hz(_c.convertData(arg1, _c.Edt.REAL),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def hz2cents(arg1, arg2):
+ return _essentia.hz2cents(_c.convertData(arg1, _c.Edt.REAL),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def midi2note(arg):
+ return _essentia.midi2note( _c.convertData(arg, _c.Edt.INTEGER) )
+
+def note2midi(arg):
+ return _essentia.note2midi( _c.convertData(arg, _c.Edt.STRING) )
+
+def note2root(arg):
+ return _essentia.note2root( _c.convertData(arg, _c.Edt.STRING) )
+
+def note2octave(arg):
+ return _essentia.note2octave( _c.convertData(arg, _c.Edt.STRING) )
+
+def hz2note(arg1, arg2=440.0):
+ return _essentia.hz2note( _c.convertData(arg1, _c.Edt.REAL),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def note2hz(arg1, arg2=440.0):
+ return _essentia.note2hz( _c.convertData(arg1, _c.Edt.STRING),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def velocity2db(arg1, arg2=-96):
+ return _essentia.velocity2db( _c.convertData(arg1, _c.Edt.INTEGER),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
+def db2velocity(arg1, arg2=-96):
+ return _essentia.db2velocity( _c.convertData(arg1, _c.Edt.REAL),
+ _c.convertData(arg2, _c.Edt.REAL) )
+
def equivalentKey(arg):
return _essentia.equivalentKey( _c.convertData(arg, _c.Edt.STRING) )
@@ -86,6 +130,12 @@ def derivative(array):
'amp2db', 'db2amp',
'bark2hz', 'hz2bark',
'mel2hz', 'hz2mel',
+ 'midi2hz', 'hz2midi',
+ 'cents2hz', 'hz2cents',
+ 'note2root', 'note2octave',
+ 'midi2note', 'note2midi',
+ 'hz2note', 'note2hz',
+ 'velocity2db', 'db2velocity',
'postProcessTicks',
'normalize', 'derivative',
'equivalentKey', 'lin2log']
diff --git a/src/python/globalfuncs.cpp b/src/python/globalfuncs.cpp
index edfa0e9ae..413abe32a 100644
--- a/src/python/globalfuncs.cpp
+++ b/src/python/globalfuncs.cpp
@@ -389,6 +389,167 @@ hzToMel(PyObject* notUsed, PyObject* arg) {
return PyFloat_FromDouble( double(mel) );
}
+static PyObject*
+midiToHz(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyLong_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (int midiNoteNumber, Real tuningFrequency)");
+ return NULL;
+ }
+ Real hz = midi2hz( long( PyLong_AsLong(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1])) );
+ return PyFloat_FromDouble( double(hz) );
+}
+
+static PyObject*
+hzToMidi(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyFloat_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (Real hertz, Real tuningFrequency)");
+ return NULL;
+ }
+
+ int midi = hz2midi( Real( PyFloat_AS_DOUBLE(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1])) );
+ return PyLong_FromLong( int(midi) );
+}
+
+static PyObject*
+hzToCents(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+
+ if (argsV.size() != 2 || !PyFloat_Check(argsV[0]) || !PyFloat_Check(argsV[1])) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (Real hertz, Real referenceFrequency)");
+ return NULL;
+ }
+
+ Real cents = hz2cents( Real( PyFloat_AS_DOUBLE(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1]) ) );
+ return PyFloat_FromDouble( double(cents) );
+}
+
+static PyObject*
+centsToHz(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+
+ if (argsV.size() != 2 || !PyFloat_Check(argsV[0]) || !PyFloat_Check(argsV[1])) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (Real cents, Real referenceFrequency)");
+ return NULL;
+ }
+
+ Real hz = cents2hz( Real( PyFloat_AS_DOUBLE(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1]) ) );
+ return PyFloat_FromDouble( hz );
+}
+
+static PyObject*
+midiToNote(PyObject* notUsed, PyObject* arg) {
+
+ if (!PyLong_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (int midiNoteNumber)");
+ return NULL;
+ }
+
+ std::string note = midi2note( long( PyLong_AsLong(arg) ) );
+ const char *c_note = note.c_str();
+ return PyString_FromString( c_note );
+}
+
+static PyObject*
+noteToMidi(PyObject* notUsed, PyObject* arg) {
+
+ if (!PyString_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (string note)");
+ return NULL;
+ }
+
+ int midi = note2midi( PyString_AS_STRING(arg) );
+ return PyLong_FromLong( long(midi) );
+}
+
+static PyObject*
+noteToRoot(PyObject* notUsed, PyObject* arg) {
+
+ if (!PyString_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (string note)");
+ return NULL;
+ }
+
+ std::string root = note2root( PyString_AS_STRING(arg) );
+ const char *c_root = root.c_str();
+ return PyString_FromString( c_root );
+}
+
+static PyObject*
+noteToOctave(PyObject* notUsed, PyObject* arg) {
+
+ if (!PyString_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, (char*)"expecting arguments (string note)");
+ return NULL;
+ }
+
+ int octave = note2octave( PyString_AS_STRING(arg) );
+ return PyLong_FromLong( int(octave) );
+}
+
+static PyObject*
+hzToNote(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyFloat_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (Real hertz, Real tuningFrequency)");
+ return NULL;
+ }
+
+ std::string note = hz2note( Real( PyFloat_AS_DOUBLE(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1]) ) );
+ const char *c_note = note.c_str();
+ return PyString_FromString( c_note );
+}
+
+static PyObject*
+noteToHz(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyString_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (string note, Real tuningFrequency)");
+ return NULL;
+ }
+
+ Real hz = note2hz( PyString_AS_STRING(argsV[0]), Real( PyFloat_AS_DOUBLE(argsV[1]) ) );
+ return PyFloat_FromDouble( hz );
+}
+
+static PyObject*
+velocityToDb(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyLong_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (int velocity, Real hearingThreshold)");
+ return NULL;
+ }
+
+ Real db = velocity2db( long( PyLong_AsLong(argsV[0]) ), Real( PyFloat_AS_DOUBLE(argsV[1]) ) );
+ return PyFloat_FromDouble( db );
+}
+
+static PyObject*
+dbToVelocity(PyObject* notUsed, PyObject* args) {
+ // unpack and type-check the arguments
+ vector<PyObject*> argsV = unpack(args);
+ if (argsV.size() != 2 ||
+ (!PyFloat_Check(argsV[0]) || !PyFloat_Check(argsV[1]))) {
+ PyErr_SetString(PyExc_ValueError, "expecting arguments (Real decibels, Real hearingThreshold)");
+ return NULL;
+ }
+
+ long velocity = db2velocity( Real( PyFloat_AS_DOUBLE(argsV[0])), Real( PyFloat_AS_DOUBLE(argsV[1])) );
+ return PyLong_FromLong( int(velocity) );
+}
static PyObject*
getEquivalentKey(PyObject* notUsed, PyObject* arg) {
@@ -1001,6 +1162,18 @@ static PyMethodDef Essentia__Methods[] = {
{ "hz2bark", hzToBark, METH_O, "Converts a frequency in Hz to a bark band" },
{ "mel2hz", melToHz, METH_O, "Converts a mel band to frequency in Hz" },
{ "hz2mel", hzToMel, METH_O, "Converts a frequency in Hz to a mel band" },
+ { "midi2hz", midiToHz, METH_VARARGS, "Converts a midi note number to frequency in Hz" },
+ { "hz2midi", hzToMidi, METH_VARARGS, "Converts a frequency in Hz to a midi note number" },
+ { "hz2cents", hzToCents, METH_VARARGS, "Returns the cents distance between a frequency and a reference frequency in Hz" },
+ { "cents2hz", centsToHz, METH_VARARGS, "Returns the frequency from a cents distance [0-1200] and a reference frequency in Hz" },
+ { "midi2note", midiToNote, METH_O, "Converts a midi note number to note applying the international pitch standard (A4=440Hz)" },
+ { "note2midi", noteToMidi, METH_O, "Converts note (applying the international pitch standard A4=440Hz) to midi note number" },
+ { "note2root", noteToRoot, METH_O, "Returns the root of a note" },
+ { "note2octave", noteToOctave, METH_O, "Returns the octave of a note" },
+ { "hz2note", hzToNote, METH_VARARGS, "Converts a frequency in Hz to a note - applying the international pitch standard A4=440Hz" },
+ { "note2hz", noteToHz, METH_VARARGS, "Converts a note - applying the international pitch standard A4=440Hz - into a frequency in Hz" },
+ { "velocity2db", velocityToDb, METH_VARARGS, "Converts a velocity to a measure in dB" },
+ { "db2velocity", dbToVelocity, METH_VARARGS, "Converts a dB measure of power to velocity [0-127]" },
{ "lin2db", linToDb, METH_O, "Converts a linear measure of power to a measure in dB" },
{ "db2lin", dbToLin, METH_O, "Converts a dB measure of power to a linear measure" },
{ "db2pow", dbToPow, METH_O, "Converts a dB measure of power to a linear measure" },
diff --git a/test/src/unittests/all_tests.py b/test/src/unittests/all_tests.py
index deb354837..4717176b1 100755
--- a/test/src/unittests/all_tests.py
+++ b/test/src/unittests/all_tests.py
@@ -17,7 +17,7 @@
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/
-from __future__ import absolute_import # For Python 2 compatibility
+from __future__ import absolute_import # For Python 2 compatibility
from os.path import join, sep
import os
@@ -28,88 +28,125 @@
import essentia.streaming
try:
- from importlib import reload # Python3
+ from importlib import reload # Python3
except:
pass
# we don't want to get too chatty when running all the tests
essentia.log.info = False
-#essentia.log.debug += essentia.EAll
-#essentia.log.debug -= essentia.EConnectors
+# essentia.log.debug += essentia.EAll
+# essentia.log.debug -= essentia.EConnectors
tests_dir = os.path.dirname(__file__)
if tests_dir:
# Add sys path to make python recognize tests/src/unittests as a module
parent_dir = os.path.abspath(os.path.dirname(tests_dir))
sys.path.insert(0, parent_dir)
-
+
# Chdir into the tests dir so that the paths work out right
os.chdir(tests_dir)
# import the test from the subdirectories which filename match the pattern 'test_*.py'
-listAllTests = [ filename.split(sep+'test_') for filename in glob.glob(join('*', 'test_*.py')) ]
+listAllTests = [
+ filename.split(sep + "test_") for filename in glob.glob(join("*", "test_*.py"))
+]
for testfile in listAllTests:
testfile[1] = testfile[1][:-3]
-
-def importTest(fullname, strategy = 'import'):
- '''Imports or reloads test given its fullname.'''
+def importTest(fullname, strategy="import"):
+ """Imports or reloads test given its fullname."""
folder, name = fullname
- if strategy == 'import':
- cmd = 'import unittests.%s.test_%s; setattr(sys.modules[__name__], \'%s\', unittests.%s.test_%s.suite)' % (folder, name, name, folder, name)
- elif strategy == 'reload':
- cmd1 = 'reload(sys.modules[\'unittests.%s.test_%s\']); ' % (folder, name)
- cmd2 = 'setattr(sys.modules[__name__], \'%s\', sys.modules[\'unittests.%s.test_%s\'].suite)' % (name, folder, name)
+ if strategy == "import":
+ cmd = (
+ "import unittests.%s.test_%s; setattr(sys.modules[__name__], '%s', unittests.%s.test_%s.suite)"
+ % (folder, name, name, folder, name)
+ )
+ elif strategy == "reload":
+ cmd1 = "reload(sys.modules['unittests.%s.test_%s']); " % (folder, name)
+ cmd2 = (
+ "setattr(sys.modules[__name__], '%s', sys.modules['unittests.%s.test_%s'].suite)"
+ % (name, folder, name)
+ )
cmd = cmd1 + cmd2
else:
- raise ValueError('When importing a test, the only strategies allowed are \'import\' and \'reload\'')
+ raise ValueError(
+ "When importing a test, the only strategies allowed are 'import' and 'reload'"
+ )
exec(cmd)
-def getTests(names=None, exclude=None, strategy='import'):
- allNames = [ name for _, name in listAllTests ]
+def getTests(names=None, exclude=None, strategy="import"):
+ allNames = [name for _, name in listAllTests]
names = names or allNames
- tests = [ (folder, name) for folder, name in listAllTests
- if name in names and name not in exclude ]
+ tests = [
+ (folder, name)
+ for folder, name in listAllTests
+ if name in names and name not in exclude
+ ]
for name in names:
if name not in allNames:
- print('WARNING: did not find test %s' % name)
- for name in (exclude or []):
+ print("WARNING: did not find test %s" % name)
+ for name in exclude or []:
if name not in allNames:
- print('WARNING: did not find test to exclude %s' % name)
+ print("WARNING: did not find test to exclude %s" % name)
- print('Running tests:')
+ print("Running tests:")
print(sorted(name for _, name in tests))
if not tests:
- raise RuntimeError('No test to execute!')
+ raise RuntimeError("No test to execute!")
for test in tests:
importTest(test, strategy)
- testObjectsList = [ getattr(sys.modules[__name__], testName) for folder, testName in tests ]
+ testObjectsList = [
+ getattr(sys.modules[__name__], testName) for folder, testName in tests
+ ]
return unittest.TestSuite(testObjectsList)
-
def traceCompute(algo, *args, **kwargs):
- print('computing algo %s' % algo.name())
+ print("computing algo %s" % algo.name())
return algo.normalCompute(*args, **kwargs)
def computeResetCompute(algo, *args, **kwargs):
# do skip certain algos, otherwise we'd enter in an infinite loop!!!
- audioLoaders = [ 'MonoLoader', 'EqloudLoader', 'EasyLoader', 'AudioLoader' ]
- filters = [ 'IIR', 'DCRemoval', 'HighPass', 'LowPass', 'BandPass', 'AllPass',
- 'BandReject', 'EqualLoudness', 'MovingAverage' ]
- special = [ 'FrameCutter', 'OverlapAdd', 'TempoScaleBands', 'TempoTap', 'TempoTapTicks',
- 'Panning','OnsetDetection', 'MonoWriter', 'Flux', 'StartStopSilence',
- 'LogSpectrum', 'ClickDetector', 'SNR', 'SaturationDetector', 'Welch' ]
+ audioLoaders = ["MonoLoader", "EqloudLoader", "EasyLoader", "AudioLoader"]
+ filters = [
+ "IIR",
+ "DCRemoval",
+ "HighPass",
+ "LowPass",
+ "BandPass",
+ "AllPass",
+ "BandReject",
+ "EqualLoudness",
+ "MovingAverage",
+ ]
+ special = [
+ "FrameCutter",
+ "OverlapAdd",
+ "TempoScaleBands",
+ "TempoTap",
+ "TempoTapTicks",
+ "Panning",
+ "OnsetDetection",
+ "MonoWriter",
+ "Flux",
+ "StartStopSilence",
+ "LogSpectrum",
+ "ClickDetector",
+ "SNR",
+ "SaturationDetector",
+ "Welch",
+ "FrameBuffer",
+ ]
if algo.name() in audioLoaders + filters + special:
return algo.normalCompute(*args, **kwargs)
@@ -129,6 +166,7 @@ def algodecorator(algo):
return algodecorator
+
# recursive helper function that finds outputs connected to pools and calls func
def mapPools(algo, func):
# make a copy first, because func might modify the connections in the for
@@ -146,8 +184,7 @@ def mapPools(algo, func):
elif isinstance(input, essentia.streaming._StreamConnector):
mapPools(input.input_algo, func)
- #else ignore nowhere connections
-
+ # else ignore nowhere connections
# For this to work for networks that are connected to a pool, we need to conduct
@@ -160,28 +197,34 @@ def runResetRun(gen, *args, **kwargs):
# little trick. In particular, we have a test for multiplexer that runs
# multiple generators...
def isValid(algo):
- if isinstance(algo, essentia.streaming.VectorInput) and not list(algo.connections.values())[0]:
+ if (
+ isinstance(algo, essentia.streaming.VectorInput)
+ and not list(algo.connections.values())[0]
+ ):
# non-connected VectorInput, we don't want to get too fancy here...
return False
- if algo.name() == 'Multiplexer':
+ if algo.name() == "Multiplexer":
return False
for output, inputs in algo.connections.items():
for inp in inputs:
- if isinstance(inp, essentia.streaming._StreamConnector) and not isValid(inp.input_algo):
+ if isinstance(inp, essentia.streaming._StreamConnector) and not isValid(
+ inp.input_algo
+ ):
return False
return True
if not isValid(gen):
- print('Network is not capable of doing the run/reset/run trick, doing it the normal way...')
+ print(
+ "Network is not capable of doing the run/reset/run trick, doing it the normal way..."
+ )
essentia.run(gen)
return
-
# 1. Find all the outputs in the network that are connected to pools--aka
# pool feeders and for each pool feeder, disconnect the given pool,
# store it, and connect a dummy pool in its place
def useDummy(algo, output, input):
- if not hasattr(output, 'originalPools'):
+ if not hasattr(output, "originalPools"):
output.originalPools = []
output.dummyPools = []
@@ -226,7 +269,6 @@ def useOriginal(algo, output, input):
return essentia.run(gen)
-
def runTests(tests):
result = unittest.TextTestRunner(verbosity=2).run(tests)
@@ -234,25 +276,24 @@ def runTests(tests):
return len(result.errors) + len(result.failures)
-if __name__ == '__main__':
- testList = [ t for t in sys.argv[1:] if t[0] != '-' ]
- testExclude = [ t[1:] for t in sys.argv[1:] if t[0] == '-' ]
+if __name__ == "__main__":
+ testList = [t for t in sys.argv[1:] if t[0] != "-"]
+ testExclude = [t[1:] for t in sys.argv[1:] if t[0] == "-"]
- print('Running tests normally')
- print('-'*70)
+ print("Running tests normally")
+ print("-" * 70)
result1 = runTests(getTests(testList, exclude=testExclude))
- print('\n\nRunning tests with compute/reset/compute')
- print('-'*70)
+ print("\n\nRunning tests with compute/reset/compute")
+ print("-" * 70)
- setattr(sys.modules['essentia.common'], 'algoDecorator', computeDecorator(computeResetCompute))
+ # setattr(sys.modules['essentia.common'], 'algoDecorator', computeDecorator(computeResetCompute))
essentia.standard._reloadAlgorithms()
- essentia.standard._reloadAlgorithms('essentia_test')
+ essentia.standard._reloadAlgorithms("essentia_test")
# modify runGenerator behavior
- setattr(sys.modules['essentia_test'], 'run', runResetRun)
-
+ setattr(sys.modules["essentia_test"], "run", runResetRun)
- result2 = runTests(getTests(testList, exclude=testExclude, strategy='reload'))
+ result2 = runTests(getTests(testList, exclude=testExclude, strategy="reload"))
sys.exit(result1 + result2)
diff --git a/test/src/unittests/base/test_utils.py b/test/src/unittests/base/test_utils.py
index 875133007..25d667455 100644
--- a/test/src/unittests/base/test_utils.py
+++ b/test/src/unittests/base/test_utils.py
@@ -18,48 +18,49 @@
# version 3 along with this program. If not, see http://www.gnu.org/licenses/
-
from essentia_test import *
import math
-import cmath # for asinh
+import cmath # for asinh
import sys
+
class TestUtils(TestCase):
def testIsSilent(self):
- self.assertEqual(True, isSilent([0]*100))
+ self.assertEqual(True, isSilent([0] * 100))
def testInstantPower(self):
sample = list(range(1, 11))
p = 0
for s in sample:
p += s**2
- p /= float( len(sample) )
+ p /= float(len(sample))
self.assertAlmostEqual(p, instantPower(sample))
def testIsPowerOfTwo(self):
self.assertTrue(isPowerTwo(0))
- top =131072
- k=1
+ top = 131072
+ k = 1
while k < top:
self.assertTrue(isPowerTwo(k))
- k*=2
+ k *= 2
while k < top:
- k=2*k+1
+ k = 2 * k + 1
self.assertTrue(not isPowerTwo(k))
def testNextPowerOfTwo(self):
self.assertEqual(nextPowerTwo(0), 0)
self.assertEqual(nextPowerTwo(1), 1)
- top =131072
- k=2
+ top = 131072
+ k = 2
lastPowerTwo = 2
while k < top:
- if not isPowerTwo(k): self.assertEqual(nextPowerTwo(k), 2*lastPowerTwo)
+ if not isPowerTwo(k):
+ self.assertEqual(nextPowerTwo(k), 2 * lastPowerTwo)
else:
self.assertEqual(nextPowerTwo(k), k)
- lastPowerTwo=k
- k +=1
+ lastPowerTwo = k
+ k += 1
def testLinToDb(self):
lin = 12.34
@@ -68,8 +69,8 @@ def testLinToDb(self):
def testDbToLin(self):
db = -45.5
- expected_lin = 10**(db/10.)
- self.assertAlmostEqual(expected_lin, db2lin(db) , 5e-7)
+ expected_lin = 10 ** (db / 10.0)
+ self.assertAlmostEqual(expected_lin, db2lin(db), 5e-7)
def testPowToDb(self):
pow = 12.34
@@ -78,8 +79,8 @@ def testPowToDb(self):
def testDbToPow(self):
db = -45.5
- expected_pow = 10**(db/10.)
- self.assertAlmostEqual(expected_pow, db2pow(db) , 5e-7)
+ expected_pow = 10 ** (db / 10.0)
+ self.assertAlmostEqual(expected_pow, db2pow(db), 5e-7)
def testAmpToDb(self):
amp = 12.34
@@ -88,8 +89,8 @@ def testAmpToDb(self):
def testDbToAmp(self):
db = -45.5
- expected_amp = 10**(0.5*db/10.)
- self.assertAlmostEqual(expected_amp, db2amp(db) , 5e-7)
+ expected_amp = 10 ** (0.5 * db / 10.0)
+ self.assertAlmostEqual(expected_amp, db2amp(db), 5e-7)
def testBarkToHz(self):
bark = 5
@@ -98,21 +99,95 @@ def testBarkToHz(self):
def testHzToBark(self):
hz = 440
- expected_bark = 26.81*hz / (1960 + hz) - 0.53
+ expected_bark = 26.81 * hz / (1960 + hz) - 0.53
self.assertAlmostEqual(expected_bark, hz2bark(hz))
def testMelToHz(self):
mel = 5
- expected_hz = 700.0*(math.exp(mel/1127.01048)-1.0)
+ expected_hz = 700.0 * (math.exp(mel / 1127.01048) - 1.0)
self.assertAlmostEqual(expected_hz, mel2hz(mel))
def testHzToMel(self):
hz = 440
- expected_mel = 1127.01048*math.log(hz/700.0+1.0)
+ expected_mel = 1127.01048 * math.log(hz / 700.0 + 1.0)
self.assertAlmostEqual(expected_mel, hz2mel(hz))
+ def testHzToMidi(self):
+ hz = 440
+ expected_midi = 69
+ self.assertAlmostEqual(expected_midi, hz2midi(hz, hz))
+
+ def testMidiToHz(self):
+ expected_hz = tuning_frequency = 440
+ midi = 69
+ self.assertAlmostEqual(expected_hz, midi2hz(midi, tuning_frequency))
+
+ def testHzToCents(self):
+ tuning = 440
+ midi = 70
+ expected_cents = 100
+ self.assertAlmostEqual(expected_cents, hz2cents(midi2hz(midi, tuning), tuning))
+
+ def testCentsToHz(self):
+ tuning = 440
+ cents = 100
+ expected_hz = 466.16378
+ self.assertAlmostEqual(expected_hz, cents2hz(cents, tuning))
+
+ def testMidiToNote(self):
+ midi = 69
+ expected_note = "A4"
+ self.assertEqual(expected_note, midi2note(midi))
+
+ def testNoteToMidi(self):
+ note = "A4"
+ expected_midi = 69
+ self.assertEqual(expected_midi, note2midi(note))
+ note = "C4"
+ expected_midi = 60
+ self.assertEqual(expected_midi, note2midi(note))
+ note = "C5"
+ expected_midi = 72
+ self.assertEqual(expected_midi, note2midi(note))
+
+ def testNoteToRoot(self):
+ note = "A4"
+ expected_root = note[0]
+ self.assertEqual(expected_root, note2root(note))
+
+ def testNoteToOctave(self):
+ note = "A4"
+ expected_octave = int(note[1])
+ self.assertEqual(expected_octave, note2octave(note))
+
+ def testHzToNote(self):
+ hz = 440
+ expected_note = "A4"
+ self.assertEqual(expected_note, hz2note(hz))
+
+ def testNoteToHz(self):
+ note = "A4"
+ expected_hz = 440
+ self.assertEqual(expected_hz, note2hz(note))
+
+ def testDbToVelocity(self):
+ decibels = 0
+ expected_velocity = 127
+ self.assertEqual(expected_velocity, db2velocity(decibels))
+ decibels = -96
+ expected_velocity = 0
+ self.assertEqual(expected_velocity, db2velocity(decibels))
+
+ def testVelocityToDb(self):
+ velocity = 127
+ expected_decibels = 0
+ self.assertEqual(expected_decibels, velocity2db(velocity))
+ velocity = 0
+ expected_decibels = -96
+ self.assertEqual(expected_decibels, velocity2db(velocity))
+
suite = allTests(TestUtils)
-if __name__ == '__main__':
+if __name__ == "__main__":
TextTestRunner(verbosity=2).run(suite)
diff --git a/test/src/unittests/standard/test_framebuffer.py b/test/src/unittests/standard/test_framebuffer.py
new file mode 100644
index 000000000..94110094a
--- /dev/null
+++ b/test/src/unittests/standard/test_framebuffer.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+#
+# This file is part of Essentia
+#
+# Essentia is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation (FSF), either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the Affero GNU General Public License
+# version 3 along with this program. If not, see http://www.gnu.org/licenses/
+
+from essentia_test import *
+
+
+class TestFrameBuffer(TestCase):
+
+ def testEmpty(self):
+ with self.assertRaises(RuntimeError):
+ FrameBuffer()([])
+
+ def testBufferZeroPadding(self):
+ buffer = FrameBuffer(bufferSize=8, zeroPadding=True)
+ self.assertEqualVector(buffer([1, 2]), [0., 0., 0., 0., 0., 0., 1., 2.])
+ self.assertEqualVector(buffer([3, 4]), [0., 0., 0., 0., 1., 2., 3., 4.])
+ self.assertEqualVector(buffer([5, 6]), [0., 0., 1., 2., 3., 4., 5., 6.])
+ self.assertEqualVector(buffer([7, 8]), [1., 2., 3., 4., 5., 6., 7., 8.])
+ self.assertEqualVector(buffer([9, 10]), [3., 4., 5., 6., 7., 8., 9., 10.])
+
+ def testBufferNoZeroPadding(self):
+ buffer = FrameBuffer(bufferSize=8, zeroPadding=False)
+ self.assertEqualVector(buffer([1, 2]), [])
+ self.assertEqualVector(buffer([3, 4]), [])
+ self.assertEqualVector(buffer([5, 6]), [])
+ self.assertEqualVector(buffer([7, 8]), [1., 2., 3., 4., 5., 6., 7., 8.])
+
+ def testFrameSizeEqualsBufferSize(self):
+ buffer = FrameBuffer(bufferSize=8)
+ self.assertEqualVector(buffer([1, 2, 3, 4, 5, 6, 7, 8]), [1., 2., 3., 4., 5., 6., 7., 8.])
+
+ def testFrameSizeLargerBufferSize(self):
+ buffer = FrameBuffer(bufferSize=8)
+ self.assertEqualVector(buffer([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), [3., 4., 5., 6., 7., 8., 9., 10.])
+
+ def testResetZeroPadding(self):
+ buffer = FrameBuffer(bufferSize=8, zeroPadding=True)
+ buffer([1, 2, 3, 4, 5, 6]) # Results in [0., 0., 1., 2., 3., 4., 5., 6.]
+ buffer.reset() # Sets the buffer to zero vector.
+ self.assertEqualVector(buffer([1, 2]), [0., 0., 0., 0., 0., 0., 1., 2.])
+
+ def testResetNoZeroPadding(self):
+ buffer = FrameBuffer(bufferSize=8, zeroPadding=False)
+ buffer([1, 2, 3, 4, 5, 6, 7, 8])
+ buffer.reset()
+ self.assertEqualVector(buffer([1, 2]), [])
+
+
+suite = allTests(TestFrameBuffer)
+
+if __name__ == '__main__':
+ TextTestRunner(verbosity=2).run(suite)
diff --git a/test/src/unittests/tonal/pitch2midi/359500__mtg__sax-tenor-e-major.npy b/test/src/unittests/tonal/pitch2midi/359500__mtg__sax-tenor-e-major.npy
new file mode 100644
index 000000000..b7f3e48b9
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/359500__mtg__sax-tenor-e-major.npy differ
diff --git a/test/src/unittests/tonal/pitch2midi/359628__mtg__sax-tenor-d-minor.npy b/test/src/unittests/tonal/pitch2midi/359628__mtg__sax-tenor-d-minor.npy
new file mode 100644
index 000000000..6b0842423
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/359628__mtg__sax-tenor-d-minor.npy differ
diff --git a/test/src/unittests/tonal/pitch2midi/387517__deleted_user_7267864__saxophone-going-up.npy b/test/src/unittests/tonal/pitch2midi/387517__deleted_user_7267864__saxophone-going-up.npy
new file mode 100644
index 000000000..dfd77eff2
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/387517__deleted_user_7267864__saxophone-going-up.npy differ
diff --git a/test/src/unittests/tonal/pitch2midi/test_chromatic_sequence.npy b/test/src/unittests/tonal/pitch2midi/test_chromatic_sequence.npy
new file mode 100644
index 000000000..e0578a02c
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/test_chromatic_sequence.npy differ
diff --git a/test/src/unittests/tonal/pitch2midi/test_offset.npy b/test/src/unittests/tonal/pitch2midi/test_offset.npy
new file mode 100644
index 000000000..2b2536975
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/test_offset.npy differ
diff --git a/test/src/unittests/tonal/pitch2midi/test_onset.npy b/test/src/unittests/tonal/pitch2midi/test_onset.npy
new file mode 100644
index 000000000..940cf2550
Binary files /dev/null and b/test/src/unittests/tonal/pitch2midi/test_onset.npy differ
diff --git a/test/src/unittests/tonal/pitchyinfft/vignesh_confidance.npy b/test/src/unittests/tonal/pitchyinfft/vignesh_confidence.npy
similarity index 100%
rename from test/src/unittests/tonal/pitchyinfft/vignesh_confidance.npy
rename to test/src/unittests/tonal/pitchyinfft/vignesh_confidence.npy
diff --git a/test/src/unittests/tonal/test_audio2pitch.py b/test/src/unittests/tonal/test_audio2pitch.py
new file mode 100644
index 000000000..c1e6ea9ba
--- /dev/null
+++ b/test/src/unittests/tonal/test_audio2pitch.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2006-2021 Music Technology Group - Universitat Pompeu Fabra
+#
+# This file is part of Essentia
+#
+# Essentia is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation (FSF), either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the Affero GNU General Public License
+# version 3 along with this program. If not, see http://www.gnu.org/licenses/
+
+
+from essentia_test import *
+from numpy import sin, pi, mean, random, sqrt
+
+
+class TestAudio2Pitch(TestCase):
+
+ def testEmpty(self):
+ self.assertComputeFails(Audio2Pitch(), [])
+
+ def testZero(self):
+ pitch, confidence, loudness, voiced = Audio2Pitch()(zeros(1024))
+ self.assertEqual(pitch, 0)
+ self.assertEqual(confidence, 0)
+ self.assertEqual(loudness, 0.0)
+ self.assertEqual(voiced, 0)
+
+ def testSine(self):
+ sample_rate = 44100
+ size = sample_rate * 1
+ frequency = 440
+ amplitude_in_db = -3
+ signal = [sin(2.0 * pi * frequency * i / sample_rate) for i in range(size)]
+ self.runTest(
+ signal, sample_rate, amplitude_in_db, frequency, loudness_precision=0.5
+ )
+
+ def testBandLimitedSquare(self):
+ sample_rate = 44100
+ size = sample_rate * 1
+ frequency = 660
+ w = 2.0 * pi * frequency
+ nharms = 10
+ amplitude = 0.5
+ amplitude_in_db = -9
+ signal = zeros(size)
+ for i in range(size):
+ for harm in range(nharms):
+ signal[i] += (
+ amplitude
+ / (2.0 * harm + 1)
+ * sin((2 * harm + 1) * i * w / sample_rate)
+ )
+
+ self.runTest(signal, sample_rate, amplitude_in_db, frequency)
+
+ def testBandLimitedSaw(self):
+ sample_rate = 44100
+ size = sample_rate * 1
+ frequency = 660
+ w = 2.0 * pi * frequency
+ nharms = 10
+ amplitude = 1.0
+ amplitude_in_db = -1.43
+ signal = zeros(size)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += amplitude / harm * sin(harm * i * w / sample_rate)
+ self.runTest(
+ signal,
+ sample_rate,
+ amplitude_in_db,
+ frequency,
+ pitch_precision=1.1,
+ loudness_precision=0.2,
+ )
+
+ def testBandLimitedSawMasked(self):
+ sample_rate = 44100
+ size = sample_rate * 1
+ freq = 440
+ w = 2.0 * pi * freq
+ subw = 2.0 * pi * (freq - 100)
+ nharms = 10
+ signal = zeros(size)
+ for i in range(1, size):
+ # masking noise:
+ whitenoise = 2 * (random.rand(1) - 0.5)
+ signal[i] += 2 * whitenoise
+ for harm in range(1, nharms):
+ signal[i] += 1.0 / harm * sin(i * harm * w / sample_rate)
+ signal = 5 * LowPass()(signal)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += 0.1 / harm * sin(i * harm * w / sample_rate)
+ signal[i] += 0.5 * sin(i * subw / sample_rate)
+ max_signal = max(signal) + 1
+ signal = signal / max_signal
+ amplitude_in_db = -9
+ self.runTest(
+ signal,
+ sample_rate,
+ amplitude_in_db,
+ freq,
+ pitch_precision=1.5,
+ conf_precision=0.3,
+ )
+
+ def runTest(
+ self,
+ signal: numpy.ndarray,
+ sample_rate: int,
+ amplitude_in_db: float,
+ frequency: float,
+ pitch_precision: float = 1,
+ conf_precision: float = 0.1,
+ loudness_precision: float = 0.1,
+ ):
+ frameSize = 1024
+ hopsize = frameSize
+
+ frames = FrameGenerator(signal, frameSize=frameSize, hopSize=hopsize)
+ pitchDetect = Audio2Pitch(frameSize=frameSize, sampleRate=sample_rate)
+ n_outputs = len(pitchDetect.outputNames())
+ pitch, confidence, loudness, voiced = ([] for _ in range(n_outputs))
+ for frame in frames:
+ f, conf, l, v = pitchDetect(frame)
+ pitch += [f]
+ confidence += [conf]
+ loudness += [amp2db(l)]
+ voiced += [v]
+ self.assertAlmostEqual(mean(pitch), frequency, pitch_precision)
+ self.assertAlmostEqual(mean(confidence), 1, conf_precision)
+ self.assertAlmostEqual(mean(loudness), amplitude_in_db, loudness_precision)
+ self.assertAlmostEqual(mean(voiced), 1, conf_precision)
+
+ def testInvalidParam(self):
+ self.assertConfigureFails(Audio2Pitch(), {"frameSize": 1})
+ self.assertConfigureFails(Audio2Pitch(), {"sampleRate": 0})
+ self.assertConfigureFails(
+ Audio2Pitch(), {"sampleRate": 44100, "maxFrequency": 44100}
+ )
+ self.assertConfigureFails(
+ Audio2Pitch(),
+ {"sampleRate": 44100, "maxFrequency": 200, "minFrequency": 250},
+ )
+ self.assertConfigureFails(
+ Audio2Pitch(),
+ {"sampleRate": 44100, "pitchAlgorithm": "yin_fft"},
+ )
+ self.assertConfigureFails(
+ Audio2Pitch(),
+ {"sampleRate": 44100, "loudnessThreshold": 1.0},
+ )
+ self.assertConfigureFails(
+ Audio2Pitch(),
+ {"sampleRate": 44100, "pitchConfidenceThreshold": -0.5},
+ )
+ self.assertConfigureFails(
+ Audio2Pitch(),
+ {"sampleRate": 44100, "pitchConfidenceThreshold": 1.5},
+ )
+
+ def testARealCase(self):
+ # The expected values were recomputed from commit
+ # 2d37c0713fb6cc5f637b3d8f5d65aa90b36d4277
+ #
+ # The expected values were compared with the vamp pYIN
+ # implementation of the YIN algorithm producing very
+ # similar values.
+ #
+ # https://code.soundsoftware.ac.uk/projects/pyin
+
+ frameSize = 1024
+ sample_rate = 44100
+ hopSize = 512
+ loudness_threshold = -80
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
+ audio = MonoLoader(filename=filename, sampleRate=44100)()
+ frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
+ pitchDetect = Audio2Pitch(
+ frameSize=frameSize,
+ sampleRate=sample_rate,
+ pitchConfidenceThreshold=0.15,
+ loudnessThreshold=loudness_threshold,
+ )
+
+ n_outputs = len(pitchDetect.outputNames())
+ pitch, confidence, loudness, voiced = ([] for _ in range(n_outputs))
+ for frame in frames:
+ f, conf, l, v = pitchDetect(frame)
+ pitch += [f]
+ confidence += [conf]
+ loudness += [l]
+ voiced += [v]
+ expected_pitch = numpy.load(join(filedir(), "pitchyinfft/vignesh_pitch.npy"))
+ expected_conf = numpy.load(
+ join(filedir(), "pitchyinfft/vignesh_confidence.npy")
+ )
+ expected_voiced = [1] * len(expected_pitch)
+ self.assertAlmostEqualVector(pitch, expected_pitch, 1e-6)
+ self.assertAlmostEqualVector(confidence, expected_conf, 5e-5)
+ self.assertAlmostEqualVector(voiced, expected_voiced)
+
+
+suite = allTests(TestAudio2Pitch)
+
+if __name__ == "__main__":
+ TextTestRunner(verbosity=2).run(suite)
diff --git a/test/src/unittests/tonal/test_pitch2midi.py b/test/src/unittests/tonal/test_pitch2midi.py
new file mode 100644
index 000000000..c84598060
--- /dev/null
+++ b/test/src/unittests/tonal/test_pitch2midi.py
@@ -0,0 +1,438 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2006-2024 Music Technology Group - Universitat Pompeu Fabra
+#
+# This file is part of Essentia
+#
+# Essentia is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation (FSF), either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the Affero GNU General Public License
+# version 3 along with this program. If not, see http://www.gnu.org/licenses/
+
+
+from essentia_test import *
+from numpy import mean, array, float32, square
+from pathlib import Path
+
+
+class TestPitch2Midi(TestCase):
+ def testEmpty(self):
+ self.assertComputeFails(Pitch2Midi(), -1, 0)
+
+ def testZero(self):
+ message_type, midi_note, time_compensation = Pitch2Midi()(0, 0)
+ self.assertEqual(message_type, [])
+ self.assertEqual(midi_note.tolist(), array([], dtype=float32).tolist())
+ self.assertEqual(time_compensation.tolist(), array([], dtype=float32).tolist())
+
+ def testOnset(self):
+ sample_rate = 44100
+ hop_size = 128
+ onset_compensation = 0.075
+ pitch = 440.0
+ nblocks_for_onset = round(onset_compensation / (hop_size / sample_rate))
+ pitches = [pitch] * nblocks_for_onset
+ voicings = [1] * nblocks_for_onset
+ reference_path = "pitch2midi/test_onset.npy"
+
+ self.runTest(
+ sample_rate,
+ hop_size,
+ pitches,
+ voicings,
+ reference_path=reference_path,
+ )
+
+ def testUnvoicedFrame(self):
+ sample_rate = 44100
+ hop_size = 128
+ onset_compensation = 0.075
+ minNoteChangePeriod = 0.03
+ nblocks_for_onset = round(onset_compensation / (hop_size / sample_rate))
+ nblocks_for_offset = round(minNoteChangePeriod / (hop_size / sample_rate)) + 1
+ pitches = ([440.0] * nblocks_for_onset) + ([0] * nblocks_for_offset)
+ voicings = ([1] * nblocks_for_onset) + ([0] * nblocks_for_offset)
+ reference_path = "pitch2midi/test_onset.npy"
+
+ self.runTest(
+ sample_rate, hop_size, pitches, voicings, reference_path=reference_path
+ )
+
+ def testOffset(self):
+ sample_rate = 44100
+ hop_size = 128
+ onset_compensation = 0.075
+ min_occurrence_rate = 0.015 / 2
+ nblocks_for_onset = round(onset_compensation / (hop_size / sample_rate))
+ nblocks_for_offset = round(min_occurrence_rate / (hop_size / sample_rate))
+ midi_notes = [69, 70]
+ pitches = [midi2hz(note) for note in midi_notes]
+ pitches = ([pitches[0]] * nblocks_for_onset) + (
+ [pitches[1]] * nblocks_for_offset
+ )
+ voicings = [1] * (nblocks_for_onset + nblocks_for_offset)
+ reference_path = "pitch2midi/test_offset.npy"
+
+ self.runTest(
+ sample_rate, hop_size, pitches, voicings, reference_path=reference_path
+ )
+
+ def testContinuousChromaticSequence(self):
+ sample_rate = 44100
+ hop_size = 128
+ onset_compensation = 0.075
+ min_note_change_period = 0.03
+ midi_buffer_duration = 0.015
+ min_occurrence_rate = 0.5
+ min_occurrence_period = midi_buffer_duration * min_occurrence_rate
+ nblocks_for_onset = round(onset_compensation / (hop_size / sample_rate))
+ nblocks_for_offset = round(min_note_change_period / (hop_size / sample_rate))
+ nblocks_for_transition = round(min_occurrence_period / (hop_size / sample_rate))
+ n_notes = 12
+ midi_notes = list(range(69, 69 + n_notes))
+ pitches = [midi2hz(note) for note in midi_notes]
+ pitch_list = list()
+ for pitch in pitches:
+ pitch_list += [pitch] * (nblocks_for_transition + nblocks_for_onset)
+ pitch_list += [pitch] * (nblocks_for_offset + 1)
+ voicings = [1] * n_notes * (nblocks_for_onset + nblocks_for_transition)
+ voicings += [0] * (nblocks_for_offset + 2)
+ reference_path = "pitch2midi/test_chromatic_sequence.npy"
+ self.runTest(
+ sample_rate, hop_size, pitch_list, voicings, reference_path=reference_path
+ )
+
+ def assessNoteList(
+ self,
+ reference_path: str,
+ estimated: list,
+ n_notes_tolerance: int = 0,
+ onset_tolerance: float = 0.01,
+ offset_tolerance: float = 0.01,
+ midi_note_tolerance: int = 0,
+ ):
+ # read the expected notes file manually annotated
+ expected_notes = numpy.load(join(filedir(), reference_path))
+ print("Expected notes:")
+ print(expected_notes)
+
+ print("\ndiffs")
+ print(array(estimated) - expected_notes[:, 1:])
+
+ # estimate the number of notes for expected and detected
+ n_detected_notes = len(estimated)
+ n_expected_notes = len(expected_notes)
+
+ # mean squared onset error across notes
+ onset_mse = mean(
+ [square(note[1] - estimated[int(note[0])][0]) for note in expected_notes]
+ )
+
+ # mean squared offset error across notes
+ offset_mse = mean(
+ [square(note[2] - estimated[int(note[0])][1]) for note in expected_notes]
+ )
+
+ # mean squared midi note error across notes
+ midi_note_mse = mean(
+ [square(note[-1] - estimated[int(note[0])][-1]) for note in expected_notes]
+ )
+
+ # assert outputs
+ self.assertAlmostEqual(n_detected_notes, n_expected_notes, n_notes_tolerance)
+ self.assertAlmostEqual(onset_mse, 0, onset_tolerance)
+ self.assertAlmostEqual(offset_mse, 0, offset_tolerance)
+ self.assertAlmostEqual(midi_note_mse, 0, midi_note_tolerance)
+
+ def runTest(
+ self,
+ sample_rate: int,
+ hop_size: int,
+ pitches: list,
+ voicings: list,
+ n_notes_tolerance: int = 0,
+ onset_tolerance: float = 0.01,
+ offset_tolerance: float = 0.05,
+ midi_note_tolerance: int = 0,
+ reference_path: str = "",
+ ):
+ p2m = Pitch2Midi(sampleRate=sample_rate, hopSize=hop_size)
+
+ step_time = hop_size / sample_rate
+
+ # note toggle event (NTE) list and counters
+ nte_list = []
+ n = 0
+ time_stamp = 0
+ n_notes = 0
+
+ for n, (pitch, voiced) in enumerate(zip(pitches, voicings)):
+ message, midi_note, time_compensation = p2m(pitch, voiced)
+ time_stamp += step_time
+ if voiced:
+ if message:
+ nte_list.append(
+ [
+ n_notes,
+ time_stamp - time_compensation[1],
+ time_stamp - time_compensation[0],
+ int(midi_note[1]),
+ message,
+ ]
+ )
+ # print(estimated)
+ # print(
+ # f"[{n_notes}][{n}]:{(time_stamp-time_compensation[1]):.3f}, {midi2note(int(midi_note[1]))}({int(midi_note[1])})~{pitch:.2f}Hz" # , {time_compensation}, {midi_note}, {message}
+ # )
+ if "note_on" in message:
+ n_notes += 1
+ n += 1
+
+ # from the nte_list extracts the note list using note_off messages
+ note_list = self.ntes_to_notes(nte_list)
+
+ self.assessNoteList(
+ reference_path,
+ note_list,
+ n_notes_tolerance=n_notes_tolerance,
+ onset_tolerance=onset_tolerance,
+ offset_tolerance=offset_tolerance,
+ midi_note_tolerance=midi_note_tolerance,
+ )
+
+ def testARealCaseWithEMajorScale(self):
+ frame_size = 8192
+ sample_rate = 48000
+ hop_size = 64
+ loudness_threshold = -40
+ pitch_confidence_threshold = 0.25
+ min_frequency = 103.83
+ max_frequency = 659.26
+ midi_buffer_duration = 0.05
+ min_note_change_period = 0.03
+ n_notes_tolerance = 0
+ onset_tolerance = 0.01
+ midi_note_tolerance = 0
+
+ stem = "359500__mtg__sax-tenor-e-major"
+ audio_path = Path("recorded") / f"{stem}.wav"
+ reference_path = Path("pitch2midi") / f"{stem}.npy"
+
+ self.runARealCase(
+ audio_path=audio_path,
+ reference_path=reference_path,
+ sample_rate=sample_rate,
+ frame_size=frame_size,
+ hop_size=hop_size,
+ pitch_confidence_threshold=pitch_confidence_threshold,
+ loudness_threshold=loudness_threshold,
+ midi_buffer_duration=midi_buffer_duration,
+ min_note_change_period=min_note_change_period,
+ max_frequency=max_frequency,
+ min_frequency=min_frequency,
+ n_notes_tolerance=n_notes_tolerance,
+ onset_tolerance=onset_tolerance,
+ midi_note_tolerance=midi_note_tolerance,
+ )
+
+ def testARealCaseWithDMinorScale(self):
+ frame_size = 8192
+ sample_rate = 48000
+ hop_size = 64
+ loudness_threshold = -40
+ pitch_confidence_threshold = 0.25
+ min_frequency = 103.83
+ max_frequency = 659.26
+ midi_buffer_duration = 0.05
+ min_note_change_period = 0.03
+ n_notes_tolerance = 0
+ onset_tolerance = 0.01
+ midi_note_tolerance = 0
+
+ stem = "359628__mtg__sax-tenor-d-minor"
+ audio_path = Path("recorded") / f"{stem}.wav"
+ reference_path = Path("pitch2midi") / f"{stem}.npy"
+
+ self.runARealCase(
+ audio_path=audio_path,
+ reference_path=reference_path,
+ sample_rate=sample_rate,
+ frame_size=frame_size,
+ hop_size=hop_size,
+ pitch_confidence_threshold=pitch_confidence_threshold,
+ loudness_threshold=loudness_threshold,
+ midi_buffer_duration=midi_buffer_duration,
+ min_note_change_period=min_note_change_period,
+ max_frequency=max_frequency,
+ min_frequency=min_frequency,
+ n_notes_tolerance=n_notes_tolerance,
+ onset_tolerance=onset_tolerance,
+ midi_note_tolerance=midi_note_tolerance,
+ )
+
+ def testSeparatedNotes(self):
+ frame_size = 8192
+ sample_rate = 44100
+ hop_size = 32
+ loudness_threshold = -42
+ pitch_confidence_threshold = 0.6
+ min_frequency = 103.83
+ max_frequency = 659.26
+ midi_buffer_duration = 0.05
+ min_note_change_period = 0.03
+ min_offset_period = 0.1
+ n_notes_tolerance = 0
+ onset_tolerance = 0.01
+ midi_note_tolerance = 0
+
+ stem = "387517__deleted_user_7267864__saxophone-going-up"
+ audio_path = Path("recorded") / f"{stem}.wav"
+ reference_path = Path("pitch2midi") / f"{stem}.npy"
+
+ self.runARealCase(
+ audio_path=audio_path,
+ reference_path=reference_path,
+ sample_rate=sample_rate,
+ frame_size=frame_size,
+ hop_size=hop_size,
+ pitch_confidence_threshold=pitch_confidence_threshold,
+ loudness_threshold=loudness_threshold,
+ max_frequency=max_frequency,
+ min_frequency=min_frequency,
+ midi_buffer_duration=midi_buffer_duration,
+ min_note_change_period=min_note_change_period,
+ min_offset_period=min_offset_period,
+ n_notes_tolerance=n_notes_tolerance,
+ onset_tolerance=onset_tolerance,
+ midi_note_tolerance=midi_note_tolerance,
+ )
+
+ def runARealCase(
+ self,
+ audio_path: str,
+ reference_path: str,
+ sample_rate: int,
+ frame_size: int,
+ hop_size: int,
+ pitch_confidence_threshold: float,
+ loudness_threshold: float,
+ max_frequency: float,
+ min_frequency: float,
+ midi_buffer_duration: float,
+ min_note_change_period: float,
+ min_offset_period: float = 0.2,
+ n_notes_tolerance: int = 0,
+ onset_tolerance: float = 0.01,
+ offset_tolerance: float = 0.05,
+ midi_note_tolerance: int = 0,
+ ):
+ filename = join(testdata.audio_dir, audio_path)
+ if sys.platform == "darwin":
+ import soundfile as sf
+
+ audio, _ = sf.read(filename, dtype="float32")
+ if audio.ndim > 1:
+ audio = audio[:, 0]
+ else:
+ audio = MonoLoader(filename=filename, sampleRate=sample_rate)()
+ frames = FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size)
+ step_time = hop_size / sample_rate
+
+ # initialize audio2pitch & pitch2midi instances
+ pitchDetect = Audio2Pitch(
+ frameSize=frame_size,
+ sampleRate=sample_rate,
+ pitchConfidenceThreshold=pitch_confidence_threshold,
+ loudnessThreshold=loudness_threshold,
+ maxFrequency=max_frequency,
+ minFrequency=min_frequency,
+ )
+
+ p2m = Pitch2Midi(
+ sampleRate=sample_rate,
+ hopSize=hop_size,
+ midiBufferDuration=midi_buffer_duration,
+ minNoteChangePeriod=min_note_change_period,
+ minOffsetCheckPeriod=min_offset_period,
+ )
+ print(p2m.parameterNames())
+
+ # containers and counters for the detected notes
+ nte_list = [] # note toggle event list
+ n = 0
+ time_stamp = 0
+ n_notes = 0
+
+ # simulates real-time process
+ for frame in frames:
+ _pitch, _, _, _voiced = pitchDetect(frame)
+ message, midi_note, time_compensation = p2m(_pitch, _voiced)
+ time_stamp += step_time
+ # print(n, time_stamp, message, midi_note, time_compensation)
+ if message:
+ nte_list.append(
+ [
+ n_notes,
+ time_stamp - time_compensation[1],
+ time_stamp - time_compensation[0],
+ int(midi_note[1]),
+ message,
+ ]
+ )
+ print(
+ f"[{n_notes}][{n}]:{(time_stamp-time_compensation[1]):.3f}, {midi2note(int(midi_note[1]))}({int(midi_note[1])})~{_pitch:.2f}Hz, {message}" # , {time_compensation}, {midi_note}, {message}
+ )
+ if "note_on" in message:
+ n_notes += 1
+ n += 1
+
+ print(f"nte_list: {nte_list}")
+ # from the nte_list extracts the note list using note_off messages
+ note_list = self.ntes_to_notes(nte_list)
+ print(f"note_list: {note_list}")
+
+ self.assessNoteList(
+ reference_path,
+ note_list,
+ n_notes_tolerance=n_notes_tolerance,
+ onset_tolerance=onset_tolerance,
+ offset_tolerance=offset_tolerance,
+ midi_note_tolerance=midi_note_tolerance,
+ )
+
+ def ntes_to_notes(self, nte_list: list):
+ note_list = list()
+ for n, nte_message in enumerate(nte_list):
+ if "note_on" in nte_message[4]:
+ # extract time stamp
+ start_time = nte_message[1]
+
+ # in some cases the compensation might generate negative values
+ if start_time < 0:
+ start_time = 0
+
+ # the note offset comes from the time stamp of the next message (note_off)
+ if n + 1 < len(nte_list): # when a note off message is provided
+ # define timestamp for offset
+ end_time = nte_list[n + 1][1]
+ else: # there is a non-closed note at the end
+ # define timestamp for offset
+ end_time = nte_list[-1][1]
+ note = int(nte_message[3])
+ # define annotation in a list
+ note_list.append([float(start_time), float(end_time), note])
+ return note_list
+
+
+suite = allTests(TestPitch2Midi)
+
+if __name__ == "__main__":
+ TextTestRunner(verbosity=2).run(suite)
diff --git a/test/src/unittests/tonal/test_pitchyin.py b/test/src/unittests/tonal/test_pitchyin.py
index 59b6ea826..d1d31cd04 100644
--- a/test/src/unittests/tonal/test_pitchyin.py
+++ b/test/src/unittests/tonal/test_pitchyin.py
@@ -18,10 +18,10 @@
# version 3 along with this program. If not, see http://www.gnu.org/licenses/
-
from essentia_test import *
from numpy import sin, pi, mean, random
+
class TestPitchYin(TestCase):
def testEmpty(self):
@@ -34,65 +34,65 @@ def testZero(self):
def testSine(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 440
- signal = [sin(2.0*pi*freq*i/sr) for i in range(size)]
+ signal = [sin(2.0 * pi * freq * i / sr) for i in range(size)]
self.runTest(signal, sr, freq)
def testBandLimitedSquare(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 660
- w = 2.0*pi*freq
+ w = 2.0 * pi * freq
nharms = 10
signal = zeros(size)
for i in range(size):
for harm in range(nharms):
- signal[i] += .5/(2.*harm+1)*sin((2*harm+1)*i*w/sr)
+ signal[i] += 0.5 / (2.0 * harm + 1) * sin((2 * harm + 1) * i * w / sr)
self.runTest(signal, sr, freq)
def testBandLimitedSaw(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 660
- w = 2.0*pi*freq
+ w = 2.0 * pi * freq
nharms = 10
signal = zeros(size)
- for i in range(1,size):
- for harm in range(1,nharms+1):
- signal[i] += 1./harm*sin(harm*i*w/sr)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += 1.0 / harm * sin(harm * i * w / sr)
self.runTest(signal, sr, freq, 1.1, 0.1)
def testBandLimitedSawMasked(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 440
- w = 2.0*pi*freq
- subw = 2.0*pi*(freq-100)
+ w = 2.0 * pi * freq
+ subw = 2.0 * pi * (freq - 100)
nharms = 10
signal = zeros(size)
- for i in range(1,size):
+ for i in range(1, size):
# masking noise:
- whitenoise = 2*(random.rand(1)-0.5)
- signal[i] += 2*whitenoise
- for harm in range(1,nharms):
- signal[i] += 1./harm*sin(i*harm*w/sr)
- signal = 5*LowPass()(signal)
- for i in range(1,size):
- for harm in range(1,nharms+1):
- signal[i] += .1/harm*sin(i*harm*w/sr)
- signal[i] += 0.5*sin(i*subw/sr)
+ whitenoise = 2 * (random.rand(1) - 0.5)
+ signal[i] += 2 * whitenoise
+ for harm in range(1, nharms):
+ signal[i] += 1.0 / harm * sin(i * harm * w / sr)
+ signal = 5 * LowPass()(signal)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += 0.1 / harm * sin(i * harm * w / sr)
+ signal[i] += 0.5 * sin(i * subw / sr)
max_signal = max(signal) + 1
- signal = signal/max_signal
+ signal = signal / max_signal
self.runTest(signal, sr, freq, 1.5, 0.3)
- def runTest(self, signal, sr, freq, pitch_precision = 1, conf_precision = 0.1):
+ def runTest(self, signal, sr, freq, pitch_precision=1, conf_precision=0.1):
frameSize = 1024
hopsize = frameSize
frames = FrameGenerator(signal, frameSize=frameSize, hopSize=hopsize)
- pitchDetect = PitchYin(frameSize=frameSize, sampleRate = sr)
+ pitchDetect = PitchYin(frameSize=frameSize, sampleRate=sr)
pitch = []
confidence = []
for frame in frames:
@@ -103,8 +103,8 @@ def runTest(self, signal, sr, freq, pitch_precision = 1, conf_precision = 0.1):
self.assertAlmostEqual(mean(confidence), 1, conf_precision)
def testInvalidParam(self):
- self.assertConfigureFails(PitchYin(), {'frameSize' : 1})
- self.assertConfigureFails(PitchYin(), {'sampleRate' : 0})
+ self.assertConfigureFails(PitchYin(), {"frameSize": 1})
+ self.assertConfigureFails(PitchYin(), {"sampleRate": 0})
def testARealCase(self):
# The expected values were recomputed from commit
@@ -119,10 +119,10 @@ def testARealCase(self):
frameSize = 1024
sr = 44100
hopSize = 512
- filename = join(testdata.audio_dir, 'recorded', 'vignesh.wav')
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
audio = MonoLoader(filename=filename, sampleRate=44100)()
frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
- pitchDetect = PitchYin(frameSize=frameSize, sampleRate = sr)
+ pitchDetect = PitchYin(frameSize=frameSize, sampleRate=sr)
pitch = []
confidence = []
for frame in frames:
@@ -130,8 +130,8 @@ def testARealCase(self):
pitch += [f]
confidence += [conf]
- expected_pitch = numpy.load(join(filedir(), 'pitchyin/vignesh_pitch.npy'))
- expected_conf = numpy.load(join(filedir(), 'pitchyin/vignesh_confidance.npy'))
+ expected_pitch = numpy.load(join(filedir(), "pitchyin/vignesh_pitch.npy"))
+ expected_conf = numpy.load(join(filedir(), "pitchyin/vignesh_confidence.npy"))
self.assertAlmostEqualVector(pitch, expected_pitch)
self.assertAlmostEqualVector(confidence, expected_conf, 5e-6)
@@ -144,15 +144,16 @@ def testARealCaseVampComparison(self):
frameSize = 2048
sr = 44100
hopSize = 256
- filename = join(testdata.audio_dir, 'recorded', 'vignesh.wav')
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
audio = MonoLoader(filename=filename, sampleRate=44100)()
frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
- pitchDetect = PitchYin(frameSize=frameSize, sampleRate=sr,
- minFrequency=40, maxFrequency=1600)
+ pitchDetect = PitchYin(
+ frameSize=frameSize, sampleRate=sr, minFrequency=40, maxFrequency=1600
+ )
pitch = array([pitchDetect(frame)[0] for frame in frames])
- expected_pitch = numpy.load(join(filedir(), 'pitchyin/vignesh_pitch_vamp.npy'))
+ expected_pitch = numpy.load(join(filedir(), "pitchyin/vignesh_pitch_vamp.npy"))
# The VAMP implementation provides voiced/unvoiced information
# while our system does not. Thus set to 0 unvoiced frames in
@@ -171,5 +172,5 @@ def testARealCaseVampComparison(self):
suite = allTests(TestPitchYin)
-if __name__ == '__main__':
+if __name__ == "__main__":
TextTestRunner(verbosity=2).run(suite)
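
Every synthetic test in this file follows the same frame-wise pattern: build a signal with a known fundamental, run PitchYin on each frame, and assert that the mean pitch and confidence land within the given precisions. A condensed sketch of that flow for the sine case, assuming Essentia's Python bindings are installed (parameters copied from runTest above):

# Condensed sketch of the runTest flow: PitchYin returns a
# (pitch_hz, confidence) pair for every time-domain frame.
import numpy as np
from essentia.standard import FrameGenerator, PitchYin

sr, freq, frameSize = 44100, 440, 1024
signal = np.sin(2.0 * np.pi * freq * np.arange(sr) / sr).astype(np.float32)

pitchDetect = PitchYin(frameSize=frameSize, sampleRate=sr)
results = [pitchDetect(frame)
           for frame in FrameGenerator(signal, frameSize=frameSize, hopSize=frameSize)]
pitch = [f for f, _ in results]
confidence = [c for _, c in results]

# the tests assert these against freq and 1.0 within the given precisions
print(np.mean(pitch), np.mean(confidence))
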
diff --git a/test/src/unittests/tonal/test_pitchyinfft.py b/test/src/unittests/tonal/test_pitchyinfft.py
index e22ca1eb6..49065bba2 100644
--- a/test/src/unittests/tonal/test_pitchyinfft.py
+++ b/test/src/unittests/tonal/test_pitchyinfft.py
@@ -18,10 +18,10 @@
# version 3 along with this program. If not, see http://www.gnu.org/licenses/
-
from essentia_test import *
from numpy import sin, pi, mean, random
+
class TestPitchYinFFT(TestCase):
def testEmpty(self):
@@ -32,70 +32,68 @@ def testZero(self):
self.assertEqual(pitch, 0)
self.assertEqual(confidence, 0)
-
def testSine(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 440
- signal = [sin(2.0*pi*freq*i/sr) for i in range(size)]
+ signal = [sin(2.0 * pi * freq * i / sr) for i in range(size)]
self.runTest(signal, sr, freq)
def testBandLimitedSquare(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 660
- w = 2.0*pi*freq
+ w = 2.0 * pi * freq
nharms = 10
signal = zeros(size)
for i in range(size):
for harm in range(nharms):
- signal[i] += .5/(2.*harm+1)*sin((2*harm+1)*i*w/sr)
+ signal[i] += 0.5 / (2.0 * harm + 1) * sin((2 * harm + 1) * i * w / sr)
self.runTest(signal, sr, freq)
def testBandLimitedSaw(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 660
- w = 2.0*pi*freq
+ w = 2.0 * pi * freq
nharms = 10
signal = zeros(size)
- for i in range(1,size):
- for harm in range(1,nharms+1):
- signal[i] += 1./harm*sin(harm*i*w/sr)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += 1.0 / harm * sin(harm * i * w / sr)
self.runTest(signal, sr, freq, 1.1, 0.1)
def testBandLimitedSawMasked(self):
sr = 44100
- size = sr*1;
+ size = sr * 1
freq = 440
- w = 2.0*pi*freq
- subw = 2.0*pi*(freq-100)
+ w = 2.0 * pi * freq
+ subw = 2.0 * pi * (freq - 100)
nharms = 10
signal = zeros(size)
- for i in range(1,size):
+ for i in range(1, size):
# masking noise:
- whitenoise = 2*(random.rand(1)-0.5)
- signal[i] += 2*whitenoise
- for harm in range(1,nharms):
- signal[i] += 1./harm*sin(i*harm*w/sr)
- signal = 5*LowPass()(signal)
- for i in range(1,size):
- for harm in range(1,nharms+1):
- signal[i] += .1/harm*sin(i*harm*w/sr)
- signal[i] += 0.5*sin(i*subw/sr)
+ whitenoise = 2 * (random.rand(1) - 0.5)
+ signal[i] += 2 * whitenoise
+ for harm in range(1, nharms):
+ signal[i] += 1.0 / harm * sin(i * harm * w / sr)
+ signal = 5 * LowPass()(signal)
+ for i in range(1, size):
+ for harm in range(1, nharms + 1):
+ signal[i] += 0.1 / harm * sin(i * harm * w / sr)
+ signal[i] += 0.5 * sin(i * subw / sr)
max_signal = max(signal) + 1
- signal = signal/max_signal
+ signal = signal / max_signal
self.runTest(signal, sr, freq, 1.5, 0.3)
-
- def runTest(self, signal, sr, freq, pitch_precision = 1, conf_precision = 0.1):
+ def runTest(self, signal, sr, freq, pitch_precision=1, conf_precision=0.1):
frameSize = 1024
hopsize = frameSize
frames = FrameGenerator(signal, frameSize=frameSize, hopSize=hopsize)
- win = Windowing(type='hann')
- pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate = sr)
+ win = Windowing(type="hann")
+ pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate=sr)
pitch = []
confidence = []
for frame in frames:
@@ -107,8 +105,8 @@ def runTest(self, signal, sr, freq, pitch_precision = 1, conf_precision = 0.1):
self.assertAlmostEqual(mean(confidence), 1, conf_precision)
def testInvalidParam(self):
- self.assertConfigureFails(PitchYinFFT(), {'frameSize' : 1})
- self.assertConfigureFails(PitchYinFFT(), {'sampleRate' : 0})
+ self.assertConfigureFails(PitchYinFFT(), {"frameSize": 1})
+ self.assertConfigureFails(PitchYinFFT(), {"sampleRate": 0})
def testARealCase(self):
# The expected values were recomputed from commit
@@ -123,11 +121,11 @@ def testARealCase(self):
frameSize = 1024
sr = 44100
hopSize = 512
- filename = join(testdata.audio_dir, 'recorded', 'vignesh.wav')
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
audio = MonoLoader(filename=filename, sampleRate=44100)()
frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
- win = Windowing(type='hann')
- pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate = sr)
+ win = Windowing(type="hann")
+ pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate=sr)
pitch = []
confidence = []
for frame in frames:
@@ -135,8 +133,10 @@ def testARealCase(self):
f, conf = pitchDetect(spec)
pitch += [f]
confidence += [conf]
- expected_pitch = numpy.load(join(filedir(), 'pitchyinfft/vignesh_pitch.npy'))
- expected_conf = numpy.load(join(filedir(), 'pitchyinfft/vignesh_confidance.npy'))
+ expected_pitch = numpy.load(join(filedir(), "pitchyinfft/vignesh_pitch.npy"))
+ expected_conf = numpy.load(
+ join(filedir(), "pitchyinfft/vignesh_confidence.npy")
+ )
self.assertAlmostEqualVector(pitch, expected_pitch)
self.assertAlmostEqualVector(confidence, expected_conf, 5e-5)
@@ -148,16 +148,20 @@ def testARealCaseAubioComparison(self):
frameSize = 4096
sr = 44100
hopSize = 512
- filename = join(testdata.audio_dir, 'recorded', 'vignesh.wav')
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
audio = MonoLoader(filename=filename, sampleRate=44100)()
- frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True)
+ frames = FrameGenerator(
+ audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True
+ )
win = Windowing(normalized=False, zeroPhase=False)
spec = Spectrum()
pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate=sr)
pitch = array([pitchDetect(spec(win(frame)))[0] for frame in frames])
- expected_pitch = numpy.load(join(filedir(), 'pitchyinfft/vignesh_pitch_aubio.npy'))
+ expected_pitch = numpy.load(
+ join(filedir(), "pitchyinfft/vignesh_pitch_aubio.npy")
+ )
# Trim the first and last frames as the
-        # system behavior is unestable.
+        # system behavior is unstable.
@@ -171,16 +175,20 @@ def testARealCaseAubioWithToleranceComparison(self):
frameSize = 4096
sr = 44100
hopSize = 512
- filename = join(testdata.audio_dir, 'recorded', 'vignesh.wav')
+ filename = join(testdata.audio_dir, "recorded", "vignesh.wav")
audio = MonoLoader(filename=filename, sampleRate=44100)()
- frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True)
+ frames = FrameGenerator(
+ audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True
+ )
win = Windowing(normalized=False, zeroPhase=False)
spec = Spectrum()
pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate=sr, tolerance=0.4)
pitch = array([pitchDetect(spec(win(frame)))[0] for frame in frames])
- expected_pitch = numpy.load(join(filedir(), 'pitchyinfft/vignesh_pitch_aubio_with_tolerance.npy'))
+ expected_pitch = numpy.load(
+ join(filedir(), "pitchyinfft/vignesh_pitch_aubio_with_tolerance.npy")
+ )
# Trim the first and last frames as the
-        # system behavior is unestable.
+        # system behavior is unstable.
@@ -192,5 +200,5 @@ def testARealCaseAubioWithToleranceComparison(self):
suite = allTests(TestPitchYinFFT)
-if __name__ == '__main__':
+if __name__ == "__main__":
TextTestRunner(verbosity=2).run(suite)
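
The PitchYinFFT tests differ from the PitchYin ones in one respect: the algorithm consumes a magnitude spectrum rather than the time-domain frame, so each frame is windowed and transformed first. A minimal sketch of that chain, assuming Essentia's Python bindings are installed, with parameters taken from testARealCase above (the audio path is illustrative):

# Minimal sketch of the Windowing -> Spectrum -> PitchYinFFT chain.
from essentia.standard import (
    MonoLoader, FrameGenerator, Windowing, Spectrum, PitchYinFFT,
)

sr, frameSize, hopSize = 44100, 1024, 512
audio = MonoLoader(filename="vignesh.wav", sampleRate=sr)()  # illustrative path

win = Windowing(type="hann")
spec = Spectrum()
pitchDetect = PitchYinFFT(frameSize=frameSize, sampleRate=sr)

pitch, confidence = [], []
for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
    f, conf = pitchDetect(spec(win(frame)))
    pitch.append(f)
    confidence.append(conf)
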