From d73c056b84dbecbdd1120f2ba6e76a73b0e2f4c2 Mon Sep 17 00:00:00 2001 From: Aitor Blanco-Miguez Date: Mon, 25 Jul 2022 09:18:54 +0000 Subject: [PATCH] Updates to version 3.1.0 --- README.md | 16 ++++++---------- bioconda_recipe/meta.yaml | 2 +- changeset.txt | 6 ++++++ metaphlan/metaphlan.py | 4 ++-- metaphlan/strainphlan.py | 6 +++--- metaphlan/utils/add_metadata_tree.py | 6 +++--- metaphlan/utils/external_exec.py | 4 ++-- metaphlan/utils/extract_markers.py | 6 +++--- metaphlan/utils/parallelisation.py | 4 ++-- metaphlan/utils/plot_tree_graphlan.py | 4 ++-- metaphlan/utils/sample2markers.py | 4 ++-- metaphlan/utils/strain_transmission.py | 4 ++-- metaphlan/utils/util_fun.py | 6 +++--- setup.py | 2 +- 14 files changed, 38 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 8fa9a94f..31eee633 100755 --- a/README.md +++ b/README.md @@ -1,20 +1,16 @@ # MetaPhlAn: Metagenomic Phylogenetic Analysis [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/metaphlan/README.html) [![PyPI - Downloads](https://img.shields.io/pypi/dm/metaphlan?label=MetaPhlAn%20on%20PyPi)](https://pypi.org/project/MetaPhlAn/) [![MetaPhlAn on DockerHub](https://img.shields.io/docker/pulls/biobakery/metaphlan?label=MetaPhlAn%20on%20DockerHub)](https://hub.docker.com/r/biobakery/metaphlan) [![Build MetaPhlAn package](https://github.com/biobakery/MetaPhlAn/workflows/Build%20MetaPhlAn%20package/badge.svg?branch=3.0)](https://github.com/biobakery/MetaPhlAn/actions?query=workflow%3A%22Build+MetaPhlAn+package%22) -## What's new in version 3 -* New MetaPhlAn marker genes extracted with a newer version of ChocoPhlAn based on UniRef -* Estimation of metagenome composed by unknown microbes with parameter `--unknown_estimation` -* Automatic retrieval and installation of the latest MetaPhlAn database with parameter `--index latest` -* Virus profiling with `--add_viruses` -* Calculation of metagenome 
size for improved estimation of reads mapped to a given clade -* Inclusion of NCBI taxonomy ID in the ouput file -* CAMI (Taxonomic) Profiling Output Format included -* Removal of reads with low MAPQ values +## What's new in version 3.1 +* 433 low-quality species were removed from the MetaPhlAn 3.1 marker database and 2,680 species were added (for a new total of 15,766; a 17% increase). +* Marker genes for a subset of existing bioBakery 3 species were also revised. +* Most existing bioBakery 3 species pangenomes were updated with revised or expanded gene content. +* MetaPhlAn 3.1 software has been updated to work with the revised marker database. ------------- ## Description MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level. With the newly added StrainPhlAn module, it is now possible to perform accurate strain-level microbial profiling. -MetaPhlAn relies on ~1.1M unique clade-specific marker genes (the latest marker information file `mpa_v30_CHOCOPhlAn_201901_marker_info.txt.bz2` can be found [here](https://www.dropbox.com/sh/7qze7m7g9fe2xjg/AAAlyQITZuUCtBUJxpxhIroIa/mpa_v30_CHOCOPhlAn_201901_marker_info.txt.bz2?dl=1)) identified from ~100,000 reference genomes (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: +MetaPhlAn relies on ~1.1M unique clade-specific marker genes (the latest marker information file `mpa_v31_CHOCOPhlAn_201901_marker_info.txt.bz2` can be found [here](http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/mpa_v31_CHOCOPhlAn_201901_marker_info.txt.bz2)) identified from ~100,000 reference genomes (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: * unambiguous taxonomic assignments; * accurate estimation of organismal relative abundance; diff --git a/bioconda_recipe/meta.yaml b/bioconda_recipe/meta.yaml index 2448f1f5..2a68b118 100644 --- a/bioconda_recipe/meta.yaml +++ 
b/bioconda_recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "metaphlan" %} -{% set version = "4.0" %} +{% set version = "3.1" %} package: name: {{ name }} diff --git a/changeset.txt b/changeset.txt index 8cd927a3..d5552e2b 100755 --- a/changeset.txt +++ b/changeset.txt @@ -1,3 +1,9 @@ +=== Version 3.1 +* 433 low-quality species were removed from the MetaPhlAn 3.1 marker database and 2,680 species were added (for a new total of 15,766; a 17% increase). +* Marker genes for a subset of existing bioBakery 3 species were also revised. +* Most existing bioBakery 3 species pangenomes were updated with revised or expanded gene content. +* MetaPhlAn 3.1 software has been updated to work with the revised marker database. + === Version 3.0 * New MetaPhlAn marker genes extracted with a newer version of ChocoPhlAn based on UniRef * Estimation of metagenome composed by unknown microbes with parameter `--unknown_estimation` diff --git a/metaphlan/metaphlan.py b/metaphlan/metaphlan.py index 6f5dc968..d8693e03 100755 --- a/metaphlan/metaphlan.py +++ b/metaphlan/metaphlan.py @@ -4,8 +4,8 @@ 'Duy Tin Truong, ' 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Aitor Blanco Miguez (aitor.blancomiguez@unitn.it)') -__version__ = '3.0.14' -__date__ = '19 Jan 2022' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import sys try: diff --git a/metaphlan/strainphlan.py b/metaphlan/strainphlan.py index a4a114cf..49a244d6 100755 --- a/metaphlan/strainphlan.py +++ b/metaphlan/strainphlan.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0.14' -__date__ = '19 Jan 2022' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import sys @@ -29,7 +29,7 @@ metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__)) DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "metaphlan_databases") DEFAULT_DB_FOLDER = os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER) 
-DEFAULT_DB_NAME = "mpa_v30_CHOCOPhlAn_201901.pkl" +DEFAULT_DB_NAME = "mpa_v31_CHOCOPhlAn_201901.pkl" DEFAULT_DATABASE = os.path.join(DEFAULT_DB_FOLDER, DEFAULT_DB_NAME) PHYLOPHLAN_MODES = ['accurate', 'fast'] diff --git a/metaphlan/utils/add_metadata_tree.py b/metaphlan/utils/add_metadata_tree.py index b44a9b2b..c9af825b 100755 --- a/metaphlan/utils/add_metadata_tree.py +++ b/metaphlan/utils/add_metadata_tree.py @@ -1,8 +1,8 @@ #!/usr/bin/env python __author__ = ('Duy Tin Truong (duytin.truong@unitn.it), ' 'Aitor Blanco Miguez (aitor.blancomiguez@unitn.it)') -__version__ = '3.0' -__date__ = '21 Feb 2020' +__version__ = '3.1.0' +__date__ = '25 Jul 2021' import argparse as ap import pandas @@ -102,4 +102,4 @@ def main(): ofile.write(line) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/metaphlan/utils/external_exec.py b/metaphlan/utils/external_exec.py index 9b55e9d0..77f04681 100755 --- a/metaphlan/utils/external_exec.py +++ b/metaphlan/utils/external_exec.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0.8' -__date__ = '7 May 2021' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import os, sys, re, shutil, tempfile import subprocess as sb diff --git a/metaphlan/utils/extract_markers.py b/metaphlan/utils/extract_markers.py index 398e0afc..45d2ecef 100755 --- a/metaphlan/utils/extract_markers.py +++ b/metaphlan/utils/extract_markers.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0.14' -__date__ = '19 Jan 2022' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import sys try: @@ -31,7 +31,7 @@ metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__)) DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "../metaphlan_databases") DEFAULT_DB_FOLDER = 
os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER) -DEFAULT_DB_NAME = "mpa_v30_CHOCOPhlAn_201901.pkl" +DEFAULT_DB_NAME = "mpa_v31_CHOCOPhlAn_201901.pkl" DEFAULT_DATABASE = os.path.join(DEFAULT_DB_FOLDER, DEFAULT_DB_NAME) """ diff --git a/metaphlan/utils/parallelisation.py b/metaphlan/utils/parallelisation.py index 0bc25f9d..718b803e 100755 --- a/metaphlan/utils/parallelisation.py +++ b/metaphlan/utils/parallelisation.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0' -__date__ = '21 Feb 2020' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' try: from .util_fun import error diff --git a/metaphlan/utils/plot_tree_graphlan.py b/metaphlan/utils/plot_tree_graphlan.py index 791b721c..117bcf24 100755 --- a/metaphlan/utils/plot_tree_graphlan.py +++ b/metaphlan/utils/plot_tree_graphlan.py @@ -1,8 +1,8 @@ #!/usr/bin/env python __author__ = ('Duy Tin Truong (duytin.truong@unitn.it), ' 'Aitor Blanco Miguez (aitor.blancomiguez@unitn.it)') -__version__ = '3.0' -__date__ = '21 Feb 2020' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import argparse as ap import dendropy diff --git a/metaphlan/utils/sample2markers.py b/metaphlan/utils/sample2markers.py index b91548c7..bef458c1 100755 --- a/metaphlan/utils/sample2markers.py +++ b/metaphlan/utils/sample2markers.py @@ -4,8 +4,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0.14' -__date__ = '19 Jan 2022' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import sys try: diff --git a/metaphlan/utils/strain_transmission.py b/metaphlan/utils/strain_transmission.py index 676a34e9..6c40e1b0 100755 --- a/metaphlan/utils/strain_transmission.py +++ b/metaphlan/utils/strain_transmission.py @@ -1,7 +1,7 @@ __author__ = ('Aitor Blanco (aitor.blancomiguez@unitn.it), ' 'Mireia Valles-Colomer 
(mireia.vallescolomer@unitn.it)') -__version__ = '3.0.14' -__date__ = '19 Jan 2022' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import os, time, sys import argparse as ap diff --git a/metaphlan/utils/util_fun.py b/metaphlan/utils/util_fun.py index 5c4e02db..b436831b 100755 --- a/metaphlan/utils/util_fun.py +++ b/metaphlan/utils/util_fun.py @@ -3,8 +3,8 @@ 'Francesco Asnicar (f.asnicar@unitn.it), ' 'Moreno Zolfo (moreno.zolfo@unitn.it), ' 'Francesco Beghini (francesco.beghini@unitn.it)') -__version__ = '3.0' -__date__ = '21 Feb 2020' +__version__ = '3.1.0' +__date__ = '25 Jul 2022' import os, sys, re, pickletools, pickle, time, bz2, gzip @@ -134,4 +134,4 @@ def is_number(s): int(s) return True except ValueError: - return False \ No newline at end of file + return False diff --git a/setup.py b/setup.py index 31ded74e..25db594c 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setuptools.setup( name='MetaPhlAn', - version='3.0.14', + version='3.1.0', author='Francesco Beghini', author_email='francesco.beghini@unitn.it', url='http://github.com/biobakery/MetaPhlAn/',