From 5f40214ef97a018a93ad17c5c82d7f618996dab7 Mon Sep 17 00:00:00 2001 From: Bo Li Date: Fri, 6 Nov 2015 00:33:01 -0800 Subject: [PATCH] updated documents --- WHAT_IS_NEW | 11 +++++++++++ rsem-calculate-expression | 22 ++++++++++++++++------ rsem_perl_utils.pm | 2 +- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW index 8f0ef31..d17fa5a 100644 --- a/WHAT_IS_NEW +++ b/WHAT_IS_NEW @@ -1,3 +1,14 @@ +RSEM v1.2.24 + +- RSEM will extract gene_name/transcript_name from the GTF file when possible; if extracted, gene_name/transcript_name will be appended to the end of gene_id/transcript_id with an underscore in between +- Modified 'rsem-plot-model' to indicate the modes of fragment length and read length distributions +- Modified 'rsem-plot-model' to present alignment statistics better using both barplot and pie chart +- Updated 'EBSeq' to version 1.2.0 +- Added coefficient of quartile variation in addition to credibility intervals when '--calc-ci' is turned on +- Added '--single-cell-prior' option to notify RSEM to use a sparse prior (Dir(0.1)) for single cell data; this option only makes sense if '--calc-pme' or '--calc-ci' is set + +-------------------------------------------------------------------------------------------- + RSEM v1.2.23 - Moved version information from WHAT_IS_NEW to rsem_perl_utils.pm in order to make sure the '--version' option always output the version information diff --git a/rsem-calculate-expression b/rsem-calculate-expression index 471fd73..499a1eb 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -666,7 +666,7 @@ Set the seed for the random number generators used in calculating posterior mean =item B<--single-cell-prior> -By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility intervals. However, much less genes are expressed in single cell RNA-Seq data. 
Thus, if you want to compute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, you should turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which encourage the sparsity of the expression levels. (Default: off) +By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility intervals. However, far fewer genes are expressed in single-cell RNA-Seq data. Thus, if you want to compute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, it is recommended that you turn on this option. Then RSEM will use Dirichlet(0.1) as the prior, which encourages sparsity of the expression levels. (Default: off) =item B<--calc-pme> @@ -820,7 +820,7 @@ The credibility level for credibility intervals. (Default: 0.95) =item B<--ci-memory> -Maximum size (in memory, MB) of the auxiliary buffer used for computing credibility intervals (CI). Set it larger for a faster CI calculation. However, leaving 2 GB memory free for other usage is recommended. (Default: 1024) +Maximum size (in memory, MB) of the auxiliary buffer used for computing credibility intervals (CI). (Default: 1024) =item B<--ci-number-of-samples-per-count-vector> @@ -878,7 +878,7 @@ File containing isoform level expression estimates. The first line contains column names separated by the tab character. 
The format of each line in the rest of this file is: -transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound] +transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation] Fields are separated by the tab character. Fields within "[]" are optional. They will not be presented if neither '--calc-pme' nor @@ -933,7 +933,17 @@ percentage calculated from 'pme_TPM' values. 'TPM_ci_lower_bound', 'TPM_ci_upper_bound', 'FPKM_ci_lower_bound' and 'FPKM_ci_upper_bound' are lower(l) and upper(u) bounds of 95% credibility intervals for TPM and FPKM values. The bounds are -inclusive (i.e. [l, u]). +inclusive (i.e. [l, u]). + +'TPM_coefficient_of_quartile_variation' and +'FPKM_coefficient_of_quartile_variation' are coefficients of quartile +variation (CQV) for TPM and FPKM values. CQV is a robust way of +measuring the ratio between the standard deviation and the mean. It is +defined as + +CQV := (Q3 - Q1) / (Q3 + Q1), + +where Q1 and Q3 are the first and third quartiles. =item B<sample_name.genes.results> @@ -941,7 +951,7 @@ File containing gene level expression estimates. The first line contains column names separated by the tab character. 
The format of each line in the rest of this file is: -gene_id transcript_id(s) length effective_length expected_count TPM FPKM [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound] +gene_id transcript_id(s) length effective_length expected_count TPM FPKM [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation] Fields are separated by the tab character. Fields within "[]" are optional. They will not be presented if neither '--calc-pme' nor @@ -966,7 +976,7 @@ allele-specific expression calculation. The first line contains column names separated by the tab character. The format of each line in the rest of this file is: -allele_id transcript_id gene_id length effective_length expected_count TPM FPKM AlleleIsoPct AlleleGenePct [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM AlleleIsoPct_from_pme_TPM AlleleGenePct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound] +allele_id transcript_id gene_id length effective_length expected_count TPM FPKM AlleleIsoPct AlleleGenePct [posterior_mean_count posterior_standard_deviation_of_count pme_TPM pme_FPKM AlleleIsoPct_from_pme_TPM AlleleGenePct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation] Fields are separated by the tab character. Fields within "[]" are optional. 
They will not be presented if neither '--calc-pme' nor diff --git a/rsem_perl_utils.pm b/rsem_perl_utils.pm index 7389d5d..ba91f05 100644 --- a/rsem_perl_utils.pm +++ b/rsem_perl_utils.pm @@ -9,7 +9,7 @@ our @ISA = qw(Exporter); our @EXPORT = qw(runCommand); our @EXPORT_OK = qw(runCommand collectResults showVersionInfo); -my $version = "RSEM v1.2.23"; +my $version = "RSEM v1.2.24"; # command, {err_msg} sub runCommand {