From 1531aed449a2200a4c13ea091e85e439904225e9 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 18 May 2021 14:33:41 +0100 Subject: [PATCH 01/82] debugged some futile cycles --- src/reconstruction/demeter/src/debugging/removeFutileCycles.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index 347dd83c85..403342b353 100644 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -239,7 +239,9 @@ 'NTRIR4 AND FDNADOX_H AND FDOXR',[],'FDOXR','FDOXRi' 'ASPNH4L AND ASPt2r',[],'ASPNH4L','ASPNH4Li' 'DDGLKr AND DDGLCNt2r',[],'DDGLKr','DDGLK' - 'ORNt2r AND OCBT AND ARGSSr',[],'ARGSSr','ARGSS' + 'ARGSSr',[],'ARGSSr','ARGSS' + 'ARGDr',[],'ARGDr','ARGDA' + 'SERD_Lr',[],'SERD_Lr','SERD_L' 'G1PP AND GLGC AND GLCP',[],'G1PP','G1PPi' 'CBMKr AND OCBT AND r1667','ARGDA','CBMKr','CBMK' 'TRPS3r AND TRPS1 AND TRPS2r',[],'TRPS3r','TRPS3' From b396d99532a0817fcf107479a4bb7d111e8db965 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Wed, 19 May 2021 15:02:35 +0100 Subject: [PATCH 02/82] debugged some futile cycles --- src/reconstruction/demeter/src/debugging/removeFutileCycles.m | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index 403342b353..97b0a053e0 100644 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -470,7 +470,9 @@ 'OAASr AND ICDHx AND ACONTa AND ACONTb AND ALCD2x AND FDH AND PTAr AND ACKr',[],'ICDHx','ICDHxi' 'METt2r AND METt3r',[],'METt2r','METt2' 'NTP9 AND NDPK4',[],'NTP9','NTP9i' + 'MAN1PT2r',[],'MAN1PT2r','MAN1PT2' 'HEX4 AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' + 'MANISO AND HMR_7271 AND 
MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' 'PGMT AND GALU AND GLBRAN AND GLDBRAN AND GLGNS1 AND GLPASE1 AND NDPK2 AND PPA AND r1393',[],'NDPK2','NDPK2i' 'D_GLUMANt AND MANt2r AND GLU_Dt2r',[],'GLU_Dt2r','GLU_Dt2' 'NACUP AND NACSMCTte AND NAt3_1',[],'NAt3_1','NAt3' From 167ee36d3c85bfa60a343e7c235c1806d85110a4 Mon Sep 17 00:00:00 2001 From: nmendozam Date: Sat, 22 May 2021 17:31:57 -0500 Subject: [PATCH 03/82] buildFluxDistLayout with makeColorGradient --- .../maps/ReconMap/buildFluxDistLayout.m | 4 +- .../maps/ReconMap/makeColorGradient.m | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/visualization/maps/ReconMap/makeColorGradient.m diff --git a/src/visualization/maps/ReconMap/buildFluxDistLayout.m b/src/visualization/maps/ReconMap/buildFluxDistLayout.m index 6598e88a3d..c95eb629be 100644 --- a/src/visualization/maps/ReconMap/buildFluxDistLayout.m +++ b/src/visualization/maps/ReconMap/buildFluxDistLayout.m @@ -48,6 +48,7 @@ if ~exist('content','var') normalizedFluxes = normalizeFluxes(abs(solution.v), thickness); content = 'name%09reactionIdentifier%09lineWidth%09color%0D'; + cmap = makeColorGradient('#ff0000', defaultColor, 11); for i=1:length(solution.v) mapReactionId = model.rxns{i}; @@ -57,7 +58,8 @@ end if solution.v(i) ~= 0 - line = strcat('%09', mapReactionId, '%09', num2str(normalizedFluxes(i)), '%09', defaultColor, '%0D'); + color = cmap{round(normalizedFluxes(i)) + 1}; + line = strcat('%09', mapReactionId, '%09', 1, '%09', color, '%0D'); content = strcat(content, line); end diff --git a/src/visualization/maps/ReconMap/makeColorGradient.m b/src/visualization/maps/ReconMap/makeColorGradient.m new file mode 100644 index 0000000000..32a590b2eb --- /dev/null +++ b/src/visualization/maps/ReconMap/makeColorGradient.m @@ -0,0 +1,39 @@ +function [cmap] = makeColorGradient(col1, col2, ncol) +% Generates a color gradient in hex format, based on color 1 and 2 +% +% USAGE: +% +% [cmap] = 
makeColorGradient(col1, col2, ncol) +% +% INPUTS: +% col1: First color +% col2: Last color +% ncol: Number of colors in between +% +% OUTPUT: +% cmap: Color gradient map with the corresponding +% colors in hex format +% +% .. Author: - Nicolas Mendoza-Mejia May/2021 + +hexformat = "#%02x%02x%02x"; + +rgb1 = sscanf(col1, hexformat); +rgb2 = sscanf(col2, hexformat); + +T = (rgb2 -rgb1)/(ncol - 1); +rgb = zeros(3, ncol); + +for i=1:3 + if T(i) ~= 0 + rgb(i, :) = rgb1(i):T(i):rgb2(i); + end +end + +rgb = round(rgb); +cmap = cell(1, ncol); +for i=1:ncol + cmap{i} = sprintf(hexformat, rgb(1, i), rgb(2, i), rgb(3, i)); +end + +end \ No newline at end of file From 24e16d030aae3b9c2c7fe0a88d7f54558290d7ca Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 27 May 2021 17:41:59 +0100 Subject: [PATCH 04/82] Fixed futile cycles in pan-models --- .../mgPipe/adaptVMHDietToAGORA.m | 2 +- .../mgPipe/createPanModels.m | 8 + .../src/debugging/removeFutileCycles.m | 1471 ++++++++--------- .../integration/createRBioNetDBFromVMHDB.m | 2 +- .../demeter/src/properties/producetSNEPlots.m | 530 +++--- 5 files changed, 1002 insertions(+), 1011 deletions(-) mode change 100644 => 100755 src/reconstruction/demeter/src/debugging/removeFutileCycles.m mode change 100644 => 100755 src/reconstruction/demeter/src/properties/producetSNEPlots.m diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m index 473c40d1ee..a8bcf706da 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m @@ -51,7 +51,7 @@ % Define the list of metabolites required by at least one AGORA model for % growth -essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 
'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'; 'EX_alaasp(e)'; 'EX_glyleu(e)'}; +essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 
'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'}; % fix any exchange nomenclature issues adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)'); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 71a099ab01..690648e834 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ 
b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -359,6 +359,14 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'DGORi AND SBTD_D2 AND GALM1r AND GNOXuq','GNOXuq','GNOXuqi' 'LPCDH AND LPCOX AND NADH6pp AND ATPS4pp','LPCDH','LPCDHi' 'CITt2pp AND CITCAtpp AND CAt4ipp','CITCAt','CITCAti' + 'KAS17rev AND FACOAE181 AND FAO181E','FAO181E','FAO181Ei' + 'G1PGTi AND PGMT2 AND G1PPT AND G16BPS','G16BPS','G16BPSi' + 'HISSNAT5tc AND HISt2r','HISt2r','HISt2' + 'TDCOATA AND ACPACT AND FAS140ACPrev','FAS140ACPrev','FAS140ACP' + 'SHCHCS AND 2S6HCC AND ACONTa AND ACONTb AND CITL AND ICDHx','ICDHx','ICDHxi' + 'PPCr AND MALCOAPYRCT AND MMSAD5 AND MMSAD4','PPCr','PPC' + 'SERD_Lr','SERD_Lr','SERD_L' + 'LDH_L AND LDH_L2','LDH_L',[] }; % List Western diet constraints to test if the pan-model produces diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m old mode 100644 new mode 100755 index 97b0a053e0..283f6ea162 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -1,745 +1,726 @@ -function [model, deletedRxns, addedRxns, gfRxns] = removeFutileCycles(model, biomassReaction, database,unionRxns,constrainedModel) -% Part of the DEMETER pipeline. Resolves reactions that are running in -% infeasible directions and causing futile cycles that result in -% unrealistically high ATP production. All solutions were identified -% through manual inspection. Any new solutions identified for reaction -% combinations not yet encountered by DEMETER may be added. -% -% USAGE: -% -% [model, deletedRxns, addedRxns, gfRxns] = removeFutileCycles(model, biomassReaction, database,unionRxns,constrainedModel) -% -% INPUTS -% model: COBRA model structure -% biomassReaction: Reaction ID of the biomass objective function -% database: rBioNet reaction database containing min. 
3 columns: -% Column 1: reaction abbreviation, Column 2: reaction -% name, Column 3: reaction formula. -% unionRxns: Union of reactions from multiple reconstructions -% (only for debugging multi-species models) -% constrainedModels: COBRA model constrained with defined medium (for -% certain steps of DEMETER) -% -% OUTPUT -% model: COBRA model structure -% deletedRxns: Deleted reactions that were causing futile cycles -% addedRxns: Added irreversible versions of the deleted reactions -% gfRxns: Additional gap-filled reactions needed to enable -% growth. Low confidence score. -% -% .. Author: -% - Almut Heinken, 2016-2019 - -deletedRxns = {}; -addedRxns = {}; -gfRxns = {}; - -tol = 1e-8; - -% model needs to be rebuilt, pipeline may crash otherwise -% model = rebuildModel(model); -model_old = model; -model = changeObjective(model, biomassReaction); - -% load complex medium -constraints = readtable('ComplexMedium.txt', 'Delimiter', 'tab'); -constraints=table2cell(constraints); -constraints=cellstr(string(constraints)); - -% apply complex medium -model = useDiet(model,constraints); - -if nargin > 4 && ~isempty(constrainedModel) - model=constrainedModel; -end - -delCnt = 1; -addCnt = 1; - -% Create table with information on reactions to replace to remove futile -% cycles. This information was determined manually. 
-reactionsToReplace = {'if present','if not present','removed','added' - 'LYSt2r AND LYSt3r',[],'LYSt3r','LYSt3' - 'FDHr',[],'FDHr','FDH' - 'GLYO1',[],'GLYO1','GLYO1i' - 'EAR40xr',[],'EAR40xr','EAR40x' - 'PROt2r AND PROt4r',[],'PROt4r','PROt4' - 'FOROXAtex AND FORt',[],'FORt',[] - 'NO2t2r AND NTRIR5',[],'NO2t2r','NO2t2' - 'NOr1mq AND NHFRBOr',[],'NHFRBOr','NHFRBO' - 'N2OO AND NHFRBOr',[],'NHFRBOr','NHFRBO' - 'NIR AND L_LACDr',[],'L_LACDr','L_LACD' - 'NARK AND NTRIR5 AND L_LACDr',[],'L_LACDr','L_LACD' - 'PIt6b AND PIt7',[],'PIt7','PIt7ir' - 'ABUTt2r AND GLUABUTt7',[],'ABUTt2r','ABUTt2' - 'ABUTt2r AND ABTAr',[],'ABTAr','ABTA' - 'Kt1r AND Kt3r AND EX_chsterol(e) AND ARGDA',[],'Kt3r','Kt3 AND ASPTA AND PC AND H2CO3D AND ASPNH4L AND r1667 AND EX_orn(e)' - 'Kt1r AND Kt3r','EX_for(e)','Kt3r','Kt3 AND EX_for(e) AND FORt2r' - 'Kt1r AND Kt3r',[],'Kt3r','Kt3' - 'Kt1r AND Kt3r AND ACtr',[],'Kt3r AND ACtr','Kt3 AND ACt2r' - 'CYTDt4 AND CYTDt2r',[],'CYTDt2r','CYTDt2' - 'ASPt2_2 AND ASPt2r',[],'ASPt2_2','ASPt2_2i' - 'ASPt2_3 AND ASPt2r',[],'ASPt2r','ASPt2' - 'FUMt2_2 AND FUMt2r',[],'FUMt2r','FUMt' - 'SUCCt2_2 AND SUCCt2r','SUCCt','SUCCt2r','SUCCt' - 'SUCCt2_3r AND SUCCt2r',[],'SUCCt2r',[] - 'MALFADO AND MDH',[],'MALFADO','MALFADOi' - 'MALFADO AND GLXS',[],'MALFADO','MALFADOi' - 'r0392 AND GLXCL',[],'r0392','ALDD8x' - 'HACD1 AND PHPB2',[],'PHPB2','PHPB2i' - 'PPCKr AND PPCr',[],'PPCKr','PPCK' - 'PPCKr AND GLFRDO AND FXXRDO',[],'PPCKr','PPCK' - 'BTCOADH AND FDNADOX_H AND ACOAD1',[],'ACOAD1','ACOAD1i' - 'ACEDIPIT AND APAT AND DAPDA AND 26DAPLLAT',[],'26DAPLLAT','26DAPLLATi' - 'ACKr AND ACEDIPIT AND APAT AND DAPDA',[],'DAPDA','DAPDAi' - 'ACKr AND ACEDIPIT AND APAT AND DAPDA',[],'DAPDA','DAPDAi AND EX_asp_L(e) AND ASPt2r' - 'MALNAt AND NAt3_1 AND MALt2r',[],'NAt3_1','NAt3' - 'MALNAt AND NAt3_1 AND MALt2r',[],'MALt2r','MALt2' - 'MALNAt AND MAL_Lte AND MALt2r',[],'MALt2r','MALt2' - 'MAL_Lte AND MDH3 AND MALt2r',[],'MALt2r','MALt2' - 'MALNAt AND NAt3_1 AND MALt2r AND URIt2r AND 
URIt4',[],'URIt2r','URIt2' - 'DADNt2r AND HYXNt',[],'HYXNt','HYXNti' - 'URIt2r AND URAt2r',[],'URAt2r','URAt2' - 'XANt2r AND URAt2r',[],'URAt2r','URAt2' - 'XANt2r AND CSNt6',[],'CSNt6','CSNt2' - 'XANt2r AND DADNt2r',[],'XANt2r','XANt2' - 'XANt2r AND XPPTr',[],'XPPTr','XPPT' - 'XANt2r AND PUNP7',[],'XANt2r','XANt2' - 'r1667 AND ARGt2r',[],'ARGt2r','ARGt2' - 'PIt7 AND NAt3_1 AND GLUt4r',[],'GLUt4r','r1144' - 'GLUt2r AND NAt3_1 AND GLUt4r',[],'GLUt4r','r1144' - 'GLYt2r AND NAt3_1 AND GLYt4r',[],'GLYt2r','GLYt2' - 'GLUt2r AND NAt3 AND GLUt4r',[],'GLUt4r','r1144' - 'L_LACNa1t AND L_LACt2r',[],'L_LACt2r','L_LACt' - 'G3PD8 AND SUCD1 AND G3PD1 AND EX_succ(e)',[],'G3PD8','G3PD8i' - 'G3PD8 AND SUCD4 AND G3PD1 AND EX_succ(e)',[],'G3PD8','G3PD8i' - 'G3PD8 AND SUCD1 AND G3PD1','SUCCt2r','G3PD8','G3PD8i AND EX_succ(e) AND SUCCt' - 'G3PD8 AND SUCD4 AND G3PD1','SUCCt2r','G3PD8','G3PD8i AND EX_succ(e) AND SUCCt' - 'ACOAD1 AND ACOAD1f AND SUCD4',[],'ACOAD1f','ACOAD1fi' - 'PGK AND D_GLY3PR',[],'D_GLY3PR','D_GLY3PRi' - 'H2O2D',[],'H2O2D','NPR' - 'ACCOACL AND BTNCL',[],'BTNCL','BTNCLi' - 'r0220 AND r0318',[],'r0318','r0318i' - 'MTHFRfdx AND FDNADOX_H',[],'FDNADOX_H',[] - 'FDNADOX_H AND FDX_NAD_NADP_OX',[],'FDX_NAD_NADP_OX','FDX_NAD_NADP_OXi' - 'PROPAT4te AND PROt2r',[],'PROt2r','PROt2' - 'G3PD8 AND GLYC3Pt',[],'GLYC3Pt','GLYC3Pti' - 'OAACL AND PPCr AND NDPK9',[],'OAACL','OAACLi' - 'OAACL AND PPCr AND NDPK3',[],'OAACL','OAACLi' - 'OAACL AND ASPTA AND NDPK9',[],'OAACL','OAACLi' - 'OAACL AND ASPTA AND PPDK',[],'OAACL','OAACLi' - 'CBMKr AND OCBT AND CITRH','ARGDA','CBMKr','CBMK' - 'SPTc AND r0392 AND GHMT2r',[],'GHMT2r','GHMT2' - 'OAACL AND OAASr AND NDPK9',[],'OAASr','OAAS' - 'G16BPS AND G1PP AND G1PPT',[],'G16BPS','G16BPSi' - 'ASPK AND ASAD AND HSDx',[],'ASPK','ASPKi' - 'BTCOADH AND ACOAD1f AND FDNADOX_H',[],'ACOAD1f','ACOAD1fi' - 'TARCGLYL AND TARTD AND PYK',[],'TARCGLYL','TARCGLYLi' - 'HPROxr AND PROD3',[],'PROD3','PROD3i' - 'RBPC AND PRKIN',[],'PRKIN','PRKINi' - 'MGt5 AND CITt10 AND 
CITCAt',[],'CITt10','CITt10i' - 'MGt5 AND CITt10 AND CITCAt',[],'CITCAt','CITCAti' - 'CAt4i AND CITCAt AND r1088',[],'r1088','CITt2' - 'CAt4i AND CITCAt AND r1088',[],'CITCAt','CITCAti' - 'SUCCt AND CITt7 AND r1088',[],'r1088','CITt2' - 'MMSAD5 AND MALCOAPYRCT AND MMSAD4',[],'MMSAD4','MMSAD4i' - 'NTRIR5 AND FDOXR AND FDNADOX_H',[],'NTRIR5','NTRIR5i' - 'GLFRDO AND FRDOr AND FDNADOX_H',[],'FRDOr','FRDO' - 'GCALDL AND r0392 AND GCALDDr',[],'GCALDDr','GCALDD' - 'ACACT1r AND SUCOAS AND OCOAT1r',[],'OCOAT1r','OCOAT1' - 'FDNADOX_H AND BTCOADH AND MAOX2 AND GLFRDO',[],'GLFRDO','GLFRDOi' - 'PYRCT AND SUCOAS AND PPCr',[],'PPCr','PPC' - '3CARLPDH AND r0163c AND r0556c',[],'r0556c','r0556ci' - 'NACUP AND NACt2r',[],'NACUP',[] - 'NACt AND NACt2r',[],'NACt',[] - 'NCAMUP AND NCAMt2r',[],'NCAMUP',[] - 'ORNt AND ORNt2r',[],'ORNt',[] - 'FORt AND FORt2r',[],'FORt',[] - 'ARABt AND ARABDt2',[],'ARABt',[] - 'ASPte AND ASPt2_2',[],'ASPte',[] - 'ASPte AND ASPt2_3',[],'ASPte',[] - 'ASPt2 AND ASPt2_2',[],'ASPt2',[] - 'ASPt2 AND ASPt2_3',[],'ASPt2',[] - 'THYMDt AND THMDt2r',[],'THYMDt',[] - 'CBMK AND CBMKr',[],'CBMKr',[] - 'SPTc AND TRPS2r AND TRPAS2',[],'TRPS2r','TRPS2' - 'PROD3 AND PROD3i',[],'PROD3',[] - 'PROPAT4te AND PROt2r AND PROt2',[],'PROt2r',[] - 'CITt10i AND CITCAt AND CITCAti',[],'CITCAt',[] - 'GUAt2r AND GUAt',[],'GUAt2r','GUAt2' - 'PROPAT4te AND PROt4r AND PROt4',[],'PROt4r',[] - 'INSt2 AND INSt',[],'INSt2','INSt2i' - 'GNOXuq AND GNOXuqi',[],'GNOXuq',[] - 'GNOXmq AND GNOXmqi',[],'GNOXmq',[] - 'MMSAD5 AND MSAS AND MALCOAPYRCT AND PPCr AND ACALD',[],'ACALD','ACALDi' - 'PGK AND G1PP AND G16BPS AND G1PPT',[],'G16BPS','G16BPSi' - 'FRD7 AND SUCD1 AND G3PD8',[],'G3PD8','G3PD8i' - 'LACLi AND PPCr AND RPE AND PKL AND FTHFL AND MTHFC',[],'MTHFC','MTHFCi' - 'RMNt2 AND RMNt2_1',[],'RMNt2_1',[] - 'MNLpts AND MANAD_D AND MNLt6',[],'MNLt6','MNLt6i' - 'FDNADOX_H AND SULRi AND FXXRDO',[],'FXXRDO','FXXRDOi' - 'FDNADOX_H AND SO3R AND FXXRDO',[],'FXXRDO','FXXRDOi' - 'FDNADOX_H AND AKGS AND BTCOADH 
AND OOR2r',[],'OOR2r','OOR2' - 'FDNADOX_H AND AKGS AND BTCOADH AND OOR2 AND POR4',[],'POR4','POR4i' - 'FDNADOX_H AND AKGS AND OAASr AND ICDHx AND POR4i',[],'ICDHx','ICDHxi' - 'GLXS AND GCALDL AND GCALDDr',[],'GCALDDr','GCALDD' - 'GLYCLTDxr AND GLYCLTDx',[],'GLYCLTDxr',[] - 'GCALDD AND GCALDDr',[],'GCALDDr',[] - 'BGLA AND BGLAr',[],'BGLAr',[] - 'AKGMAL AND MALNAt AND AKGt2r',[],'AKGt2r','AKGt2' - 'AKGte AND MAL_Lte AND AKGt2r',[],'AKGt2r','AKGt2' - 'TRPS1 AND TRPS2r AND TRPS3r',[],'TRPS2r','TRPS2' - 'OAACL AND OAACLi',[],'OAACL',[] - 'DHDPRy AND DHDPRyr',[],'DHDPRyr',[] - 'EDA_R AND EDA',[],'EDA_R',[] - 'GLYC3Pt AND GLYC3Pti',[],'GLYC3Pt',[] - 'TDCOATA AND FA140ACPH AND ACS AND FACOAL140',[],'FACOAL140','FACOAL140i' - 'FA180ACPHrev AND STCOATA AND FACOAL180',[],'FACOAL180','FACOAL180i' - 'FA180ACPHrev AND STCOATA AND FACOAL180',[],'FACOAL180','FACOAL180i AND ADK1' - 'CITt2 AND CAt4i AND CITCAt',[],'CITCAt','CITCAti' - 'AHCYSNS_r AND AHCYSNS',[],'AHCYSNS_r',[] - 'FDOXR AND GLFRDO AND OOR2r AND FRDOr',[],'FRDOr','FRDO' - 'GNOX AND GNOXy AND GNOXuq AND GNOXmq',[],'GNOXmq','GNOXmqi' - 'GNOX AND GNOXy AND GNOXuq AND GNOXmqi',[],'GNOXuq','GNOXuqi' - 'SHSL1r AND SHSL2 AND SHSL4r',[],'SHSL4r','SHSL4' - 'AHSERL3 AND CYSS3r AND METSOXR1r AND SHSL4r',[],'TRDRr','TRDR' - 'ACACT1r AND ACACt2 AND ACACCTr AND OCOAT1r',[],'OCOAT1r','OCOAT1' - 'ACONT AND ACONTa AND ACONTb',[],'ACONT',[] - 'ALAt2r AND ALAt4r',[],'ALAt2r','ALAt2' - 'CYTK2 AND DCMPDA AND URIDK3',[],'DCMPDA','DCMPDAi' - 'MALNAt AND NAt3_1 AND PIt7ir',[],'NAt3_1','NAt3' - 'PIt6b AND PIt7ir',[],'PIt6b','PIt6bi' - 'LEUTA AND LLEUDr',[],'LLEUDr','LLEUD' - 'ILETA AND L_ILE3MR',[],'L_ILE3MR','L_ILE3MRi' - 'TRSARry AND TRSARr',[],'TRSARr','TRSAR' - 'THRD AND THRAr AND PYRDC',[],'THRAr','THRAi' - 'THRD AND GLYAT AND PYRDC',[],'GLYAT','GLYATi' - 'SUCD1 AND SUCD4 AND SUCDimq AND NADH6',[],'SUCD1','SUCD1i' - 'POR4 AND SUCDimq AND NADH6 AND PDHa AND FRD7 AND FDOXR AND NTRIR4',[],'POR4','POR4i' - 'SUCDimq AND NADH6 AND HYD1 AND HYD4 
AND FRD7 AND FDOXR AND NTRIR4',[],'FDOXR','FDOXRi' - 'PPCr AND SUCOAS AND OAASr AND ICDHx AND POR4i AND ACONTa AND ACONTb AND ACACT1r AND 3BTCOAI AND OOR2r',[],'ICDHx','ICDHxi' - 'ICDHx AND AKGS AND SUCOAS AND PYK AND POR4 AND FDNADOX_H AND PPCr AND ICL AND GLXS AND MDH',[],'ICDHx','ICDHxi' - 'PYNP1r AND CSNt6',[],'PYNP1r','PYNP1' - 'ASPK AND ASAD AND HSDy',[],'ASPK','ASPKi' - 'GLUt2r AND GLUABUTt7 AND ABTAr',[],'GLUt2r','GLUt2' - 'DURAD AND DHPM1 AND UPPN',[],'DURAD','DURADi' - 'XU5PG3PL AND PKL',[],'PKL',[] - 'G16BPS AND G1PPT AND PGK',[],'G16BPS','G16BPSi' - 'G1PPT AND PGK AND GAPD_NADP AND GAPD',[],'G1PPT','G1PPTi' - 'PPIt2e AND GUAPRT AND AACPS6 AND GALT',[],'PPIt2e','PPIte' - 'PPIt2e AND GLGC AND NADS2 AND SADT',[],'PPIt2e','PPIte' - 'MCOATA AND MALCOAPYRCT AND C180SNrev',[],'MCOATA','MACPMT' - 'PPCr AND MALCOAPYRCT AND MMSAD5 AND MSAS',[],'PPCr','PPC' - 'ACt2r AND ACtr',[],'ACtr',[] - 'LEUt2r AND LEUtec',[],'LEUtec',[] - 'PTRCt2r AND PTRCtex2',[],'PTRCtex2',[] - 'TYRt2r AND TYRt',[],'TYRt',[] - 'TSULt2 AND SO3t AND H2St AND TRDRr',[],'TRDRr','TRDR' - 'AMPSO3OX AND SADT AND EX_h2s(e) AND CHOLSH',[],'AMPSO3OX','AMPSO3OXi' - 'NTRIR4 AND FDNADOX_H AND FDOXR',[],'FDOXR','FDOXRi' - 'ASPNH4L AND ASPt2r',[],'ASPNH4L','ASPNH4Li' - 'DDGLKr AND DDGLCNt2r',[],'DDGLKr','DDGLK' - 'ARGSSr',[],'ARGSSr','ARGSS' - 'ARGDr',[],'ARGDr','ARGDA' - 'SERD_Lr',[],'SERD_Lr','SERD_L' - 'G1PP AND GLGC AND GLCP',[],'G1PP','G1PPi' - 'CBMKr AND OCBT AND r1667','ARGDA','CBMKr','CBMK' - 'TRPS3r AND TRPS1 AND TRPS2r',[],'TRPS3r','TRPS3' - 'D_LACD AND L_LACD2 AND L_LACDr',[],'L_LACDr','L_LACD' - 'PYK AND MMSAD5 AND PPCr AND MSAS AND MALCOAPYRCT',[],'PPCr','PPC' - 'MALFADO AND PPCKr AND ME2',[],'MALFADO','MALFADOi' - 'PIabc AND PIt7',[],'PIt7','PIt7ir' - 'G3PFDXOR AND FDNADOX_H',[],'FDNADOX_H','FDNADOX_Hi' - 'POR4 AND FRDOr',[],'FRDOr','FRDO' - 'TRPAS2',[],'TRPAS2','TRPAS2i' - 'TRPS2r',[],'TRPS2r','TRPS2' - % 'DPCOAt',[],'DPCOAt','DPCOAti' - 'AMPt2r',[],'AMPt2r','AMPt2' - 
'dTMPt2r',[],'dTMPt2r','dTMPt2' - 'NADPt',[],'NADPt','NADPti' - 'BTCOADH AND FDOXR AND BUTCTr AND BUTKr AND ACOAD1i AND FDNADOX_H',[],'BTCOADH','BTCOADHi' - 'TRDRr AND THSr1mq AND H2St AND SO3t AND TSULt2',[],'TRDRr','TRDR' - 'TRDRr AND AMPSO3OX2 AND AMPSO3OX AND TSULt2',[],'TRDRr','TRDR' - 'OIVD1r AND KLEURFd',[],'OIVD1r','OIVD1' - 'GALt1r AND GALt2_2',[],'GALt2_2','GALt2_2i' - 'GALt4 AND GALt2_2',[],'GALt2_2','GALt2_2i' - 'GALt1r AND GALt4',[],'GALt4','GALt4i' - 'ACGAt AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' - 'NTRIR2y AND FDNADOX_H AND FDOXR',[],'FDOXR','FDOXRi' - 'G3PFDXOR AND PGK',[],'G3PFDXOR','G3PFDXORi' - 'GNOXuq AND GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXuq','GNOXuqi' - 'GNOXuq AND GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXmq','GNOXmqi' - 'GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXmq','GNOXmqi' - 'DGOR AND GLUOR AND NADH6',[],'GLUOR','GLUORi' - 'GNOXuq AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXuq','GNOXuqi' - 'GNOXuq AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXmq','GNOXmqi' - 'GNOXuqi AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXmq','GNOXmqi' - 'FACOAL160',[],'FACOAL160','FACOAL160i' - 'FACOAL180',[],'FACOAL180','FACOAL180i' - 'SUCD1 AND SUCCt AND SUCCt2r',[],'SUCCt',[] - 'SUCD4 AND SUCCt AND SUCCt2r',[],'SUCCt',[] - 'CBMKr AND CBMK',[],'CBMK',[] - 'ETOHt2r AND ETOHt',[],'ETOHt',[] - 'ETOHt2r AND ETOHt3',[],'ETOHt2r',[] - 'ETOHt2r AND ETOHt3',[],'ETOHt3',[] - 'DTTPti',[],'DTTPti',[] - 'UCO2L AND BUAMDH AND BURTADH',[],'UCO2L','UCO2Li' - 'NADH6 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' - 'NADH8 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' - 'NADH6 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_succ(e) AND SUCCt' - 'NADH8 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_succ(e) AND SUCCt' - 'NADH6 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_q8(e) AND Q8abc AND EX_2dmmq8(e) AND 2DMMQ8abc' - 'NADH8 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_q8(e) AND Q8abc AND EX_2dmmq8(e) AND 2DMMQ8abc' - 'FDH2 AND SNG3POR AND 
FDNADOX_H',[],'SNG3POR','G3PD5' - 'HYD1 AND HYD4 AND SNG3POR AND FDNADOX_H',[],'SNG3POR','G3PD5' - 'SUCD4 AND SUCD1 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' - 'NADH6 AND FTMAOR AND NTMAOR',[],'FTMAOR','FTMAORi' - 'NADH6 AND FTMAOR AND NTMAOR',[],'NTMAOR','NTMAORi' - 'NADH6 AND TMAOR1 AND NTMAOR',[],'NTMAOR','NTMAORi' - 'NADH6 AND TMAOR2e AND NTMAOR',[],'TMAOR2e','TMAORdmq' - 'ACOAD2f AND SUCD1 AND PPCOAOc',[],'ACOAD2f','ACOAD2fi' - 'ACOAD2f AND SUCD1 AND PPCOAOc',[],'PPCOAOc','PPCOAOci' - 'ACOAD2fi AND SUCD1i AND PPCOAOc AND ACOAR',[],'PPCOAOc','PPCOAOci' - 'ACOAD2f AND ACOAD2 AND NADH6',[],'ACOAD2f','ACOAD2fi' - 'ACOAD7f AND C180SNrev AND NADH6',[],'ACOAD7f','ACOAD7fi' - 'THRAr AND THRD_L AND OBTFL',[],'THRAr','THRAi' - 'ALAPAT4te AND ALAt2r',[],'ALAt2r','ALAt2' - 'ALAPAT4te AND ALAt4r',[],'ALAt4r','ALAt4' - 'r2526 AND SERt2r',[],'SERt2r','r2471' - 'HISCAT1 AND HISt2r',[],'HISt2r','HISt2' - 'HISCAT1 AND HISt2r','ACCOAC','HISt2r','HISt2 AND DM_q8h2[c] AND EX_lac_L(e) AND L_LACt2r' - 'r1106 AND RIBFLVt2r',[],'RIBFLVt2r','RIBFLVt2' - 'ILEtec AND ILEt2r',[],'ILEt2r','ILEt2' - 'VALtec AND VALt2r',[],'VALt2r','VALt2' - 'NACUP AND NACt2r',[],'NACUP','NACt' - 'NACUP AND NACt2r',[],'NACt2r','NACHORCTL3le' - 'ORNt AND ORNt2r',[],'ORNt2r','ORNt2' - 'SUCCt AND SUCCt2r',[],'SUCCt',[] - 'NZP_NR AND NZP_NRe',[],'NZP_NRe','NZP_NRei' - 'FUCt2_1 AND FUCtp',[],'FUCt2_1','FUCt2_1i' - 'METATr',[],'METATr','METAT' - 'METATr',[],'METATr','METAT AND EX_met_L(e) AND METt2r' - 'PYDXKr',[],'PYDXKr','PYDXK' - 'PYDXKr',[],'PYDXKr','PYDXK AND EX_pydx(e) AND PYDXabc' - 'TMKr',[],'TMKr','TMK' - 'TMKr',[],'TMKr','TMK AND EX_thm(e) AND THMabc' - 'TMPKr',[],'TMPKr','TMPK' - 'TMPKr',[],'TMPKr','TMPK AND EX_thm(e) AND THMabc' - 'NMNATr',[],'NMNATr','NMNAT' - 'NMNATr',[],'NMNATr','NMNAT AND EX_nmn(e) AND NMNP' - 'DDGLKr',[],'DDGLKr','DDGLK' - 'XYLKr',[],'XYLKr','XYLK' - 'RBK_Dr','ARABI','RBK_Dr','RBK_D' - % 'METSr',[],'METSr','METS' - % 'METSr',[],'METSr','METS AND EX_met_L(e) AND METt2r' - 
'SUCCt2i',[],'SUCCt2i','SUCCt2' - 'THMt3 AND THMte',[],'THMte',[] - 'PPAt2r AND PPAtr',[],'PPAtr',[] - 'PPAt2r AND PPAt2',[],'PPAt2',[] - 'CBMKr AND OCBT AND CITRH',[],'CITRH','CITRHi' - 'DHLPHEOR AND DHPHEOGAT',[],'DHLPHEOR','DHLPHEORi' - 'MCCCr AND ACOAD8 AND ACACT1r AND HMGCOAS AND MGCOAH',[],'MCCCr','MCCC' - 'r0392 AND ALCD19 AND GLYD',[],'r0392','ALDD8x' - 'FDNADOX_H AND BTCOADH AND GLFRDO',[],'GLFRDO','GLFRDOi' - 'NADH6 AND SUCD4 AND G3PD8',[],'G3PD8','G3PD8i AND EX_thr_L(e) AND THRt2r' - 'RIBFLVt4 AND r1106',[],'RIBFLVt4','RIBFLVt4i' - 'ASP4DC AND PPCr AND ALATA_L',[],'ASP4DC','ASP4DCi' - 'ASP4DC AND PPCr AND PYK',[],'ASP4DC','ASP4DCi' - 'METFR AND FDNADOX_H AND 5MTHFOX',[],'METFR','METFRi' - 'ACOAR AND SUCD1 AND PPCOAOc',[],'PPCOAOc','PPCOAOci' - 'HYD1 AND FRDOr AND HYD4',[],'FRDOr','FRDO' - 'TSULt2 AND THSr1mq AND H2O2D AND SO3t AND THIORDXi',[],'TSULt2','TSULt2i' - 'FDNADOX_H AND HACD1 AND GLFRDO',[],'GLFRDO','GLFRDOi' - 'FDNADOX_H AND HACD1 AND OOR2r',[],'OOR2r','OOR2' - 'FUMt2r AND FUMt',[],'FUMt',[] - '5ASAt2r AND 5ASAp',[],'5ASAp',[] - 'HMR_0197 AND FACOAL140',[],'FACOAL140','FACOAL140i' - 'SULR AND SO3rDmq AND THSr1mq AND AMPSO3OX',[],'SULR','SULRi' - 'H2St AND TSULt2 AND TSULST AND GTHRD AND THSr1mq',[],'TSULt2','TSULt2i' - '15DAPt AND CADVt AND LYSt3r',[],'LYSt3r','LYSt3' - 'MAL_Lte AND r1144 AND MALNAt AND GLUt2r',[],'GLUt2r','GLUt2' - 'ACGApts AND ACGAMPM AND ACGAMPT AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' - 'CHLt2r AND sink_chols AND CHOLSH AND EX_so4(e)',[],'CHLt2r','CHLt2' - 'H2O2D AND CYTBD AND EX_h2o2(e) AND L_LACD2',[],'H2O2D','NPR' - 'AKGt2r AND AKGte',[],'AKGte',[] - 'PHEt2r AND PHEtec',[],'PHEt2r','PHEt2' - 'CHOLOX',[],'CHOLOX','CHOLOXi' - '34DCCBR',[],'34DCCBR','34DCCBRi' - 'r0389',[],'r0389','r0389i' - 'URAOX',[],'URAOX','URAOXi' - 'L_TRPCOO',[],'L_TRPCOO','L_TRPCOOi' - 'SQLE',[],'SQLE','SQLEi' - '1H2NPTH',[],'1H2NPTH','1H2NPTHi' - 'HSNOOX',[],'HSNOOX','HSNOOXi' - 'SALCACD',[],'SALCACD','SALCACDi' - 
'34HPPORdc',[],'34HPPORdc','34HPPORdci' - 'SULR AND SULRi',[],'SULR',[] - 'FUCt2_1 AND FUCt',[],'FUCt2_1',[] - 'G6PDH2r AND G6PBDH AND G6PDA AND G6PI',[],'G6PDH2r','G6PDH2' - 'ADMDCr',[],'ADMDCr','ADMDC' - % 'CD2t6r AND CD2abc1',[],'CD2t6r','CD2t6' - 'OOR2r AND POR4 AND FRD2 AND FUM AND ACONTb AND ACONTa AND SUCCt AND SUCCt2r',[],'SUCCt','FDNADOX_H' - 'ACKr AND NNAM AND NAPRT AND NACt AND NACt2r',[],'NACt','EX_asp_L(e) AND ASPt2r' - 'HYD4 AND POR4 AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r',[],'FORt2r',[] - 'OOR2r AND ACKr AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND ALCD2x AND ACALD AND SUCCt','ETOHt','FORt2r AND SUCCt','EX_etoh(e) AND ETOHt2r AND SUCCt2r' - 'POR4 AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND ALCD2x AND ACALD AND SUCCt','ETOHt','FORt2r AND SUCCt','FDNADOX_H AND EX_etoh(e) AND ETOHt2r AND SUCCt2r' - 'OOR2r AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND SUCCt AND NTRIR2x','PTAr','FORt2r AND SUCCt','PTAr AND SUCCt2r AND EX_no2(e) AND NO2t2' - 'OOR2r AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND SUCCt AND NTRIR2x','ACtr','FORt2r AND SUCCt','EX_ac(e) AND ACtr AND SUCCt2r AND EX_no2(e) AND NO2t2' - 'PIt7 AND EX_na1(e) AND ACKr AND OAASr AND FORt AND FORt2r','PIabc','FORt2r','DM_NA1' - % 'PPHISNPPT',[],'PPHISNPPT','PPHISNPPTi' - % 'PPHISPT',[],'PPHISPT','PPHISPTi' - % 'PPHNPPT',[],'PPHNPPT','PPHNPPTi' - 'PPHPT',[],'PPHPT','PPHPTi' - 'ICDHyr AND SUCOAS AND PYK AND FDNADOX_H AND POR4',[],'ICDHyr','ICDHy' - - % added additionally to prevent futile cycles in pairwise models - 'ASPt2_2 AND ASPt2r',[],'ASPt2r','ASPte' - 'SUCCt AND SUCCt2r',[],'SUCCt',[] - 'SUCCt AND SUCCt2_2 AND SUCCt2_3',[],'SUCCt',[] - 'ACKr AND ACEDIPIT AND APAT AND DAPDA AND 26DAPLLAT',[],'26DAPLLAT','26DAPLLATi' - 'MALNAt AND L_LACNa1t AND L_LACt2r',[],'L_LACt2r','L_LACt2' - 'G3PD8 AND SUCD4 AND G3PD1',[],'G3PD8','G3PD8i' - 'r0010 AND H2O2D',[],'H2O2D','NPR' - 'r1088',[],'r1088','CITt2' - 'FDNADOX_H AND AKGS AND OAASr AND ICDHx AND 
POR4',[],'ICDHx','ICDHxi' - 'CITt2ipp AND CAt4i AND CITCAt',[],'CITCAt','CITCAti' - 'G16BPS AND G1PPT AND PGK AND GAPD_NADP AND GAPD',[],'G16BPS','G16BPSi' - 'PPCr AND PYK AND ACTLDCCL AND HEDCHL AND OAAKEISO',[],'PPCr','PPC' - 'PPCr AND OAACL',[],'OAACL','OAACLi' - 'PPCr AND PYK AND ACPACT AND TDCOATA AND MCOATA AND HACD6',[],'PPCr','PPC' - 'OCBT AND CITRH AND CBMKr',[],'CBMKr','CBMK' - 'GALt2_2 AND GALt1r',[],'GALt2_2','GALt2_2i' - 'LDH_L AND L_LACDr',[],'L_LACDr','L_LACD' - 'UCO2L AND BUAMDH AND BURTADH AND H2CO3D',[],'UCO2L','UCO2Li' - 'FDOXR AND NADH7 AND NTRIR4',[],'FDOXR','FDOXRi' - 'NADH6 AND SNG3POR AND G3PD2',[],'SNG3POR','G3PD5' - 'PPCOAOc AND NADH6 AND ACOAR',[],'PPCOAOc','PPCOAOci' - 'PGK AND G1PP AND G16BPS AND G1PPTi',[],'G16BPS','G16BPSi' - 'FACOAL140 AND FA140ACPH',[],'FACOAL140','FACOAL140i' - 'R5PAT AND PRPPS AND NADN AND NAPRT',[],'R5PAT','R5PATi' - 'R5PAT AND ADPRDPTS AND PPM',[],'R5PAT','R5PATi' - 'MCCCr AND HMGCOAS AND MGCOAH AND ACOAD8 AND ACACT1r',[],'MCCCr','MCCC' - 'FRUpts AND FRUt2r',[],'FRUt2r','FRUt1r' - 'PGMT AND G16BPS AND G1PPTi',[],'G16BPS','G16BPSi' - 'ILEt2r AND ILEtec',[],'ILEt2r','ILEt2' - 'VALt2r AND VALtec',[],'VALt2r','VALt2' - 'SNG3POR AND OOR2r AND FUM AND POR4 AND HPYRI',[],'SNG3POR','G3PD5' - 'NTMAOR AND SUCDimq AND FRD7 AND NADH6',[],'NTMAOR','NTMAORi' - 'PIt6bi AND PIt7',[],'PIt7','PIt7ir' - 'THMt3 AND THMte',[],'THMt3','THMt3i' - 'PROPAT4te AND PROt4r',[],'PROt4r','PROt4' - 'GLUOR AND GALM1r AND NADH6',[],'GLUOR','GLUORi' - 'PGK AND G1PP AND G16BPS AND G1PPT',[],'G1PP','G1PPi' - 'FDOXR AND FDNADOX_H',[],'FDOXR','FDOXRi' - 'FRDOr AND HYD1 AND HYD4',[],'FRDOr','FRDO' - 'ASP4DC AND PYK AND PPCr',[],'ASP4DC','ASP4DCi' - 'NZP_NRe AND NZP_NR',[],'NZP_NRe','NZP_NRei' - 'NFORGLUAH AND 5MTHFGLUNFT AND FOMETR',[],'NFORGLUAH','NFORGLUAHi' - 'FDOXR AND POR4 AND FDH2',[],'FDOXR','FDOXRi' - 'ACGApts AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' - 'FDNADOX_H AND KLEURFd AND OIVD1r',[],'OIVD1r','OIVD1' - 'CITCAt AND CAt4i AND 
CITt13',[],'CITCAt','CITCAti' - '4ABZt2r AND 4ABZt',[],'4ABZt2r','4ABZt2' - 'FUMt2r AND FUMt',[],'FUMt2r','FUMt2' - 'TARCGLYL AND TARTD AND PYRCT',[],'TARCGLYL','TARCGLYLi' - 'SULR AND SO3rDmq AND SUCDimq',[],'SULR','SULRi' - 'CITt15 AND ZN2t4 AND Kt1r AND CITt2',[],'CITt15','CITt15i' - 'FRDO AND FDNADOX_H AND GLFRDO',[],'GLFRDO','GLFRDOi' - 'THMDt2r AND THYMDtr2',[],'THMDt2r','THMDt2' - 'HXANt2r AND HYXNt',[],'HXANt2r','HXANt2' - 'GSNt2r AND GSNt',[],'GSNt2r','GSNt2' - 'GALt4 AND GALt1r',[],'GALt4','GALt4i' - 'THSr1mq AND TSULt2 AND H2St AND SO3t AND TSULST AND GTHRD AND SUCDimq',[],'TSULt2','TSULt2i' - 'PPCKr AND MALFADO AND ACKr AND PPDK AND PPIACPT',[],'MALFADO','MALFADOi' - 'AKGte AND AKGt2r',[],'AKGt2r','AKGt2' - '5ASAp AND 5ASAt2r',[],'5ASAt2r','5ASAt2' - 'MAL_Lte AND MALt2r',[],'MALt2r','MALt2' - 'MAL_Lte AND GLUt2r AND MALNAt AND GLUt4r',[],'MALNAt','MALt4' - 'AKGMAL AND MALNAt AND AKGte',[],'MALNAt','MALt4' - 'r0792 AND 5MTHFOX AND FDNADOX_H AND MTHFD2 AND MTHFD',[],'r0792','MTHFR2rev' - 'H2O2D AND CYTBD AND r0010',[],'H2O2D','NPR' - 'PROD3 AND NADH6 AND HPROxr',[],'PROD3','PROD3i' - 'GLFRDO AND GLFRDOi',[],'GLFRDO',[] - 'DGOR AND SBTD_D2 AND GALM1r AND GNOXmq',[],'DGOR','DGORi' - 'DGORi AND SBTD_D2 AND GALM1r AND GNOXmq',[],'GNOXmq','GNOXmqi' - 'DGORi AND SBTD_D2 AND GALM1r AND GNOXuq',[],'GNOXuq','GNOXuqi' - 'LPCDH AND LPCOX AND NADH6pp AND ATPS4pp',[],'LPCDH','LPCDHi' - 'CITt2pp AND CITCAtpp AND CAt4ipp',[],'CITCAtpp','CITCAtipp' - 'GLFRDO AND FDNADOX_H',[],'FDOXR','GLFRDOi' - 'OOR2r AND FDNADOX_H AND AKGS',[],'OOR2r','OOR2' - 'OAASr AND ICDHx AND ACONTa AND ACONTb AND ALCD2x AND FDH AND PTAr AND ACKr',[],'ICDHx','ICDHxi' - 'METt2r AND METt3r',[],'METt2r','METt2' - 'NTP9 AND NDPK4',[],'NTP9','NTP9i' - 'MAN1PT2r',[],'MAN1PT2r','MAN1PT2' - 'HEX4 AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' - 'MANISO AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' - 'PGMT AND GALU AND GLBRAN AND GLDBRAN AND GLGNS1 
AND GLPASE1 AND NDPK2 AND PPA AND r1393',[],'NDPK2','NDPK2i' - 'D_GLUMANt AND MANt2r AND GLU_Dt2r',[],'GLU_Dt2r','GLU_Dt2' - 'NACUP AND NACSMCTte AND NAt3_1',[],'NAt3_1','NAt3' - 'GALU AND DCLMPDOH AND GDPGALP AND GDPMANNE AND GALT',[],'GALT','GALTi' - 'HYD2 AND HYD4 AND NTRIR4 AND FDOXR',[],'FDOXR','FDOXRi' - 'FACOAL181',[],'FACOAL181','FACOAL181i' - 'MAN6PI AND DCLMPDOH AND GDPGALP AND GDPMANNE AND HMR_7271',[],'GDPGALP','GDPGALPi' - 'FE2DH AND FE3Ri AND NADH6 AND SUCD1 AND FRD7',[],'FE2DH','FE2DHi' - 'GLBRAN AND GLDBRAN AND GLGNS1 AND GLPASE1 AND GPDDA1',[],'GLDBRAN',[] - }; - - -% growth-restoring gapfills: needed if the futile cycle was the model's -% only way to produce ATP and growth rate without it is zero. Enables ATP -% production through a more realistic pathway. -growthGapfills={ - 'EX_succ(e) AND SUCCt' - 'EX_fum(e) AND FUMt2' - 'EX_succ(e) AND SUCCt2r' - 'EX_fum(e) AND FUMt2 AND EX_succ(e) AND SUCCt2r' - 'EX_for(e) AND FORt2r' - 'EX_ac(e) AND ACt2r' - 'EX_etoh(e) AND ETOHt2r' - 'EX_hco3(e) AND HCO3abc AND H2CO3D' - % consider adding glycolysis - 'HEX1 AND PFK AND FBA AND TPI AND GAPD AND PGK AND PGM AND ENO AND PYK' - 'HEX1 AND PFK AND FBA AND TPI AND GAPD AND PGK AND PGM AND ENO AND PYK AND EX_etoh(e) AND ETOHt2r' - 'EX_q8(e) AND Q8abc' - 'EX_2dmmq8(e) AND 2DMMQ8abc' - 'DM_q8h2[c]' - 'DM_NA1' - 'G3PFDXORi' % tentative-some models would not produce feasible amounts of ATP without it - 'ASP4DCi' % tentative-some models would not produce feasible amounts of ATP without it - 'EX_lac_L(e) AND L_LACt2r' - 'EX_acald(e) AND ACALDt' - 'EX_asp_L(e) AND ASPt2r' - }; - -for i = 2:size(reactionsToReplace, 1) - % take other models in a multi-species model into account if applies - if nargin>3 && ~isempty(unionRxns) - go = 1; - present=strsplit(reactionsToReplace{i,1},' AND '); - if ~(length(intersect(unionRxns,present))==length(present)) - go= 0; - end - notpresent=reactionsToReplace{i,2}; - if ~isempty(intersect(unionRxns,notpresent)) - go= 0; - end - else - go = 
1; - present=strsplit(reactionsToReplace{i,1},' AND '); - if ~(length(intersect(model.rxns,present))==length(present)) - go= 0; - end - notpresent=reactionsToReplace{i,2}; - if ~isempty(intersect(model.rxns,notpresent)) - go= 0; - end - end - if go == 1 - % Only make the change if biomass can still be produced - toRemove=strsplit(reactionsToReplace{i,3},' AND '); - for k=1:length(toRemove) - RxForm = database.reactions{find(ismember(database.reactions(:, 1), toRemove{k})), 3}; - if contains(RxForm,'[e]') - newName=[toRemove{k} 'pp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - toRemove{k}=newName; - end - end - end - end - modelTest = removeRxns(model, toRemove); - if ~isempty(reactionsToReplace{i, 4}) - rxns=strsplit(reactionsToReplace{i, 4},' AND '); - for j=1:length(rxns) - % create a new formula - RxForm = database.reactions{find(ismember(database.reactions(:, 1), rxns{j})), 3}; - - if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) - newName=[rxns{j} 'ipp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - RxForm=dbForm; - end - end - modelTest = addReaction(modelTest, newName, RxForm); - else - modelTest = addReaction(modelTest, rxns{j}, RxForm); - end - - end - end - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f > tol - model = modelTest; - if ~isempty(reactionsToReplace{i, 3}) - for j=1:length(toRemove) - deletedRxns{delCnt, 1} = toRemove{j}; - delCnt = delCnt + 1; - end - end - if ~isempty(reactionsToReplace{i, 4}) - if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 - 
addedRxns{addCnt, 1} = toRemove{1}; - end - for j=1:length(rxns) - addedRxns{addCnt, j+1} = rxns{j}; - end - addCnt = addCnt + 1; - end - else - % try growth-restoring gapfills - gf=1; - modelPrevious=modelTest; - for k=1:size(growthGapfills,1) - ggrxns=strsplit(growthGapfills{k, 1},' AND '); - % to not add reactions that were just flagged for removal - ggrxns=setdiff(ggrxns,toRemove); - for j=1:length(ggrxns) - % create a new formula - RxForm = database.reactions{find(ismember(database.reactions(:, 1), ggrxns{j})), 3}; - if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) - newName=[ggrxns{j} 'ipp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - RxForm=dbForm; - end - end - if isempty(find(contains(model.rxns,newName))) - modelTest = addReaction(modelTest, newName, RxForm); - end - else - if isempty(find(contains(model.rxns,ggrxns{j}))) - modelTest = addReaction(modelTest, ggrxns{j}, RxForm); - end - end - end - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f > tol - model = modelTest; - % add replaced reactions - if ~isempty(reactionsToReplace{i, 3}) - for j=1:length(toRemove) - deletedRxns{delCnt, 1} = toRemove{j}; - delCnt = delCnt + 1; - end - end - if ~isempty(reactionsToReplace{i, 4}) - if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 - addedRxns{addCnt, 1} = toRemove{1}; - end - for j=1:length(rxns) - addedRxns{addCnt, j+1} = rxns{j}; - end - addCnt = addCnt + 1; - end - % add growth-restoring gapfilled reactions - for j=1:length(ggrxns) - gfRxns{length(gfRxns)+1, 1} = ggrxns{j}; - end - gf=0; - break - end - modelTest=modelPrevious; - end - % if none of that worked - if gf==1 - [modelTest,untGF] = untargetedGapFilling(modelTest,'max',database,1,1); - if ~isempty(untGF) - if ~isempty(reactionsToReplace{i, 
3}) - for j=1:length(toRemove) - deletedRxns{delCnt, 1} = toRemove{j}; - delCnt = delCnt + 1; - end - end - if ~isempty(reactionsToReplace{i, 4}) - if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 - addedRxns{addCnt, 1} = toRemove{1}; - end - for j=1:length(rxns) - addedRxns{addCnt, j+1} = rxns{j}; - end - addCnt = addCnt + 1; - end - for j=1:length(untGF) - gfRxns{length(gfRxns)+1, 1} = untGF{j}; - end - end - end - end - end -end - -%% Make the proposed changes -model = model_old; -if ~isempty(deletedRxns) - for j = 1:length(deletedRxns) - model = removeRxns(model, deletedRxns{j, 1}); - end -end - -% make sure gene rule and notes are kept while replacing -if ~isempty(addedRxns) - for j = 1:size(addedRxns,1) - model = addReaction(model, addedRxns{j, 2}, database.reactions{find(ismember(database.reactions(:, 1), addedRxns{j, 2})), 3}); - % if a reaction from the old version is replaced, keep the GPR - if ~isempty(addedRxns{j, 1}) && ~isempty(find(ismember(model_old.rxns,addedRxns{j, 1}))) - rxnIDNew=find(ismember(model.rxns,addedRxns{j, 2})); - rxnIDOld=find(ismember(model_old.rxns,addedRxns{j, 1})); - model.grRules{rxnIDNew,1}=model_old.grRules{rxnIDOld,1}; - model.rxnConfidenceScores(rxnIDNew,1)=model_old.rxnConfidenceScores(rxnIDOld,1); - end - model.comments{end,1}='Added to eliminate futile cycles during DEMETER pipeline.'; - model.rxnConfidenceScores(end,1)=1; - % if more than one reaction is added - if size(addedRxns,2)>2 - if ~isempty(addedRxns{j,3}) - for k=3:size(addedRxns(j,:),2) - if ~isempty(addedRxns{j,k}) - model = addReaction(model, addedRxns{j, k}, database.reactions{find(ismember(database.reactions(:, 1), addedRxns{j, k})), 3}); - model.comments{end,1}='Added to eliminate futile cycles during DEMETER pipeline.'; - model.rxnConfidenceScores(end,1)=1; - end - end - end - end - end -end - -% add any gapf-ileld reactions -if ~isempty(gfRxns) - for i=1:length(gfRxns) - model = addReaction(model, gfRxns{i,1}, 
database.reactions{find(ismember(database.reactions(:, 1), gfRxns{i,1})), 3}); - model.comments{end,1}='Added to enable growth after eliminating futile cycles during DEMETER pipeline.'; - model.rxnConfidenceScores(end,1)=1; - end -end - -if size(addedRxns,2) >1 - addedRxns=addedRxns(:,2); -end - -% relax constraints-cause infeasibility problems -relaxConstraints=model.rxns(find(model.lb>0)); -model=changeRxnBounds(model,relaxConstraints,0,'l'); - -% change back to unlimited medium -% list exchange reactions -exchanges = model.rxns(strncmp('EX_', model.rxns, 3)); -% open all exchanges -model = changeRxnBounds(model, exchanges, -1000, 'l'); -model = changeRxnBounds(model, exchanges, 1000, 'u'); - -end +function [model, deletedRxns, addedRxns, gfRxns] = removeFutileCycles(model, biomassReaction, database,unionRxns,constrainedModel) +% Part of the DEMETER pipeline. Resolves reactions that are running in +% infeasible directions and causing futile cycles that result in +% unrealistically high ATP production. All solutions were identified +% through manual inspection. Any new solutions identified for reaction +% combinations not yet encountered by DEMETER may be added. +% +% USAGE: +% +% [model, deletedRxns, addedRxns, gfRxns] = removeFutileCycles(model, biomassReaction, database,unionRxns,constrainedModel) +% +% INPUTS +% model: COBRA model structure +% biomassReaction: Reaction ID of the biomass objective function +% database: rBioNet reaction database containing min. 3 columns: +% Column 1: reaction abbreviation, Column 2: reaction +% name, Column 3: reaction formula. 
+% unionRxns: Union of reactions from multiple reconstructions +% (only for debugging multi-species models) +% constrainedModel: COBRA model constrained with defined medium (for +% certain steps of DEMETER) +% +% OUTPUT +% model: COBRA model structure +% deletedRxns: Deleted reactions that were causing futile cycles +% addedRxns: Added irreversible versions of the deleted reactions +% gfRxns: Additional gap-filled reactions needed to enable +% growth. Low confidence score. +% +% .. Author: +% - Almut Heinken, 2016-2019 + +deletedRxns = {}; +addedRxns = {}; +gfRxns = {}; + +tol = 1e-8; + +% model needs to be rebuilt, pipeline may crash otherwise +% model = rebuildModel(model); +model_old = model; +model = changeObjective(model, biomassReaction); + +% load complex medium +constraints = readtable('ComplexMedium.txt', 'Delimiter', 'tab'); +constraints=table2cell(constraints); +constraints=cellstr(string(constraints)); + +% apply complex medium +model = useDiet(model,constraints); + +if nargin > 4 && ~isempty(constrainedModel) + model=constrainedModel; +end + +delCnt = 1; +addCnt = 1; + +% Create table with information on reactions to replace to remove futile +% cycles. This information was determined manually. 
+reactionsToReplace = {'if present','if not present','removed','added' + 'LYSt2r AND LYSt3r',[],'LYSt3r','LYSt3' + 'FDHr',[],'FDHr','FDH' + 'GLYO1',[],'GLYO1','GLYO1i' + 'EAR40xr',[],'EAR40xr','EAR40x' + 'PROt2r AND PROt4r',[],'PROt4r','PROt4' + 'FOROXAtex AND FORt',[],'FORt',[] + 'NO2t2r AND NTRIR5',[],'NO2t2r','NO2t2' + 'NOr1mq AND NHFRBOr',[],'NHFRBOr','NHFRBO' + 'N2OO AND NHFRBOr',[],'NHFRBOr','NHFRBO' + 'NIR AND L_LACDr',[],'L_LACDr','L_LACD' + 'NARK AND NTRIR5 AND L_LACDr',[],'L_LACDr','L_LACD' + 'PIt6b AND PIt7',[],'PIt7','PIt7ir' + 'ABUTt2r AND GLUABUTt7',[],'ABUTt2r','ABUTt2' + 'ABUTt2r AND ABTAr',[],'ABTAr','ABTA' + 'Kt1r AND Kt3r AND EX_chsterol(e) AND ARGDA',[],'Kt3r','Kt3 AND ASPTA AND PC AND H2CO3D AND ASPNH4L AND r1667 AND EX_orn(e)' + 'Kt1r AND Kt3r','EX_for(e)','Kt3r','Kt3 AND EX_for(e) AND FORt2r' + 'Kt1r AND Kt3r',[],'Kt3r','Kt3' + 'Kt1r AND Kt3r AND ACtr',[],'Kt3r AND ACtr','Kt3 AND ACt2r' + 'CYTDt4 AND CYTDt2r',[],'CYTDt2r','CYTDt2' + 'ASPt2_2 AND ASPt2r',[],'ASPt2_2','ASPt2_2i' + 'ASPt2_3 AND ASPt2r',[],'ASPt2r','ASPt2' + 'FUMt2_2 AND FUMt2r',[],'FUMt2r','FUMt' + 'SUCCt2_2 AND SUCCt2r','SUCCt','SUCCt2r','SUCCt' + 'SUCCt2_3r AND SUCCt2r',[],'SUCCt2r',[] + 'MALFADO AND MDH',[],'MALFADO','MALFADOi' + 'MALFADO AND GLXS',[],'MALFADO','MALFADOi' + 'r0392 AND GLXCL',[],'r0392','ALDD8x' + 'HACD1 AND PHPB2',[],'PHPB2','PHPB2i' + 'PPCKr AND PPCr',[],'PPCKr','PPCK' + 'PPCKr AND GLFRDO AND FXXRDO',[],'PPCKr','PPCK' + 'BTCOADH AND FDNADOX_H AND ACOAD1',[],'ACOAD1','ACOAD1i' + 'ACEDIPIT AND APAT AND DAPDA AND 26DAPLLAT',[],'26DAPLLAT','26DAPLLATi' + 'ACKr AND ACEDIPIT AND APAT AND DAPDA',[],'DAPDA','DAPDAi' + 'ACKr AND ACEDIPIT AND APAT AND DAPDA',[],'DAPDA','DAPDAi AND EX_asp_L(e) AND ASPt2r' + 'MALNAt AND NAt3_1 AND MALt2r',[],'NAt3_1','NAt3' + 'MALNAt AND NAt3_1 AND MALt2r',[],'MALt2r','MALt2' + 'MALNAt AND MAL_Lte AND MALt2r',[],'MALt2r','MALt2' + 'MAL_Lte AND MDH3 AND MALt2r',[],'MALt2r','MALt2' + 'MALNAt AND NAt3_1 AND MALt2r AND URIt2r AND 
URIt4',[],'URIt2r','URIt2' + 'DADNt2r AND HYXNt',[],'HYXNt','HYXNti' + 'URIt2r AND URAt2r',[],'URAt2r','URAt2' + 'XANt2r AND URAt2r',[],'URAt2r','URAt2' + 'XANt2r AND CSNt6',[],'CSNt6','CSNt2' + 'XANt2r AND DADNt2r',[],'XANt2r','XANt2' + 'XANt2r AND XPPTr',[],'XPPTr','XPPT' + 'XANt2r AND PUNP7',[],'XANt2r','XANt2' + 'r1667 AND ARGt2r',[],'ARGt2r','ARGt2' + 'PIt7 AND NAt3_1 AND GLUt4r',[],'GLUt4r','r1144' + 'GLUt2r AND NAt3_1 AND GLUt4r',[],'GLUt4r','r1144' + 'GLYt2r AND NAt3_1 AND GLYt4r',[],'GLYt2r','GLYt2' + 'GLUt2r AND NAt3 AND GLUt4r',[],'GLUt4r','r1144' + 'L_LACNa1t AND L_LACt2r',[],'L_LACt2r','L_LACt' + 'G3PD8 AND SUCD1 AND G3PD1 AND EX_succ(e)',[],'G3PD8','G3PD8i' + 'G3PD8 AND SUCD4 AND G3PD1 AND EX_succ(e)',[],'G3PD8','G3PD8i' + 'G3PD8 AND SUCD1 AND G3PD1','SUCCt2r','G3PD8','G3PD8i AND EX_succ(e) AND SUCCt' + 'G3PD8 AND SUCD4 AND G3PD1','SUCCt2r','G3PD8','G3PD8i AND EX_succ(e) AND SUCCt' + 'ACOAD1 AND ACOAD1f AND SUCD4',[],'ACOAD1f','ACOAD1fi' + 'PGK AND D_GLY3PR',[],'D_GLY3PR','D_GLY3PRi' + 'H2O2D',[],'H2O2D','NPR' + 'ACCOACL AND BTNCL',[],'BTNCL','BTNCLi' + 'r0220 AND r0318',[],'r0318','r0318i' + 'MTHFRfdx AND FDNADOX_H',[],'FDNADOX_H',[] + 'FDNADOX_H AND FDX_NAD_NADP_OX',[],'FDX_NAD_NADP_OX','FDX_NAD_NADP_OXi' + 'PROPAT4te AND PROt2r',[],'PROt2r','PROt2' + 'G3PD8 AND GLYC3Pt',[],'GLYC3Pt','GLYC3Pti' + 'OAACL AND PPCr AND NDPK9',[],'OAACL','OAACLi' + 'OAACL AND PPCr AND NDPK3',[],'OAACL','OAACLi' + 'OAACL AND ASPTA AND NDPK9',[],'OAACL','OAACLi' + 'OAACL AND ASPTA AND PPDK',[],'OAACL','OAACLi' + 'CBMKr AND OCBT AND CITRH','ARGDA','CBMKr','CBMK' + 'SPTc AND r0392 AND GHMT2r',[],'GHMT2r','GHMT2' + 'OAACL AND OAASr AND NDPK9',[],'OAASr','OAAS' + 'G16BPS AND G1PP AND G1PPT',[],'G16BPS','G16BPSi' + 'ASPK AND ASAD AND HSDx',[],'ASPK','ASPKi' + 'BTCOADH AND ACOAD1f AND FDNADOX_H',[],'ACOAD1f','ACOAD1fi' + 'TARCGLYL AND TARTD AND PYK',[],'TARCGLYL','TARCGLYLi' + 'HPROxr AND PROD3',[],'PROD3','PROD3i' + 'RBPC AND PRKIN',[],'PRKIN','PRKINi' + 'MGt5 AND CITt10 AND 
CITCAt',[],'CITt10','CITt10i' + 'MGt5 AND CITt10 AND CITCAt',[],'CITCAt','CITCAti' + 'CAt4i AND CITCAt AND r1088',[],'r1088','CITt2' + 'CAt4i AND CITCAt AND r1088',[],'CITCAt','CITCAti' + 'SUCCt AND CITt7 AND r1088',[],'r1088','CITt2' + 'MMSAD5 AND MALCOAPYRCT AND MMSAD4',[],'MMSAD4','MMSAD4i' + 'NTRIR5 AND FDOXR AND FDNADOX_H',[],'NTRIR5','NTRIR5i' + 'GLFRDO AND FRDOr AND FDNADOX_H',[],'FRDOr','FRDO' + 'GCALDL AND r0392 AND GCALDDr',[],'GCALDDr','GCALDD' + 'ACACT1r AND SUCOAS AND OCOAT1r',[],'OCOAT1r','OCOAT1' + 'FDNADOX_H AND BTCOADH AND MAOX2 AND GLFRDO',[],'GLFRDO','GLFRDOi' + 'PYRCT AND SUCOAS AND PPCr',[],'PPCr','PPC' + '3CARLPDH AND r0163c AND r0556c',[],'r0556c','r0556ci' + 'NACUP AND NACt2r',[],'NACUP',[] + 'NACt AND NACt2r',[],'NACt',[] + 'NCAMUP AND NCAMt2r',[],'NCAMUP',[] + 'ORNt AND ORNt2r',[],'ORNt',[] + 'FORt AND FORt2r',[],'FORt',[] + 'ARABt AND ARABDt2',[],'ARABt',[] + 'ASPte AND ASPt2_2',[],'ASPte',[] + 'ASPte AND ASPt2_3',[],'ASPte',[] + 'ASPt2 AND ASPt2_2',[],'ASPt2',[] + 'ASPt2 AND ASPt2_3',[],'ASPt2',[] + 'THYMDt AND THMDt2r',[],'THYMDt',[] + 'CBMK AND CBMKr',[],'CBMKr',[] + 'SPTc AND TRPS2r AND TRPAS2',[],'TRPS2r','TRPS2' + 'PROD3 AND PROD3i',[],'PROD3',[] + 'PROPAT4te AND PROt2r AND PROt2',[],'PROt2r',[] + 'CITt10i AND CITCAt AND CITCAti',[],'CITCAt',[] + 'GUAt2r AND GUAt',[],'GUAt2r','GUAt2' + 'PROPAT4te AND PROt4r AND PROt4',[],'PROt4r',[] + 'INSt2 AND INSt',[],'INSt2','INSt2i' + 'GNOXuq AND GNOXuqi',[],'GNOXuq',[] + 'GNOXmq AND GNOXmqi',[],'GNOXmq',[] + 'MMSAD5 AND MSAS AND MALCOAPYRCT AND PPCr AND ACALD',[],'ACALD','ACALDi' + 'PGK AND G1PP AND G16BPS AND G1PPT',[],'G16BPS','G16BPSi' + 'FRD7 AND SUCD1 AND G3PD8',[],'G3PD8','G3PD8i' + 'LACLi AND PPCr AND RPE AND PKL AND FTHFL AND MTHFC',[],'MTHFC','MTHFCi' + 'RMNt2 AND RMNt2_1',[],'RMNt2_1',[] + 'MNLpts AND MANAD_D AND MNLt6',[],'MNLt6','MNLt6i' + 'FDNADOX_H AND SULRi AND FXXRDO',[],'FXXRDO','FXXRDOi' + 'FDNADOX_H AND SO3R AND FXXRDO',[],'FXXRDO','FXXRDOi' + 'FDNADOX_H AND AKGS AND BTCOADH 
AND OOR2r',[],'OOR2r','OOR2' + 'FDNADOX_H AND AKGS AND BTCOADH AND OOR2 AND POR4',[],'POR4','POR4i' + 'FDNADOX_H AND AKGS AND OAASr AND ICDHx AND POR4i',[],'ICDHx','ICDHxi' + 'GLXS AND GCALDL AND GCALDDr',[],'GCALDDr','GCALDD' + 'GLYCLTDxr AND GLYCLTDx',[],'GLYCLTDxr',[] + 'GCALDD AND GCALDDr',[],'GCALDDr',[] + 'BGLA AND BGLAr',[],'BGLAr',[] + 'AKGMAL AND MALNAt AND AKGt2r',[],'AKGt2r','AKGt2' + 'AKGte AND MAL_Lte AND AKGt2r',[],'AKGt2r','AKGt2' + 'TRPS1 AND TRPS2r AND TRPS3r',[],'TRPS2r','TRPS2' + 'OAACL AND OAACLi',[],'OAACL',[] + 'DHDPRy AND DHDPRyr',[],'DHDPRyr',[] + 'EDA_R AND EDA',[],'EDA_R',[] + 'GLYC3Pt AND GLYC3Pti',[],'GLYC3Pt',[] + 'TDCOATA AND FA140ACPH AND ACS AND FACOAL140',[],'FACOAL140','FACOAL140i' + 'FA180ACPHrev AND STCOATA AND FACOAL180',[],'FACOAL180','FACOAL180i' + 'FA180ACPHrev AND STCOATA AND FACOAL180',[],'FACOAL180','FACOAL180i AND ADK1' + 'CITt2 AND CAt4i AND CITCAt',[],'CITCAt','CITCAti' + 'AHCYSNS_r AND AHCYSNS',[],'AHCYSNS_r',[] + 'FDOXR AND GLFRDO AND OOR2r AND FRDOr',[],'FRDOr','FRDO' + 'GNOX AND GNOXy AND GNOXuq AND GNOXmq',[],'GNOXmq','GNOXmqi' + 'GNOX AND GNOXy AND GNOXuq AND GNOXmqi',[],'GNOXuq','GNOXuqi' + 'SHSL1r AND SHSL2 AND SHSL4r',[],'SHSL4r','SHSL4' + 'AHSERL3 AND CYSS3r AND METSOXR1r AND SHSL4r',[],'TRDRr','TRDR' + 'ACACT1r AND ACACt2 AND ACACCTr AND OCOAT1r',[],'OCOAT1r','OCOAT1' + 'ACONT AND ACONTa AND ACONTb',[],'ACONT',[] + 'ALAt2r AND ALAt4r',[],'ALAt2r','ALAt2' + 'CYTK2 AND DCMPDA AND URIDK3',[],'DCMPDA','DCMPDAi' + 'MALNAt AND NAt3_1 AND PIt7ir',[],'NAt3_1','NAt3' + 'PIt6b AND PIt7ir',[],'PIt6b','PIt6bi' + 'LEUTA AND LLEUDr',[],'LLEUDr','LLEUD' + 'ILETA AND L_ILE3MR',[],'L_ILE3MR','L_ILE3MRi' + 'TRSARry AND TRSARr',[],'TRSARr','TRSAR' + 'THRD AND THRAr AND PYRDC',[],'THRAr','THRAi' + 'THRD AND GLYAT AND PYRDC',[],'GLYAT','GLYATi' + 'SUCD1 AND SUCD4 AND SUCDimq AND NADH6',[],'SUCD1','SUCD1i' + 'POR4 AND SUCDimq AND NADH6 AND PDHa AND FRD7 AND FDOXR AND NTRIR4',[],'POR4','POR4i' + 'SUCDimq AND NADH6 AND HYD1 AND HYD4 
AND FRD7 AND FDOXR AND NTRIR4',[],'FDOXR','FDOXRi' + 'PPCr AND SUCOAS AND OAASr AND ICDHx AND POR4i AND ACONTa AND ACONTb AND ACACT1r AND 3BTCOAI AND OOR2r',[],'ICDHx','ICDHxi' + 'ICDHx AND AKGS AND SUCOAS AND PYK AND POR4 AND FDNADOX_H AND PPCr AND ICL AND GLXS AND MDH',[],'ICDHx','ICDHxi' + 'PYNP1r AND CSNt6',[],'PYNP1r','PYNP1' + 'ASPK AND ASAD AND HSDy',[],'ASPK','ASPKi' + 'GLUt2r AND GLUABUTt7 AND ABTAr',[],'GLUt2r','GLUt2' + 'DURAD AND DHPM1 AND UPPN',[],'DURAD','DURADi' + 'XU5PG3PL AND PKL',[],'PKL',[] + 'G16BPS AND G1PPT AND PGK',[],'G16BPS','G16BPSi' + 'G1PPT AND PGK AND GAPD_NADP AND GAPD',[],'G1PPT','G1PPTi' + 'PPIt2e AND GUAPRT AND AACPS6 AND GALT',[],'PPIt2e','PPIte' + 'PPIt2e AND GLGC AND NADS2 AND SADT',[],'PPIt2e','PPIte' + 'MCOATA AND MALCOAPYRCT AND C180SNrev',[],'MCOATA','MACPMT' + 'PPCr AND MALCOAPYRCT AND MMSAD5 AND MSAS',[],'PPCr','PPC' + 'ACt2r AND ACtr',[],'ACtr',[] + 'LEUt2r AND LEUtec',[],'LEUtec',[] + 'PTRCt2r AND PTRCtex2',[],'PTRCtex2',[] + 'TYRt2r AND TYRt',[],'TYRt',[] + 'TSULt2 AND SO3t AND H2St AND TRDRr',[],'TRDRr','TRDR' + 'AMPSO3OX AND SADT AND EX_h2s(e) AND CHOLSH',[],'AMPSO3OX','AMPSO3OXi' + 'NTRIR4 AND FDNADOX_H AND FDOXR',[],'FDOXR','FDOXRi' + 'ASPNH4L AND ASPt2r',[],'ASPNH4L','ASPNH4Li' + 'DDGLKr AND DDGLCNt2r',[],'DDGLKr','DDGLK' + 'ARGSSr',[],'ARGSSr','ARGSS' + 'ARGDr',[],'ARGDr','ARGDA' + 'SERD_Lr',[],'SERD_Lr','SERD_L' + 'G1PP AND GLGC AND GLCP',[],'G1PP','G1PPi' + 'CBMKr AND OCBT AND r1667','ARGDA','CBMKr','CBMK' + 'TRPS3r AND TRPS1 AND TRPS2r',[],'TRPS3r','TRPS3' + 'D_LACD AND L_LACD2 AND L_LACDr',[],'L_LACDr','L_LACD' + 'PYK AND MMSAD5 AND PPCr AND MSAS AND MALCOAPYRCT',[],'PPCr','PPC' + 'MALFADO AND PPCKr AND ME2',[],'MALFADO','MALFADOi' + 'PIabc AND PIt7',[],'PIt7','PIt7ir' + 'G3PFDXOR AND FDNADOX_H',[],'FDNADOX_H','FDNADOX_Hi' + 'POR4 AND FRDOr',[],'FRDOr','FRDO' + 'TRPAS2',[],'TRPAS2','TRPAS2i' + 'TRPS2r',[],'TRPS2r','TRPS2' + % 'DPCOAt',[],'DPCOAt','DPCOAti' + 'AMPt2r',[],'AMPt2r','AMPt2' + 
'dTMPt2r',[],'dTMPt2r','dTMPt2' + 'NADPt',[],'NADPt','NADPti' + 'BTCOADH AND FDOXR AND BUTCTr AND BUTKr AND ACOAD1i AND FDNADOX_H',[],'BTCOADH','BTCOADHi' + 'TRDRr AND THSr1mq AND H2St AND SO3t AND TSULt2',[],'TRDRr','TRDR' + 'TRDRr AND AMPSO3OX2 AND AMPSO3OX AND TSULt2',[],'TRDRr','TRDR' + 'OIVD1r AND KLEURFd',[],'OIVD1r','OIVD1' + 'GALt1r AND GALt2_2',[],'GALt2_2','GALt2_2i' + 'GALt4 AND GALt2_2',[],'GALt2_2','GALt2_2i' + 'GALt1r AND GALt4',[],'GALt4','GALt4i' + 'ACGAt AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' + 'NTRIR2y AND FDNADOX_H AND FDOXR',[],'FDOXR','FDOXRi' + 'G3PFDXOR AND PGK',[],'G3PFDXOR','G3PFDXORi' + 'GNOXuq AND GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXuq','GNOXuqi' + 'GNOXuq AND GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXmq','GNOXmqi' + 'GNOXmq AND DGOR AND GLUOR AND NADH6',[],'GNOXmq','GNOXmqi' + 'DGOR AND GLUOR AND NADH6',[],'GLUOR','GLUORi' + 'GNOXuq AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXuq','GNOXuqi' + 'GNOXuq AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXmq','GNOXmqi' + 'GNOXuqi AND GNOXmq AND DGOR AND SBTD_D2 AND NADH6',[],'GNOXmq','GNOXmqi' + 'FACOAL160',[],'FACOAL160','FACOAL160i' + 'FACOAL180',[],'FACOAL180','FACOAL180i' + 'SUCD1 AND SUCCt AND SUCCt2r',[],'SUCCt',[] + 'SUCD4 AND SUCCt AND SUCCt2r',[],'SUCCt',[] + 'CBMKr AND CBMK',[],'CBMK',[] + 'ETOHt2r AND ETOHt',[],'ETOHt',[] + 'ETOHt2r AND ETOHt3',[],'ETOHt2r',[] + 'ETOHt2r AND ETOHt3',[],'ETOHt3',[] + 'DTTPti',[],'DTTPti',[] + 'UCO2L AND BUAMDH AND BURTADH',[],'UCO2L','UCO2Li' + 'NADH6 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' + 'NADH8 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' + 'NADH6 AND SNG3POR','SUCCt2r','SNG3POR','G3PD5 AND EX_succ(e) AND SUCCt' + 'NADH8 AND SNG3POR','SUCCt2r','SNG3POR','G3PD5 AND EX_succ(e) AND SUCCt' + 'NADH6 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_q8(e) AND Q8abc AND EX_2dmmq8(e) AND 2DMMQ8abc' + 'NADH8 AND SNG3POR',[],'SNG3POR','G3PD5 AND EX_q8(e) AND Q8abc AND EX_2dmmq8(e) AND 2DMMQ8abc' + 'FDH2 AND SNG3POR AND 
FDNADOX_H',[],'SNG3POR','G3PD5' + 'HYD1 AND HYD4 AND SNG3POR AND FDNADOX_H',[],'SNG3POR','G3PD5' + 'SUCD4 AND SUCD1 AND SNG3POR AND EX_succ(e)',[],'SNG3POR','G3PD5' + 'NADH6 AND FTMAOR AND NTMAOR',[],'FTMAOR','FTMAORi' + 'NADH6 AND FTMAOR AND NTMAOR',[],'NTMAOR','NTMAORi' + 'NADH6 AND TMAOR1 AND NTMAOR',[],'NTMAOR','NTMAORi' + 'NADH6 AND TMAOR2e AND NTMAOR',[],'TMAOR2e','TMAORdmq' + 'ACOAD2f AND SUCD1 AND PPCOAOc',[],'ACOAD2f','ACOAD2fi' + 'ACOAD2f AND SUCD1 AND PPCOAOc',[],'PPCOAOc','PPCOAOci' + 'ACOAD2fi AND SUCD1i AND PPCOAOc AND ACOAR',[],'PPCOAOc','PPCOAOci' + 'ACOAD2f AND ACOAD2 AND NADH6',[],'ACOAD2f','ACOAD2fi' + 'ACOAD7f AND C180SNrev AND NADH6',[],'ACOAD7f','ACOAD7fi' + 'THRAr AND THRD_L AND OBTFL',[],'THRAr','THRAi' + 'ALAPAT4te AND ALAt2r',[],'ALAt2r','ALAt2' + 'ALAPAT4te AND ALAt4r',[],'ALAt4r','ALAt4' + 'r2526 AND SERt2r',[],'SERt2r','r2471' + 'HISCAT1 AND HISt2r',[],'HISt2r','HISt2' + 'HISCAT1 AND HISt2r','ACCOAC','HISt2r','HISt2 AND DM_q8h2[c] AND EX_lac_L(e) AND L_LACt2r' + 'r1106 AND RIBFLVt2r',[],'RIBFLVt2r','RIBFLVt2' + 'ILEtec AND ILEt2r',[],'ILEt2r','ILEt2' + 'VALtec AND VALt2r',[],'VALt2r','VALt2' + 'NACUP AND NACt2r',[],'NACUP','NACt' + 'NACUP AND NACt2r',[],'NACt2r','NACHORCTL3le' + 'ORNt AND ORNt2r',[],'ORNt2r','ORNt2' + 'SUCCt AND SUCCt2r',[],'SUCCt',[] + 'NZP_NR AND NZP_NRe',[],'NZP_NRe','NZP_NRei' + 'FUCt2_1 AND FUCtp',[],'FUCt2_1','FUCt2_1i' + 'METATr',[],'METATr','METAT' + 'METATr',[],'METATr','METAT AND EX_met_L(e) AND METt2r' + 'PYDXKr',[],'PYDXKr','PYDXK' + 'PYDXKr',[],'PYDXKr','PYDXK AND EX_pydx(e) AND PYDXabc' + 'TMKr',[],'TMKr','TMK' + 'TMKr',[],'TMKr','TMK AND EX_thm(e) AND THMabc' + 'TMPKr',[],'TMPKr','TMPK' + 'TMPKr',[],'TMPKr','TMPK AND EX_thm(e) AND THMabc' + 'NMNATr',[],'NMNATr','NMNAT' + 'NMNATr',[],'NMNATr','NMNAT AND EX_nmn(e) AND NMNP' + 'DDGLKr',[],'DDGLKr','DDGLK' + 'XYLKr',[],'XYLKr','XYLK' + 'RBK_Dr','ARABI','RBK_Dr','RBK_D' + % 'METSr',[],'METSr','METS' + % 'METSr',[],'METSr','METS AND EX_met_L(e) AND METt2r' + 
'SUCCt2i',[],'SUCCt2i','SUCCt2' + 'THMt3 AND THMte',[],'THMte',[] + 'PPAt2r AND PPAtr',[],'PPAtr',[] + 'PPAt2r AND PPAt2',[],'PPAt2',[] + 'CBMKr AND OCBT AND CITRH',[],'CITRH','CITRHi' + 'DHLPHEOR AND DHPHEOGAT',[],'DHLPHEOR','DHLPHEORi' + 'MCCCr AND ACOAD8 AND ACACT1r AND HMGCOAS AND MGCOAH',[],'MCCCr','MCCC' + 'r0392 AND ALCD19 AND GLYD',[],'r0392','ALDD8x' + 'FDNADOX_H AND BTCOADH AND GLFRDO',[],'GLFRDO','GLFRDOi' + 'NADH6 AND SUCD4 AND G3PD8',[],'G3PD8','G3PD8i AND EX_thr_L(e) AND THRt2r' + 'RIBFLVt4 AND r1106',[],'RIBFLVt4','RIBFLVt4i' + 'ASP4DC AND PPCr AND ALATA_L',[],'ASP4DC','ASP4DCi' + 'ASP4DC AND PPCr AND PYK',[],'ASP4DC','ASP4DCi' + 'METFR AND FDNADOX_H AND 5MTHFOX',[],'METFR','METFRi' + 'ACOAR AND SUCD1 AND PPCOAOc',[],'PPCOAOc','PPCOAOci' + 'HYD1 AND FRDOr AND HYD4',[],'FRDOr','FRDO' + 'TSULt2 AND THSr1mq AND H2O2D AND SO3t AND THIORDXi',[],'TSULt2','TSULt2i' + 'FDNADOX_H AND HACD1 AND GLFRDO',[],'GLFRDO','GLFRDOi' + 'FDNADOX_H AND HACD1 AND OOR2r',[],'OOR2r','OOR2' + 'FUMt2r AND FUMt',[],'FUMt',[] + '5ASAt2r AND 5ASAp',[],'5ASAp',[] + 'HMR_0197 AND FACOAL140',[],'FACOAL140','FACOAL140i' + 'SULR AND SO3rDmq AND THSr1mq AND AMPSO3OX',[],'SULR','SULRi' + 'H2St AND TSULt2 AND TSULST AND GTHRD AND THSr1mq',[],'TSULt2','TSULt2i' + '15DAPt AND CADVt AND LYSt3r',[],'LYSt3r','LYSt3' + 'MAL_Lte AND r1144 AND MALNAt AND GLUt2r',[],'GLUt2r','GLUt2' + 'ACGApts AND ACGAMPM AND ACGAMPT AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' + 'CHLt2r AND sink_chols AND CHOLSH AND EX_so4(e)',[],'CHLt2r','CHLt2' + 'H2O2D AND CYTBD AND EX_h2o2(e) AND L_LACD2',[],'H2O2D','NPR' + 'AKGt2r AND AKGte',[],'AKGte',[] + 'PHEt2r AND PHEtec',[],'PHEt2r','PHEt2' + 'CHOLOX',[],'CHOLOX','CHOLOXi' + '34DCCBR',[],'34DCCBR','34DCCBRi' + 'r0389',[],'r0389','r0389i' + 'URAOX',[],'URAOX','URAOXi' + 'L_TRPCOO',[],'L_TRPCOO','L_TRPCOOi' + 'SQLE',[],'SQLE','SQLEi' + '1H2NPTH',[],'1H2NPTH','1H2NPTHi' + 'HSNOOX',[],'HSNOOX','HSNOOXi' + 'SALCACD',[],'SALCACD','SALCACDi' + 
'34HPPORdc',[],'34HPPORdc','34HPPORdci' + 'SULR AND SULRi',[],'SULR',[] + 'FUCt2_1 AND FUCt',[],'FUCt2_1',[] + 'G6PDH2r AND G6PBDH AND G6PDA AND G6PI',[],'G6PDH2r','G6PDH2' + 'ADMDCr',[],'ADMDCr','ADMDC' + % 'CD2t6r AND CD2abc1',[],'CD2t6r','CD2t6' + 'OOR2r AND POR4 AND FRD2 AND FUM AND ACONTb AND ACONTa AND SUCCt AND SUCCt2r',[],'SUCCt','FDNADOX_H' + 'ACKr AND NNAM AND NAPRT AND NACt AND NACt2r',[],'NACt','EX_asp_L(e) AND ASPt2r' + 'HYD4 AND POR4 AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r',[],'FORt2r',[] + 'OOR2r AND ACKr AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND ALCD2x AND ACALD AND SUCCt','ETOHt','FORt2r AND SUCCt','EX_etoh(e) AND ETOHt2r AND SUCCt2r' + 'POR4 AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND ALCD2x AND ACALD AND SUCCt','ETOHt','FORt2r AND SUCCt','FDNADOX_H AND EX_etoh(e) AND ETOHt2r AND SUCCt2r' + 'OOR2r AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND SUCCt AND NTRIR2x','PTAr','FORt2r AND SUCCt','PTAr AND SUCCt2r AND EX_no2(e) AND NO2t2' + 'OOR2r AND FRD2 AND ACONTb AND ACONTa AND FORt AND FORt2r AND SUCCt AND NTRIR2x','ACtr','FORt2r AND SUCCt','EX_ac(e) AND ACtr AND SUCCt2r AND EX_no2(e) AND NO2t2' + 'PIt7 AND EX_na1(e) AND ACKr AND OAASr AND FORt AND FORt2r','PIabc','FORt2r','DM_NA1' + % 'PPHISNPPT',[],'PPHISNPPT','PPHISNPPTi' + % 'PPHISPT',[],'PPHISPT','PPHISPTi' + % 'PPHNPPT',[],'PPHNPPT','PPHNPPTi' + 'PPHPT',[],'PPHPT','PPHPTi' + 'ICDHyr AND SUCOAS AND PYK AND FDNADOX_H AND POR4',[],'ICDHyr','ICDHy' + + % added additionally to prevent futile cycles in pairwise models + 'ASPt2_2 AND ASPt2r',[],'ASPt2r','ASPte' + 'SUCCt AND SUCCt2r',[],'SUCCt',[] + 'SUCCt AND SUCCt2_2 AND SUCCt2_3',[],'SUCCt',[] + 'ACKr AND ACEDIPIT AND APAT AND DAPDA AND 26DAPLLAT',[],'26DAPLLAT','26DAPLLATi' + 'MALNAt AND L_LACNa1t AND L_LACt2r',[],'L_LACt2r','L_LACt2' + 'G3PD8 AND SUCD4 AND G3PD1',[],'G3PD8','G3PD8i' + 'r0010 AND H2O2D',[],'H2O2D','NPR' + 'r1088',[],'r1088','CITt2' + 'FDNADOX_H AND AKGS AND OAASr AND ICDHx AND 
POR4',[],'ICDHx','ICDHxi' + 'CITt2ipp AND CAt4i AND CITCAt',[],'CITCAt','CITCAti' + 'G16BPS AND G1PPT AND PGK AND GAPD_NADP AND GAPD',[],'G16BPS','G16BPSi' + 'PPCr AND PYK AND ACTLDCCL AND HEDCHL AND OAAKEISO',[],'PPCr','PPC' + 'PPCr AND OAACL',[],'OAACL','OAACLi' + 'PPCr AND PYK AND ACPACT AND TDCOATA AND MCOATA AND HACD6',[],'PPCr','PPC' + 'OCBT AND CITRH AND CBMKr',[],'CBMKr','CBMK' + 'GALt2_2 AND GALt1r',[],'GALt2_2','GALt2_2i' + 'LDH_L AND L_LACDr',[],'L_LACDr','L_LACD' + 'UCO2L AND BUAMDH AND BURTADH AND H2CO3D',[],'UCO2L','UCO2Li' + 'FDOXR AND NADH7 AND NTRIR4',[],'FDOXR','FDOXRi' + 'NADH6 AND SNG3POR AND G3PD2',[],'SNG3POR','G3PD5' + 'PPCOAOc AND NADH6 AND ACOAR',[],'PPCOAOc','PPCOAOci' + 'PGK AND G1PP AND G16BPS AND G1PPTi',[],'G16BPS','G16BPSi' + 'FACOAL140 AND FA140ACPH',[],'FACOAL140','FACOAL140i' + 'R5PAT AND PRPPS AND NADN AND NAPRT',[],'R5PAT','R5PATi' + 'R5PAT AND ADPRDPTS AND PPM',[],'R5PAT','R5PATi' + 'MCCCr AND HMGCOAS AND MGCOAH AND ACOAD8 AND ACACT1r',[],'MCCCr','MCCC' + 'FRUpts AND FRUt2r',[],'FRUt2r','FRUt1r' + 'PGMT AND G16BPS AND G1PPTi',[],'G16BPS','G16BPSi' + 'ILEt2r AND ILEtec',[],'ILEt2r','ILEt2' + 'VALt2r AND VALtec',[],'VALt2r','VALt2' + 'SNG3POR AND OOR2r AND FUM AND POR4 AND HPYRI',[],'SNG3POR','G3PD5' + 'NTMAOR AND SUCDimq AND FRD7 AND NADH6',[],'NTMAOR','NTMAORi' + 'PIt6bi AND PIt7',[],'PIt7','PIt7ir' + 'THMt3 AND THMte',[],'THMt3','THMt3i' + 'PROPAT4te AND PROt4r',[],'PROt4r','PROt4' + 'GLUOR AND GALM1r AND NADH6',[],'GLUOR','GLUORi' + 'PGK AND G1PP AND G16BPS AND G1PPT',[],'G1PP','G1PPi' + 'FDOXR AND FDNADOX_H',[],'FDOXR','FDOXRi' + 'FRDOr AND HYD1 AND HYD4',[],'FRDOr','FRDO' + 'ASP4DC AND PYK AND PPCr',[],'ASP4DC','ASP4DCi' + 'NZP_NRe AND NZP_NR',[],'NZP_NRe','NZP_NRei' + 'NFORGLUAH AND 5MTHFGLUNFT AND FOMETR',[],'NFORGLUAH','NFORGLUAHi' + 'FDOXR AND POR4 AND FDH2',[],'FDOXR','FDOXRi' + 'ACGApts AND ACGAMtr2',[],'ACGAMtr2','ACGAMt2' + 'FDNADOX_H AND KLEURFd AND OIVD1r',[],'OIVD1r','OIVD1' + 'CITCAt AND CAt4i AND 
CITt13',[],'CITCAt','CITCAti' + '4ABZt2r AND 4ABZt',[],'4ABZt2r','4ABZt2' + 'FUMt2r AND FUMt',[],'FUMt2r','FUMt2' + 'TARCGLYL AND TARTD AND PYRCT',[],'TARCGLYL','TARCGLYLi' + 'SULR AND SO3rDmq AND SUCDimq',[],'SULR','SULRi' + 'CITt15 AND ZN2t4 AND Kt1r AND CITt2',[],'CITt15','CITt15i' + 'FRDO AND FDNADOX_H AND GLFRDO',[],'GLFRDO','GLFRDOi' + 'THMDt2r AND THYMDtr2',[],'THMDt2r','THMDt2' + 'HXANt2r AND HYXNt',[],'HXANt2r','HXANt2' + 'GSNt2r AND GSNt',[],'GSNt2r','GSNt2' + 'GALt4 AND GALt1r',[],'GALt4','GALt4i' + 'THSr1mq AND TSULt2 AND H2St AND SO3t AND TSULST AND GTHRD AND SUCDimq',[],'TSULt2','TSULt2i' + 'PPCKr AND MALFADO AND ACKr AND PPDK AND PPIACPT',[],'MALFADO','MALFADOi' + 'AKGte AND AKGt2r',[],'AKGt2r','AKGt2' + '5ASAp AND 5ASAt2r',[],'5ASAt2r','5ASAt2' + 'MAL_Lte AND MALt2r',[],'MALt2r','MALt2' + 'MAL_Lte AND GLUt2r AND MALNAt AND GLUt4r',[],'MALNAt','MALt4' + 'AKGMAL AND MALNAt AND AKGte',[],'MALNAt','MALt4' + 'r0792 AND 5MTHFOX AND FDNADOX_H AND MTHFD2 AND MTHFD',[],'r0792','MTHFR2rev' + 'H2O2D AND CYTBD AND r0010',[],'H2O2D','NPR' + 'PROD3 AND NADH6 AND HPROxr',[],'PROD3','PROD3i' + 'GLFRDO AND GLFRDOi',[],'GLFRDO',[] + 'DGOR AND SBTD_D2 AND GALM1r AND GNOXmq',[],'DGOR','DGORi' + 'DGORi AND SBTD_D2 AND GALM1r AND GNOXmq',[],'GNOXmq','GNOXmqi' + 'DGORi AND SBTD_D2 AND GALM1r AND GNOXuq',[],'GNOXuq','GNOXuqi' + 'LPCDH AND LPCOX AND NADH6pp AND ATPS4pp',[],'LPCDH','LPCDHi' + 'CITt2pp AND CITCAtpp AND CAt4ipp',[],'CITCAtpp','CITCAtipp' + 'GLFRDO AND FDNADOX_H',[],'FDOXR','GLFRDOi' + 'OOR2r AND FDNADOX_H AND AKGS',[],'OOR2r','OOR2' + 'OAASr AND ICDHx AND ACONTa AND ACONTb AND ALCD2x AND FDH AND PTAr AND ACKr',[],'ICDHx','ICDHxi' + 'METt2r AND METt3r',[],'METt2r','METt2' + 'NTP9 AND NDPK4',[],'NTP9','NTP9i' + 'MAN1PT2r',[],'MAN1PT2r','MAN1PT2' + 'HEX4 AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' + 'MANISO AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' + 'PGMT AND GALU AND GLBRAN AND GLDBRAN AND GLGNS1 
AND GLPASE1 AND NDPK2 AND PPA AND r1393',[],'NDPK2','NDPK2i' + 'D_GLUMANt AND MANt2r AND GLU_Dt2r',[],'GLU_Dt2r','GLU_Dt2' + 'NACUP AND NACSMCTte AND NAt3_1',[],'NAt3_1','NAt3' + 'GALU AND DCLMPDOH AND GDPGALP AND GDPMANNE AND GALT',[],'GALT','GALTi' + 'HYD2 AND HYD4 AND NTRIR4 AND FDOXR',[],'FDOXR','FDOXRi' + 'FACOAL181',[],'FACOAL181','FACOAL181i' + 'MAN6PI AND DCLMPDOH AND GDPGALP AND GDPMANNE AND HMR_7271',[],'GDPGALP','GDPGALPi' + 'FE2DH AND FE3Ri AND NADH6 AND SUCD1 AND FRD7',[],'FE2DH','FE2DHi' + 'GLBRAN AND GLDBRAN AND GLGNS1 AND GLPASE1 AND GPDDA1',[],'GLDBRAN',[] + }; + + +% growth-restoring gapfills: needed if the futile cycle was the model's +% only way to produce ATP and growth rate without it is zero. Enables ATP +% production through a more realistic pathway. +growthGapfills={ + 'EX_succ(e) AND SUCCt' + 'EX_fum(e) AND FUMt2' + 'EX_succ(e) AND SUCCt2r' + 'EX_fum(e) AND FUMt2 AND EX_succ(e) AND SUCCt2r' + 'EX_for(e) AND FORt2r' + 'EX_ac(e) AND ACt2r' + 'EX_etoh(e) AND ETOHt2r' + 'EX_hco3(e) AND HCO3abc AND H2CO3D' + % consider adding glycolysis + 'HEX1 AND PFK AND FBA AND TPI AND GAPD AND PGK AND PGM AND ENO AND PYK' + 'HEX1 AND PFK AND FBA AND TPI AND GAPD AND PGK AND PGM AND ENO AND PYK AND EX_etoh(e) AND ETOHt2r' + 'EX_q8(e) AND Q8abc' + 'EX_2dmmq8(e) AND 2DMMQ8abc' + 'DM_q8h2[c]' + 'DM_NA1' + 'G3PFDXORi' % tentative-some models would not produce feasible amounts of ATP without it + 'ASP4DCi' % tentative-some models would not produce feasible amounts of ATP without it + 'EX_lac_L(e) AND L_LACt2r' + 'EX_acald(e) AND ACALDt' + 'EX_asp_L(e) AND ASPt2r' + 'EX_arg_L(e) AND ARGt2r' + 'EX_ser_L(e) AND SERt2r' + 'PPA' + }; + +for i = 2:size(reactionsToReplace, 1) + % take other models in a multi-species model into account if applies + if nargin>3 && ~isempty(unionRxns) + go = 1; + present=strsplit(reactionsToReplace{i,1},' AND '); + if ~(length(intersect(unionRxns,present))==length(present)) + go= 0; + end + notpresent=reactionsToReplace{i,2}; + if 
~isempty(intersect(unionRxns,notpresent)) + go= 0; + end + else + go = 1; + present=strsplit(reactionsToReplace{i,1},' AND '); + if ~(length(intersect(model.rxns,present))==length(present)) + go= 0; + end + if ~isempty(reactionsToReplace{i,2}) + notpresent=strsplit(reactionsToReplace{i,2},' AND '); + if length(intersect(model.rxns,notpresent))==length(notpresent) + go= 0; + end + end + end + if go == 1 + % Only make the change if biomass can still be produced + toRemove=strsplit(reactionsToReplace{i,3},' AND '); + for k=1:length(toRemove) + RxForm = database.reactions{find(ismember(database.reactions(:, 1), toRemove{k})), 3}; + if contains(RxForm,'[e]') + newName=[toRemove{k} 'pp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + toRemove{k}=newName; + end + end + end + end + modelTest = removeRxns(model, toRemove); + if ~isempty(reactionsToReplace{i, 4}) + rxns=strsplit(reactionsToReplace{i, 4},' AND '); + for j=1:length(rxns) + % create a new formula + RxForm = database.reactions{find(ismember(database.reactions(:, 1), rxns{j})), 3}; + + if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) + newName=[rxns{j} 'ipp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + RxForm=dbForm; + end + end + modelTest = addReaction(modelTest, newName, RxForm); + else + modelTest = addReaction(modelTest, rxns{j}, RxForm); + end + + end + end + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f > tol + model = modelTest; + if ~isempty(reactionsToReplace{i, 3}) + for j=1:length(toRemove) + deletedRxns{delCnt, 1} = 
toRemove{j}; + delCnt = delCnt + 1; + end + end + if ~isempty(reactionsToReplace{i, 4}) + if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 + addedRxns{addCnt, 1} = toRemove{1}; + end + for j=1:length(rxns) + addedRxns{addCnt, j+1} = rxns{j}; + end + addCnt = addCnt + 1; + end + else + % try growth-restoring gapfills + gf=1; + modelPrevious=modelTest; + for k=1:size(growthGapfills,1) + ggrxns=strsplit(growthGapfills{k, 1},' AND '); + % to not add reactions that were just flagged for removal + ggrxns=setdiff(ggrxns,toRemove); + for j=1:length(ggrxns) + % create a new formula + RxForm = database.reactions{find(ismember(database.reactions(:, 1), ggrxns{j})), 3}; + if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) + newName=[ggrxns{j} 'ipp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + RxForm=dbForm; + end + end + if isempty(find(contains(model.rxns,newName))) + modelTest = addReaction(modelTest, newName, RxForm); + end + else + if isempty(find(strcmp(model.rxns,ggrxns{j}))) + modelTest = addReaction(modelTest, ggrxns{j}, RxForm); + end + end + end + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f > tol + model = modelTest; + % add replaced reactions + if ~isempty(reactionsToReplace{i, 3}) + for j=1:length(toRemove) + deletedRxns{delCnt, 1} = toRemove{j}; + delCnt = delCnt + 1; + end + end + if ~isempty(reactionsToReplace{i, 4}) + if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 + addedRxns{addCnt, 1} = toRemove{1}; + end + for j=1:length(rxns) + addedRxns{addCnt, j+1} = rxns{j}; + end + addCnt = addCnt + 1; + end + % add growth-restoring gapfilled reactions + for j=1:length(ggrxns) + gfRxns{length(gfRxns)+1, 1} = ggrxns{j}; + end + gf=0; + break + end + modelTest=modelPrevious; + end + end + end +end + 
+%% Make the proposed changes +model = model_old; +if ~isempty(deletedRxns) + for j = 1:length(deletedRxns) + model = removeRxns(model, deletedRxns{j, 1}); + end +end + +% make sure gene rule and notes are kept while replacing +if ~isempty(addedRxns) + for j = 1:size(addedRxns,1) + model = addReaction(model, addedRxns{j, 2}, database.reactions{find(ismember(database.reactions(:, 1), addedRxns{j, 2})), 3}); + % if a reaction from the old version is replaced, keep the GPR + if ~isempty(addedRxns{j, 1}) && ~isempty(find(ismember(model_old.rxns,addedRxns{j, 1}))) + rxnIDNew=find(ismember(model.rxns,addedRxns{j, 2})); + rxnIDOld=find(ismember(model_old.rxns,addedRxns{j, 1})); + model.grRules{rxnIDNew,1}=model_old.grRules{rxnIDOld,1}; + model.rxnConfidenceScores(rxnIDNew,1)=model_old.rxnConfidenceScores(rxnIDOld,1); + end + model.comments{end,1}='Added to eliminate futile cycles during DEMETER pipeline.'; + model.rxnConfidenceScores(end,1)=1; + % if more than one reaction is added + if size(addedRxns,2)>2 + if ~isempty(addedRxns{j,3}) + for k=3:size(addedRxns(j,:),2) + if ~isempty(addedRxns{j,k}) + model = addReaction(model, addedRxns{j, k}, database.reactions{find(ismember(database.reactions(:, 1), addedRxns{j, k})), 3}); + model.comments{end,1}='Added to eliminate futile cycles during DEMETER pipeline.'; + model.rxnConfidenceScores(end,1)=1; + end + end + end + end + end +end + +% add any gapf-filled reactions +if ~isempty(gfRxns) + for i=1:length(gfRxns) + model = addReaction(model, gfRxns{i,1}, database.reactions{find(ismember(database.reactions(:, 1), gfRxns{i,1})), 3}); + model.comments{end,1}='Added to enable growth after eliminating futile cycles during DEMETER pipeline.'; + model.rxnConfidenceScores(end,1)=1; + end +end + +if size(addedRxns,2) >1 + addedRxns=addedRxns(:,2); +end + +% relax constraints-cause infeasibility problems +relaxConstraints=model.rxns(find(model.lb>0)); +model=changeRxnBounds(model,relaxConstraints,0,'l'); + +% change back to unlimited 
medium +% list exchange reactions +exchanges = model.rxns(strncmp('EX_', model.rxns, 3)); +% open all exchanges +model = changeRxnBounds(model, exchanges, -1000, 'l'); +model = changeRxnBounds(model, exchanges, 1000, 'u'); + +end diff --git a/src/reconstruction/demeter/src/integration/createRBioNetDBFromVMHDB.m b/src/reconstruction/demeter/src/integration/createRBioNetDBFromVMHDB.m index 44123bc56d..03fd0860f5 100644 --- a/src/reconstruction/demeter/src/integration/createRBioNetDBFromVMHDB.m +++ b/src/reconstruction/demeter/src/integration/createRBioNetDBFromVMHDB.m @@ -24,7 +24,7 @@ function createRBioNetDBFromVMHDB(varargin) metaboliteDatabase(1,:)=[]; for i=1:size(metaboliteDatabase,1) metaboliteDatabase{i,5}=num2str(metaboliteDatabase{i,5}); - % metaboliteDatabase{i,12}=datestr(metaboliteDatabase{i,12}); + metaboliteDatabase{i,12}=datestr(metaboliteDatabase{i,12}); end metab=cell(metaboliteDatabase); diff --git a/src/reconstruction/demeter/src/properties/producetSNEPlots.m b/src/reconstruction/demeter/src/properties/producetSNEPlots.m old mode 100644 new mode 100755 index 15c40a90d8..f669f74510 --- a/src/reconstruction/demeter/src/properties/producetSNEPlots.m +++ b/src/reconstruction/demeter/src/properties/producetSNEPlots.m @@ -1,265 +1,267 @@ -function producetSNEPlots(propertiesFolder,infoFilePath,reconVersion,customFeatures) -% This function plots reaction presence and uptake and secretion potential -% by taxon with t-SNE. -% -% USAGE -% producetSNEPlots(propertiesFolder,infoFilePath,reconVersion) -% -% INPUTS -% propertiesFolder Folder where the reaction presences and uptake and -% secretion potential to be analyzed are stored -% (default: current folder) -% infoFilePath Path to spreadsheet with taxonomical information of -% the refined strains -% reconVersion Name assigned to the reconstruction resource -% OPTIONAL INPUT -% customFeatures Features other than taxonomy to cluster microbes -% by. 
Need to be a table header in the file with -% information on reconstructions. -% -% - AUTHOR -% Almut Heinken, 06/2020 - -% euclidean should work for most -distance='euclidean'; -alg='barneshut'; - -currentDir=pwd; -cd(propertiesFolder) -mkdir('tSNE_Plots') -cd('tSNE_Plots') - -tol=0.0000001; - -% get taxonomical information -try - infoFile = table2cell(readtable(infoFilePath, 'ReadVariableNames', false, 'Delimiter', 'tab')); -catch - infoFile = table2cell(readtable(infoFilePath, 'ReadVariableNames', false)); -end - -% define files to analyze -analyzedFiles={ - 'Reaction presence' ['ReactionMetabolitePresence' filesep 'ReactionPresence_' reconVersion] - 'Metabolite presence' ['ReactionMetabolitePresence' filesep 'MetabolitePresence_' reconVersion] - 'Uptake and secretion potential' ['ComputedFluxes' filesep 'UptakeSecretion_' reconVersion] - 'Internal metabolite production' ['ComputedFluxes' filesep 'InternalProduction_' reconVersion] - }; - -for k=1:size(analyzedFiles,1) - DataToAnalyze = readtable([propertiesFolder filesep analyzedFiles{k,2} '.txt'], 'ReadVariableNames', false); - DataToAnalyze = table2cell(DataToAnalyze); - DataToAnalyze=DataToAnalyze'; - - [C,I]=setdiff(DataToAnalyze(1,:),infoFile(:,1),'stable'); - DataToAnalyze(:,I(2:end))=[]; - - % can only be performed if there are enough strains with taxonomical information - if size(DataToAnalyze,2) >= 10 - - rp=str2double(DataToAnalyze(2:end,2:end)); - orgs=DataToAnalyze(1,2:end)'; - - taxonlevels={ - 'Phylum' - 'Class' - 'Order' - 'Family' - 'Genus' - 'Species' - }; - - Summary=struct; - for i=1:length(taxonlevels) - % plot on different taxon levels - taxa={}; - taxcol=find(strcmp(infoFile(1,:),taxonlevels{i})); - for j=2:size(DataToAnalyze,2) - if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) - taxa{j-1,1}='N/A'; - else - taxa{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),taxcol}; - end - end - - data=rp'; - red_orgs=orgs; - - % remove entries that are all zeros - toDel=sum(data,1)15 - 
% sort by number of entries and remove the ones with the least - % entries - [B,I]=sort(occ,'descend'); - uniqueXX=uniqueXX(I); - - if sum(B==1) > length(B)-15 - % remove all that are just one entry - uniqueXX(B==1)=[]; - else - % remove all but 15 highest - uniqueXX=uniqueXX(1:15); - end - - [C,IA]=setdiff(taxa,uniqueXX); - data(find(ismember(taxa,C)),:)=[]; - red_orgs(ismember(taxa,C),:)=[]; - taxa(find(ismember(taxa,C)),:)=[]; - end - - if size(data,1)>10 - - % adjust perplicity to number of variables - if size(data,1) > 150 - perpl=50; - elseif size(data,1) >= 50 - perpl=30; - elseif size(data,1) >= 20 - perpl=10; - else - perpl=5; - end - - Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',3); - Summary.(taxonlevels{i})(:,1)=red_orgs; - Summary.(taxonlevels{i})(:,2)=taxa; - Summary.(taxonlevels{i})(:,3:size(Y,2)+2)=cellstr(string(Y)); - - if size(data,1) == size(Y,1) && size(Y,2) > 1 - f=figure; - cols=hsv(length(unique(taxa))); - % define markers to better distinguish groups - cmarkers=''; - for j=1:7:length(unique(taxa)) - cmarkers=[cmarkers '+o*xsdp']; - end - cmarkers=cmarkers(1:length(unique(taxa))); - h=gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,cmarkers); - hold on - set(h,'MarkerSize',6) - title(analyzedFiles{k,1}) - plottitle=strrep(reconVersion,'_refined',''); - plottitle=strrep(plottitle,'_draft',''); - suptitle(plottitle) - - h=legend('Location','northeastoutside'); - if length(uniqueXX) < 12 - set(h,'FontSize',12) - elseif length(uniqueXX) < 20 - set(h,'FontSize',11) - else - set(h,'FontSize',8) - end - grid off - f.Renderer='painters'; - print([taxonlevels{i} '_' strrep(analyzedFiles{k,1},' ','_') '_' reconVersion],'-dpng','-r300') - else - warning('Not enough strains with available organism information. 
Cannot cluster based on taxonomy.') - end - end - end - save(['Summary_' reconVersion],'Summary'); - - % if the data should be clustered by any custom features from the info file - if nargin > 3 - for i=1:length(customFeatures) - % plot on different taxon levels - feats={}; - cuscol=find(strcmp(infoFile(1,:),customFeatures{i})); - if ~isempty(cuscol) - for j=2:size(DataToAnalyze,2) - if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) - feats{j-1,1}='N/A'; - else - feats{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),cuscol}; - end - end - - data=rp'; - red_orgs=orgs; - - % remove organisms with no data - data(find(strcmp(taxa,'N/A')),:)=[]; - red_orgs(strcmp(taxa,'N/A'),:)=[]; - taxa(find(strcmp(taxa,'N/A')),:)=[]; - - if size(data,1) >= 10 - - % remove features with too few members - [uniqueXX, ~, J]=unique(feats) ; - occ = histc(J, 1:numel(uniqueXX)); - toofew=uniqueXX(occ= 10 + + rp=str2double(DataToAnalyze(2:end,2:end)); + orgs=DataToAnalyze(1,2:end)'; + + taxonlevels={ + 'Phylum' + 'Class' + 'Order' + 'Family' + 'Genus' + 'Species' + }; + + Summary=struct; + for i=1:length(taxonlevels) + % plot on different taxon levels + taxa={}; + taxcol=find(strcmp(infoFile(1,:),taxonlevels{i})); + for j=2:size(DataToAnalyze,2) + if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) + taxa{j-1,1}='N/A'; + else + taxa{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),taxcol}; + end + end + + data=rp'; + red_orgs=orgs; + + % remove entries that are all zeros + toDel=sum(data,1)15 + % sort by number of entries and remove the ones with the least + % entries + [B,I]=sort(occ,'descend'); + uniqueXX=uniqueXX(I); + + if sum(B==1) > length(B)-15 + % remove all that are just one entry + uniqueXX(B==1)=[]; + else + % remove all but 20 highest + uniqueXX=uniqueXX(1:20); + end + + [C,IA]=setdiff(taxa,uniqueXX); + data(find(ismember(taxa,C)),:)=[]; + red_orgs(ismember(taxa,C),:)=[]; + taxa(find(ismember(taxa,C)),:)=[]; + end + + if size(data,1)>10 + + % adjust 
perplicity to number of variables + if size(data,1) > 150 + perpl=50; + elseif size(data,1) >= 50 + perpl=30; + elseif size(data,1) >= 20 + perpl=10; + else + perpl=5; + end + + Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',2); +% Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',3); + Summary.(taxonlevels{i})(:,1)=red_orgs; + Summary.(taxonlevels{i})(:,2)=taxa; + Summary.(taxonlevels{i})(:,3:size(Y,2)+2)=cellstr(string(Y)); + + if size(data,1) == size(Y,1) && size(Y,2) > 1 + f=figure; + cols=hsv(length(unique(taxa))); + % define markers to better distinguish groups + cmarkers=''; + for j=1:7:length(unique(taxa)) + cmarkers=[cmarkers '+o*xsdp']; + end + cmarkers=cmarkers(1:length(unique(taxa))); + gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,{},30); + % h=gscatter(Y(:,1),Y(:,2),taxa,cols,cmarkers); + % set(h,'MarkerSize',6) + hold on + title(analyzedFiles{k,1}) + plottitle=strrep(reconVersion,'_refined',''); + plottitle=strrep(plottitle,'_draft',''); + suptitle(plottitle) + + h=legend('Location','northeastoutside'); + if length(uniqueXX) < 12 + set(h,'FontSize',12) + elseif length(uniqueXX) < 20 + set(h,'FontSize',11) + else + set(h,'FontSize',8) + end + grid off + f.Renderer='painters'; + print([taxonlevels{i} '_' strrep(analyzedFiles{k,1},' ','_') '_' reconVersion],'-dpng','-r300') + else + warning('Not enough strains with available organism information. 
Cannot cluster based on taxonomy.') + end + end + end + save(['Summary_' reconVersion],'Summary'); + + % if the data should be clustered by any custom features from the info file + if nargin > 3 + for i=1:length(customFeatures) + % plot on different taxon levels + feats={}; + cuscol=find(strcmp(infoFile(1,:),customFeatures{i})); + if ~isempty(cuscol) + for j=2:size(DataToAnalyze,2) + if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) + feats{j-1,1}='N/A'; + else + feats{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),cuscol}; + end + end + + data=rp'; + red_orgs=orgs; + + % remove organisms with no data + data(find(strcmp(taxa,'N/A')),:)=[]; + red_orgs(strcmp(taxa,'N/A'),:)=[]; + taxa(find(strcmp(taxa,'N/A')),:)=[]; + + if size(data,1) >= 10 + + % remove features with too few members + [uniqueXX, ~, J]=unique(feats) ; + occ = histc(J, 1:numel(uniqueXX)); + toofew=uniqueXX(occ Date: Sun, 30 May 2021 11:42:30 +0100 Subject: [PATCH 05/82] debugged futile cycles in pan-models --- .../mgPipe/createPanModels.m | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 690648e834..566e333d2f 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -367,6 +367,12 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'PPCr AND MALCOAPYRCT AND MMSAD5 AND MMSAD4','PPCr','PPC' 'SERD_Lr','SERD_Lr','SERD_L' 'LDH_L AND LDH_L2','LDH_L',[] + '25DOPOX AND GLCRAL AND D4DGCD','D4DGCD','D4DGCDi' + 'CITt7 AND SUCCt AND CAt4i AND CITCAt','CITCAt','CITCAti' + 'HEDCHL AND OAAKEISO AND ACTLDCCL','ACTLDCCL','ACTLDCCLi' + 'ADNt AND ADNCNT3tc','ADNCNT3tc','ADNt2' + 'PGMT AND G1PP AND GK_adp_','G1PP','G1PPi' + 'LPCDH AND LPCOX AND 
NADH6','LPCDH','LPCDHi' }; % List Western diet constraints to test if the pan-model produces @@ -659,28 +665,32 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) % Rebuild model consistently model = rebuildModel(model,database); model=changeObjective(model,'biomassPan'); + +% constrain sink reactions +model.lb(find(strncmp(model.rxns,'sink_',5)))=-1; + % remove duplicate reactions % Will remove reversible reactions of which an irreversible version is also % there but keep the irreversible version. [modelRD, removedRxnInd, keptRxnInd] = checkDuplicateRxn(model); % test if the model can still grow -modelRD = useDiet(modelRD,dietConstraints); FBA=optimizeCbModel(modelRD,'max'); if FBA.f > tol model=modelRD; else - toRM={}; modelTest=model; - for k=1:length(removedRxnInd) - modelTest=removeRxns(modelTest,modelTest.rxns(removedRxnInd(k))); - FBA=optimizeCbModel(modelTest,'max'); + toRM={}; + for j=1:length(removedRxnInd) + modelRD=removeRxns(modelTest,model.rxns(removedRxnInd(j))); + modelRD = useDiet(modelRD,dietConstraints); + FBA=optimizeCbModel(modelRD,'max'); if FBA.f > tol - toRM{k} = modelTest.rxns{removedRxnInd(k)}; - model + modelTest=removeRxns(modelTest, model.rxns{removedRxnInd(j)}); + toRM{j}=model.rxns{removedRxnInd(j)}; else - toRM{k} = modelTest.rxns{keptRxnInd(k)}; + modelTest=removeRxns(modelTest, model.rxns{keptRxnInd(j)}); + toRM{j}=model.rxns{keptRxnInd(j)}; end - modelTest=removeRxns(modelTest,toRM{k}); end model=removeRxns(model,toRM); end From b10e2367b1b1812ed3ac2156c721e52d4059e812 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Mon, 31 May 2021 23:22:44 +0100 Subject: [PATCH 06/82] debugged futile cycles in pan-models --- .../microbiomeModelingToolbox/mgPipe/createPanModels.m | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 
566e333d2f..a605890638 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -373,6 +373,16 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'ADNt AND ADNCNT3tc','ADNCNT3tc','ADNt2' 'PGMT AND G1PP AND GK_adp_','G1PP','G1PPi' 'LPCDH AND LPCOX AND NADH6','LPCDH','LPCDHi' + 'THMtrbc AND THMt3','THMtrbc','THMti' + 'MAN6PI AND HEX4 AND DCLMPDOH AND HMR_7271 AND PMANM','PMANM','PMANMi' + 'DRPAr AND r0570 AND ACALD AND DURIPP AND PYNP2r AND DURI2OR','DURI2OR','DURI2ORi' + 'PYDAMtr AND PYDAMt','PYDAMtr',[] + 'ENO AND GLXS AND PGM AND PGK AND TPI AND OAACL','OAACL','OAACLi' + 'NAt3_1 AND SUCCt2r AND SUCCt4_3','SUCCt4_3','SUCCt4_3i' + 'PNTOt2 AND r0974 AND PNTOte','PNTOte','PNTOti' + 'ARGSL AND ARGSSr AND ARGDr AND ACS AND ACKr AND PTAr','ARGSSr','ARGSS' + 'METt2r AND METt3r','METt2r','METt2' + 'GLUOOR AND GLUR AND H2O2D','H2O2D','NPR' }; % List Western diet constraints to test if the pan-model produces From 1cb1219905b9bacf7060221e0626ea415c5eae2f Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 1 Jun 2021 22:45:52 +0100 Subject: [PATCH 07/82] debugged futile cycles in pan-models --- .../microbiomeModelingToolbox/mgPipe/createPanModels.m | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index a605890638..78c41ed068 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -379,10 +379,11 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'PYDAMtr AND PYDAMt','PYDAMtr',[] 'ENO AND GLXS AND PGM AND PGK AND TPI AND OAACL','OAACL','OAACLi' 'NAt3_1 AND SUCCt2r AND SUCCt4_3','SUCCt4_3','SUCCt4_3i' - 
'PNTOt2 AND r0974 AND PNTOte','PNTOte','PNTOti' + 'PNTOt2 AND PNTOte','PNTOte','PNTOti' 'ARGSL AND ARGSSr AND ARGDr AND ACS AND ACKr AND PTAr','ARGSSr','ARGSS' 'METt2r AND METt3r','METt2r','METt2' 'GLUOOR AND GLUR AND H2O2D','H2O2D','NPR' + 'KAS17rev AND RE3245C AND FAO181O','FAO181O','FAO181Oi' }; % List Western diet constraints to test if the pan-model produces From bf3e9d2809ae77e9164d8800910a634f3d171fd1 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Wed, 2 Jun 2021 13:52:28 +0100 Subject: [PATCH 08/82] debugged futile cycles in pan-models --- .../microbiomeModelingToolbox/mgPipe/createPanModels.m | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 78c41ed068..f162f4c007 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -379,11 +379,15 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'PYDAMtr AND PYDAMt','PYDAMtr',[] 'ENO AND GLXS AND PGM AND PGK AND TPI AND OAACL','OAACL','OAACLi' 'NAt3_1 AND SUCCt2r AND SUCCt4_3','SUCCt4_3','SUCCt4_3i' - 'PNTOt2 AND PNTOte','PNTOte','PNTOti' + 'PNTOt2 AND PNTOte','PNTOt2','PNTOt2i' + 'r0974 AND PNTOte','r0974','PNTOt4' 'ARGSL AND ARGSSr AND ARGDr AND ACS AND ACKr AND PTAr','ARGSSr','ARGSS' 'METt2r AND METt3r','METt2r','METt2' 'GLUOOR AND GLUR AND H2O2D','H2O2D','NPR' 'KAS17rev AND RE3245C AND FAO181O','FAO181O','FAO181Oi' + 'KAS17rev AND FACOAL181 AND FAO181O','FAO181O','FAO181Oi' + 'TMAt2r AND TMAOR2e AND TMAOt2r','TMAOR2e','TMAOR2ei' + 'ORNt2r AND PROPAT4te AND r2018 AND r1667','ORNt2r','ORNt2' }; % List Western diet constraints to test if the pan-model produces From e05a7d4632a39d34fa3f74aa0eefb7fb67eaf6f7 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 3 Jun 2021 16:49:48 
+0100 Subject: [PATCH 09/82] debugged futile cycles in pan-models --- .../microbiomeModelingToolbox/mgPipe/createPanModels.m | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index f162f4c007..4aeff898dc 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -388,6 +388,12 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'KAS17rev AND FACOAL181 AND FAO181O','FAO181O','FAO181Oi' 'TMAt2r AND TMAOR2e AND TMAOt2r','TMAOR2e','TMAOR2ei' 'ORNt2r AND PROPAT4te AND r2018 AND r1667','ORNt2r','ORNt2' + 'G1PP AND PGMT AND XYLI2 AND MAN6PI AND PNPHPT','G1PP','G1PPi' + 'DCLMPDOH AND HMR_7271 AND MAN1PT2 AND PMANM','PMANM','PMANMi' + 'ADEt2r AND ADEt','ADEt2r','ADEt2' + 'PPCr AND PPC','PPCr',[] + 'DGLU6Pt2 AND G6Pt6_2 AND PIt7 AND NAt3_1','PIt7','PIt7ir' + 'LEUt2r AND r1642 AND PROPAT4te','PROPAT4te','PROte' }; % List Western diet constraints to test if the pan-model produces From cee19bf7d11b48b260a70d510253cd2435663f7d Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 4 Jun 2021 16:36:45 +0100 Subject: [PATCH 10/82] debugged futile cycles in pan-models --- .../mgPipe/createPanModels.m | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 4aeff898dc..d2b8ada2e8 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -360,6 +360,7 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'LPCDH AND LPCOX AND NADH6pp AND ATPS4pp','LPCDH','LPCDHi' 
'CITt2pp AND CITCAtpp AND CAt4ipp','CITCAt','CITCAti' 'KAS17rev AND FACOAE181 AND FAO181E','FAO181E','FAO181Ei' + 'KAS17rev AND RE3245C AND FAO181E','FAO181E','FAO181Ei' 'G1PGTi AND PGMT2 AND G1PPT AND G16BPS','G16BPS','G16BPSi' 'HISSNAT5tc AND HISt2r','HISt2r','HISt2' 'TDCOATA AND ACPACT AND FAS140ACPrev','FAS140ACPrev','FAS140ACP' @@ -394,6 +395,18 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'PPCr AND PPC','PPCr',[] 'DGLU6Pt2 AND G6Pt6_2 AND PIt7 AND NAt3_1','PIt7','PIt7ir' 'LEUt2r AND r1642 AND PROPAT4te','PROPAT4te','PROte' + 'AGPAT120 AND PLIPA2A120 AND FA120ACPH','FA120ACPH','FA120ACPHi' + 'AGPAT160 AND PLIPA2A160 AND FA160ACPH','FA160ACPH','FA160ACPHi' + 'FAO181E AND DESAT18_3 AND FACOAL181','FAO181E','FAO181Ei' + 'GLUt2r AND GLUDy AND GLUt4r','GLUt2r','GLUt2' + 'ACOATA AND KAS14 AND C180SNrev AND 3HAD100','C180SNrev','C180SN' + 'ECOAH5 AND HACD2 AND HACD5 AND ACACT1r','ACACT1r','ACACT1' + 'FOLt2 AND FOLt','FOLt','FOLTle' + 'BTNt2 AND BTNT5r','BTNt2','BTNt2i' + 'AGOR AND SBTPD AND SBTpts AND SBTt6','SBTt6','SBTt6i' + 'PGM AND PGMT AND G16BPS AND G1PPT','G16BPS','G16BPSi' + 'EX_HC00319(e) AND MALNt AND C180SNrev AND ACACPT','C180SNrev','C180SN' + 'LYS6OR AND L2AADIPADOR AND LYSOR','LYSOR','LYSORi' }; % List Western diet constraints to test if the pan-model produces From af9570c1efc604beacdb698effcc94cf430d0eb5 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Wed, 9 Jun 2021 00:06:43 +0100 Subject: [PATCH 11/82] Debugged further futile cycles --- .../microbiomeModelingToolbox/mgPipe/createPanModels.m | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index d2b8ada2e8..404dae06ce 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -407,6 
+407,13 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) 'PGM AND PGMT AND G16BPS AND G1PPT','G16BPS','G16BPSi' 'EX_HC00319(e) AND MALNt AND C180SNrev AND ACACPT','C180SNrev','C180SN' 'LYS6OR AND L2AADIPADOR AND LYSOR','LYSOR','LYSORi' + 'TRPt2r AND TRPt','TRPt2r','TRPt2' + '2S6HCC AND SHCHCS AND SSALxr AND OOR2r AND SUCOAS','SSALxr','SSALx' + 'ADK8 AND NDPK7 AND NTP13','NTP13','NTP13i' + 'ADK10 AND NDPK4 AND NTP9','NTP9','NTP9i' + 'RE0583C AND SUCD1 AND ACOAD8f','ACOAD8f','ACOAD8fi' + 'TYRAL AND 4HBZOR AND 4HBZCL AND TYRL','4HBZCL','4HBZCLi' + 'NADH8 AND H2Ot AND SO3rDdmq AND SO3t AND H2St','SO3t','SO3ti' }; % List Western diet constraints to test if the pan-model produces From 03e88ebcfb5ba9ee279344730467b343dc7fc10c Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Wed, 9 Jun 2021 01:01:14 +0100 Subject: [PATCH 12/82] logarithmic modulus --- src/analysis/thermo/utilities/logmod.m | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/analysis/thermo/utilities/logmod.m diff --git a/src/analysis/thermo/utilities/logmod.m b/src/analysis/thermo/utilities/logmod.m new file mode 100644 index 0000000000..b4067df277 --- /dev/null +++ b/src/analysis/thermo/utilities/logmod.m @@ -0,0 +1,25 @@ +function y = logmod(x,base) +% log modulus function +% +% INPUT +% x n x 1 real vector +% +% OPTIONAL INPUT +% base exp(1),2,10 +if ~exist('base','var') + y = sign(x).*log1p(abs(x)); +else + switch base + case exp(1) + y = sign(x).*log1p(abs(x)); + case 2 + y = sign(x).*log2(1+abs(x)); + case 10 + y = sign(x).*log10(1+abs(x)); + otherwise + error('base not recognised') + end +end + +end + From ec8021622d0ec78fbb68061012a89f24ff0d114a Mon Sep 17 00:00:00 2001 From: nmendozam Date: Wed, 9 Jun 2021 15:08:50 -0500 Subject: [PATCH 13/82] add color and thickness variation option --- .../maps/ReconMap/buildFluxDistLayout.m | 55 +++++++++++++++---- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git 
a/src/visualization/maps/ReconMap/buildFluxDistLayout.m b/src/visualization/maps/ReconMap/buildFluxDistLayout.m index c95eb629be..adc84f6448 100644 --- a/src/visualization/maps/ReconMap/buildFluxDistLayout.m +++ b/src/visualization/maps/ReconMap/buildFluxDistLayout.m @@ -1,4 +1,4 @@ -function [serverResponse] = buildFluxDistLayout( minerva, model, solution, identifier, hexColour, thickness, content) +function [serverResponse] = buildFluxDistLayout( minerva, model, solution, identifier, hexColour, maxThickness, content) % Builds a layout for MINERVA from a flux distribution. If a dictionary % of identifiers is not provided it is assumed that the map and the COBRA % model's nomenclature is coherent. Sends the layout to the remote MINERVA @@ -18,9 +18,11 @@ % OPTIONAL INPUT: % hexColour colour of overlay (hex color format) % e.g. '#009933' corresponds to http://www.color-hex.com/color/009933 -% thickness: maximum thickness -% normalizedFluxesOption: if 'true' (default) then fluxes will be -% normalized, otw they will be displayed as is +% If you want to make a color gradient, you can input +% an array of 2 or 3 colors like ["#ff0000", "#6617B5", "#0000ff"] +% note that they should be declared with (") rather +% than with (') +% maxThickness: maximum thickness % content: character array with the following format for each % reaction to be displayed. Bypasses the use of solution.v to set the format. % 'name%09reactionIdentifier%09lineWidth%09color%0D' @@ -32,13 +34,30 @@ % - Ines Thiele April/2020, fixed issue with using ReconMap-3 as target map. 
-if ~exist('thickness', 'var') - thickness = 10; +if ~exist('maxThickness', 'var') || isempty(maxThickness) % fall back to the default only when the caller omitted (or passed an empty) maxThickness + maxThickness = 10; end -if exist('hexColour','var') - defaultColor = hexColour; -else +useThickness = true; % flag to change thickness according to the flux + +if ~exist('hexColour','var') defaultColor = '#57c657'; +else + hexColour = convertStringsToChars(hexColour); + if ischar(hexColour) + defaultColor = hexColour; + elseif length(hexColour) == 1 + defaultColor = hexColour{1}; + else + useThickness = false; + + if length(hexColour) >= 2 + cmap = makeColorGradient(hexColour{2}, hexColour{1}, maxThickness + 1); + end + + if length(hexColour) >= 3 + ncmap = makeColorGradient(hexColour{3}, hexColour{2}, maxThickness + 1); + end + end end %nRxn=length(solution.v); @@ -46,9 +65,9 @@ % build input data for minerva if ~exist('content','var') - normalizedFluxes = normalizeFluxes(abs(solution.v), thickness); + normalizedFluxes = normalizeFluxes(abs(solution.v), maxThickness); content = 'name%09reactionIdentifier%09lineWidth%09color%0D'; - cmap = makeColorGradient('#ff0000', defaultColor, 11); + for i=1:length(solution.v) mapReactionId = model.rxns{i}; @@ -58,8 +77,20 @@ end if solution.v(i) ~= 0 - color = cmap{round(normalizedFluxes(i)) + 1}; - line = strcat('%09', mapReactionId, '%09', 1, '%09', color, '%0D'); + + if useThickness + thickness = normalizedFluxes(i); + color = defaultColor; + else + thickness = 1; + if solution.v(i) > 0 + color = cmap{round(normalizedFluxes(i)) + 1}; + else + color = ncmap{round(normalizedFluxes(i)) + 1}; + end + end + + line = strcat('%09', mapReactionId, '%09', num2str(thickness), '%09', color, '%0D'); content = strcat(content, line); end From 5aaaa93ae653b6fe4c67ecdee76153390c948873 Mon Sep 17 00:00:00 2001 From: nmendozam Date: Wed, 9 Jun 2021 15:13:51 -0500 Subject: [PATCH 14/82] support for two color gradient --- src/visualization/maps/ReconMap/buildFluxDistLayout.m | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git
a/src/visualization/maps/ReconMap/buildFluxDistLayout.m b/src/visualization/maps/ReconMap/buildFluxDistLayout.m index adc84f6448..6888202788 100644 --- a/src/visualization/maps/ReconMap/buildFluxDistLayout.m +++ b/src/visualization/maps/ReconMap/buildFluxDistLayout.m @@ -83,10 +83,10 @@ color = defaultColor; else thickness = 1; - if solution.v(i) > 0 - color = cmap{round(normalizedFluxes(i)) + 1}; - else + if solution.v(i) < 0 && exist('ncmap','var') color = ncmap{round(normalizedFluxes(i)) + 1}; + else + color = cmap{round(normalizedFluxes(i)) + 1}; end end From 661a7992421b060b3df3b3b130626551a5da104a Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 10 Jun 2021 13:04:25 +0100 Subject: [PATCH 15/82] enabled more propagation of fermentation pathway data on genus level --- .../src/integration/prepareInputData.m | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/reconstruction/demeter/src/integration/prepareInputData.m b/src/reconstruction/demeter/src/integration/prepareInputData.m index 98093133c1..2ae32bf956 100755 --- a/src/reconstruction/demeter/src/integration/prepareInputData.m +++ b/src/reconstruction/demeter/src/integration/prepareInputData.m @@ -99,7 +99,7 @@ % remove organisms not in the current reconstruction resource [C,IA] = setdiff(propagatedData(:,1),infoFile(:,1),'stable'); - propagatedData(IA,:) = []; + propagatedData(IA(2:end),:) = []; writetable(cell2table(propagatedData),[inputDataFolder filesep inputDataToCheck{i}],'FileType','text','WriteVariableNames',false,'Delimiter','tab'); end @@ -256,33 +256,33 @@ strains=agoraInfoFile(find(strcmp(agoraInfoFile(:,genusCol),genera{i})),1); % if there is more than 10 strains from this genus in the % experimental data table so a consensus can be reached - if length(strains)>10 - compData=[]; - % find the strains in the input table with experimental data - [C,IA,IB] = intersect(inputData(:,1),strains); - % find out if data agrees for all strains so the same can 
be - % assumed for new organisms of the genus - for j=1:length(C) - for k=2:refCols(1)-1 - compData(j,k)=str2double(inputData{IA(j),k}); - end - end - % remove the ones that do not agree for at least 90% of - % cases + % if length(strains)>10 + compData=[]; + % find the strains in the input table with experimental data + [C,IA,IB] = intersect(inputData(:,1),strains); + % find out if data agrees for all strains so the same can be + % assumed for new organisms of the genus + for j=1:length(C) for k=2:refCols(1)-1 - if sum(compData(:,k)) < 0.9*length(C) - compData(:,k)=0; - end + compData(j,k)=str2double(inputData{IA(j),k}); end - % propagate the data to new organisms - % take the data from the strain with the most data - [C,IAsum]=max(sum(compData,2)); - for j=1:length(newStrains) - inputData(find(strcmp(inputData(:,1),infoFile{newStrains(j),1})),2:refCols(1)-1)=num2cell(compData(1,2:end)); - % propagate references - inputData(find(strcmp(inputData(:,1),infoFile{newStrains(j),1})),refCols(1):refCols(end))=inputData(IAsum(1),refCols(1):refCols(end)); + end + % remove the ones that do not agree for at least 90% of + % cases + for k=2:refCols(1)-1 + if sum(compData(:,k)) < 0.9*length(C) + compData(:,k)=0; end end + % propagate the data to new organisms + % take the data from the strain with the most data + [C,IAsum]=max(sum(compData,2)); + for j=1:length(newStrains) + inputData(find(strcmp(inputData(:,1),infoFile{newStrains(j),1})),2:refCols(1)-1)=num2cell(compData(1,2:end)); + % propagate references + inputData(find(strcmp(inputData(:,1),infoFile{newStrains(j),1})),refCols(1):refCols(end))=inputData(IAsum(1),refCols(1):refCols(end)); + end + % end end end end From 4dd94bb388af3bf2581cf3946da8d63578792647 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 11 Jun 2021 15:29:53 +0100 Subject: [PATCH 16/82] Removed futile cycles --- src/reconstruction/demeter/src/debugging/removeFutileCycles.m | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index 283f6ea162..785f57aaef 100755 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -76,6 +76,7 @@ 'Kt1r AND Kt3r AND EX_chsterol(e) AND ARGDA',[],'Kt3r','Kt3 AND ASPTA AND PC AND H2CO3D AND ASPNH4L AND r1667 AND EX_orn(e)' 'Kt1r AND Kt3r','EX_for(e)','Kt3r','Kt3 AND EX_for(e) AND FORt2r' 'Kt1r AND Kt3r',[],'Kt3r','Kt3' + 'Kt1r AND Kt2r',[],'Kt2r','Kt2' 'Kt1r AND Kt3r AND ACtr',[],'Kt3r AND ACtr','Kt3 AND ACt2r' 'CYTDt4 AND CYTDt2r',[],'CYTDt2r','CYTDt2' 'ASPt2_2 AND ASPt2r',[],'ASPt2_2','ASPt2_2i' @@ -426,6 +427,7 @@ 'SNG3POR AND OOR2r AND FUM AND POR4 AND HPYRI',[],'SNG3POR','G3PD5' 'NTMAOR AND SUCDimq AND FRD7 AND NADH6',[],'NTMAOR','NTMAORi' 'PIt6bi AND PIt7',[],'PIt7','PIt7ir' + 'PIt6b AND PIt7',[],'PIt7','PIt7ir' 'THMt3 AND THMte',[],'THMt3','THMt3i' 'PROPAT4te AND PROt4r',[],'PROt4r','PROt4' 'GLUOR AND GALM1r AND NADH6',[],'GLUOR','GLUORi' From 511a6bfb11032aefed6c7e2817a877cd3f179194 Mon Sep 17 00:00:00 2001 From: Joe Desbonnet Date: Mon, 14 Jun 2021 11:59:53 +0100 Subject: [PATCH 17/82] SVG version of cobratoolbox logo --- docs/source/images/cobratoolbox_logo.svg | 6088 ++++++++++++++++++++++ 1 file changed, 6088 insertions(+) create mode 100644 docs/source/images/cobratoolbox_logo.svg diff --git a/docs/source/images/cobratoolbox_logo.svg b/docs/source/images/cobratoolbox_logo.svg new file mode 100644 index 0000000000..f5836d7c0e --- /dev/null +++ b/docs/source/images/cobratoolbox_logo.svg @@ -0,0 +1,6088 @@ + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 2bc351892dbee452eeb5d7e346a51c49d76bb486 Mon Sep 17 00:00:00 2001 From: Joe Desbonnet Date: Mon, 14 Jun 2021 15:28:36 +0100 Subject: [PATCH 18/82] add note about project logo in layout.html --- docs/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/README.md b/docs/README.md index 88121835ba..df2329e758 100644 --- a/docs/README.md +++ b/docs/README.md @@ -61,6 +61,24 @@ and replace the ./stable or ./latest directory with the build output. Tracking code can be added to the template by editing layout.html or footer.html in https://github.com/opencobra/sphinx_cobra_theme/tree/develop/sphinx_cobra_theme/ +The tracking code is located near the end of the page. + +``` + + + +``` + +## Remarks about sphinx_cobra_theme + +The project logo (top-left) is hardcoded in layout.html at approx line 127. A comment in the code indicates this was done +to expedite configuration issues earlier in the project. Ideally the template should be reuseable (without modification) +for all the opencobra sub-projects: so need to find a way to externally configure the project logo. 
## Checking for broken links From a8394a9736ae7e1fa7630c1afd7d8346c9bffe76 Mon Sep 17 00:00:00 2001 From: Joe Desbonnet Date: Wed, 16 Jun 2021 02:26:43 +0100 Subject: [PATCH 19/82] prepareTutorials.sh: bash and wkhtmltopdf to standard locations in filesystem --- docs/prepareTutorials.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/prepareTutorials.sh b/docs/prepareTutorials.sh index e527a5d754..7820d37287 100755 --- a/docs/prepareTutorials.sh +++ b/docs/prepareTutorials.sh @@ -1,4 +1,4 @@ -#!/usr/local/bin/bash +#!/bin/bash usage="$(basename $0) -p=pdfPath -t=COBRATutorialsPath -c=COBRAToolBoxPath [-f=folderNameOfATutorial] [-h] [-l] [-m=mode] -- script to create tutorial documentation for the COBRA Toolbox. where: @@ -118,7 +118,7 @@ buildHTMLTutorials(){ do createLocalVariables $tutorial # create PDF file - /usr/local/bin/wkhtmltopdf --page-size A8 --margin-right 2 --margin-bottom 3 --margin-top 3 --margin-left 2 $pdfPath/tutorials/$tutorialFolder/$tutorialName.html $pdfPath/tutorials/$tutorialFolder/$tutorialName.pdf + wkhtmltopdf --page-size A8 --margin-right 2 --margin-bottom 3 --margin-top 3 --margin-left 2 $pdfPath/tutorials/$tutorialFolder/$tutorialName.html $pdfPath/tutorials/$tutorialFolder/$tutorialName.pdf sed 's##&#g' "$pdfPath/tutorials/$tutorialFolder/$tutorialName.html" > "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" sed -i.bak 's/white-space:\ pre-wrap/white-space:\ normal/g' "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" sed -i.bak 's/white-space:\ pre/white-space:\ normal/g' "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" @@ -131,7 +131,7 @@ buildHTMLSpecificTutorial(){ $matlab -nodesktop -nosplash -r "restoredefaultpath;initCobraToolbox;addpath('.artenolis');generateTutorials('$pdfPath', '$specificTutorial');restoredefaultpath;savepath;exit;" createLocalVariables $specificTutorial # create PDF file - /usr/local/bin/wkhtmltopdf --page-size A8 --margin-right 2 
--margin-bottom 3 --margin-top 3 --margin-left 2 $pdfPath/tutorials/$tutorialFolder/$tutorialName.html $pdfPath/tutorials/$tutorialFolder/$tutorialName.pdf + wkhtmltopdf --page-size A8 --margin-right 2 --margin-bottom 3 --margin-top 3 --margin-left 2 $pdfPath/tutorials/$tutorialFolder/$tutorialName.html $pdfPath/tutorials/$tutorialFolder/$tutorialName.pdf sed 's##&#g' "$pdfPath/tutorials/$tutorialFolder/$tutorialName.html" > "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" sed -i.bak 's/white-space:\ pre-wrap/white-space:\ normal/g' "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" sed -i.bak 's/white-space:\ pre/white-space:\ normal/g' "$pdfPath/tutorials/$tutorialFolder/iframe_$tutorialName.html" From e8c40f3e74de9f2d671b58dd918305697ffd64b9 Mon Sep 17 00:00:00 2001 From: Joe Desbonnet Date: Wed, 16 Jun 2021 02:41:07 +0100 Subject: [PATCH 20/82] openAndConvert() in matlab.internal.liveeditor package (not matlab.internal.richeditor) in recent releases --- .artenolis/generateTutorials.m | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.artenolis/generateTutorials.m b/.artenolis/generateTutorials.m index a480ac8668..3a39f6b925 100644 --- a/.artenolis/generateTutorials.m +++ b/.artenolis/generateTutorials.m @@ -15,8 +15,7 @@ function generateTutorials(destinationFolder, varargin) if strcmp(version('-release'), '2016b') openAndConvert = @matlab.internal.richeditor.openAndConvert; - end - if strcmp(version('-release'), '2017b') + else openAndConvert = @matlab.internal.liveeditor.openAndConvert; end [~, ~, ~] = mkdir(destinationFolder); From e9d3d7e7fdb7843f3659710e27d3bd9482172be5 Mon Sep 17 00:00:00 2001 From: Joe Desbonnet Date: Wed, 16 Jun 2021 10:44:11 +0100 Subject: [PATCH 21/82] Add section on building COBRA.tutorials docs --- docs/README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/README.md b/docs/README.md index df2329e758..1cc7fe88dd 100644 --- a/docs/README.md +++ 
b/docs/README.md @@ -47,7 +47,35 @@ in /var/tmp/cobratoolbox_doc_timestamp.tar.gz You can specify an alternative directory by changing the location of the /output mountpoint in the docker run command. +## Building COBRA.tutorials +Clone the cobratoolbox and COBRA.tutorials repositories into an empty directory. Then cd to the +./cobratoolbox/docs directory, then create and run the following script: + +``` +MATLAB_ROOT=/usr/local/MATLAB +MATLAB_VERSION=R2020b +OUTPUT=/var/tmp/COBRA.tutorials_output +./prepareTutorials.sh \ + -p=${OUTPUT} \ + -t=../../COBRA.tutorials \ + -c=../../cobratoolbox \ + -e=${MATLAB_ROOT}/${MATLAB_VERSION}/bin/matlab \ + -m=html + +``` + +Replace MATLAB_ROOT with your MATLAB installation directory if it differs from +/usr/local/MATLAB, and OUTPUT with the location to which the tutorial HTML files +are to be written. + +Remark: This procedure has been tested with the head of the cobratoolbox develop branch +(e8c40f3e74de9f2d671b58dd918305697ffd64b9) and +the head of the COBRA.tutorials master branch (0761e66374b0eff81db0f9adde87e118a12e967e) +on 2021-06-16, running on Ubuntu 18.04 with MATLAB R2020b. + +Remark: the dependency on MATLAB for this step makes it difficult to dockerize +due to the need for MATLAB licence files.
## Publishing the HTML to live site From be0540c039da69a501952119f6c5b5354fd2c223 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Wed, 16 Jun 2021 19:10:10 +0100 Subject: [PATCH 22/82] debugged futile cycles, improved mgPipe --- .../mgPipe/fastSetupCreator.m | 95 +--- .../mgPipe/initMgPipe.m | 8 +- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 77 ++- .../mgPipe/microbiotaModelSimulator.m | 491 ++++++++++-------- .../mgPipe/normalizeCoverage.m | 18 + .../src/debugging/removeFutileCycles.m | 3 + 6 files changed, 323 insertions(+), 369 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m index 750de3eb47..ccb0fab289 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m @@ -1,4 +1,4 @@ -function model = fastSetupCreator(exch, modelStoragePath, microbeNames, host, objre, buildSetupAll) +function model = fastSetupCreator(exch, modelStoragePath, microbeNames, host, objre) % creates a microbiota model (min 1 microbe) that can be coupled with a host % model. Microbes and host are connected with a lumen compartment [u], host % can secrete metabolites into body fluids [b]. 
Diet is simulated as uptake @@ -25,10 +25,6 @@ % host: Host COBRA model structure, can be left empty if % there is no host model % objre: char with reaction name of objective function of microbeNames -% buildSetupAll: boolean indicating the strategy that should be used to -% build personalized models: if true, build a global setup model -% containing all organisms in at least model (default), false: create -% models one by one (recommended for more than ~500 organisms total) % % OUTPUT: % model: COBRA model structure with all models combined @@ -160,90 +156,19 @@ [host] = mergeTwoModels(dummyHostEU, host, 2, false, false); end -if buildSetupAll - % Merge the models in a parallel way - % First load the stored models with lumen compartment in place - modelStorage = {}; - for i = 1:size(microbeNames, 1) - loadedModel = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']); - modelStorage{i, 1} = loadedModel; - end - - % Find the base 2 log of the number of models (how many branches are needed), and merge the models two by two: - % In each column of model storage the number of models decreases of half - %(because they have been pairwise merged) till the last column where only - % one big model is contained. The models that are not pairwise merged - %(because number of rows is not even ) are stored and then merged - % sequentially to the big model. 
- - pos = {}; % array where the position of models that cannot be merged pairwise (because their number in that iter is not - % even) in the original modelStorage vector is stored - dim = size(microbeNames, 1); - for j = 2:(floor(log2(size(microbeNames, 1))) + 1) % +1 because it starts with one column shifted - if mod(dim, 2) == 1 % check if number is even or not - halfdim = dim - 1; % approximated half dimension (needed to find how many iters to do - % for the pairwise merging - pos{1, j} = halfdim + 1; % find index of extramodel - halfdim = halfdim / 2; - else - halfdim = dim / 2; % no need for approximation - end - FirstSaveStore=modelStorage(:,(j-1)); - % SecondSaveStore=modelStorage(:,(j-1)); %changes 010318 - modelStorage(1:(dim-1),(j-1))={[]}; %this line will erase all the models from the container - %with the only exception of the last one that might be needed to be - %merged separately. This prevents a dramatic increase in ram usage in - %each iteration as result of stoaring all the merging three. 
- - for k=1:halfdim - parind = k; - parind=parind+(k-1); - FirstMod=FirstSaveStore(parind); - % SecondMod=SecondSaveStore(parind+1);%changes 010318 - SecondMod=FirstSaveStore(parind+1);%changes 010318 - % modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false)%changes 010318 - modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false); - end - dim = halfdim; - end - - % Merging the models remained alone and non-pairwise matched - if isempty(pos)== 1 %all the models were pairwise-merged - [model] = modelStorage{1,(floor(log2(size(microbeNames,1)))+1)}; +%% merge the models +for i = 2:size(microbeNames, 1) + if i==2 + model1 = readCbModel([modelStoragePath filesep microbeNames{1,1} '.mat']); + modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']); + model = mergeTwoModels(model1,modelNew,1,false,false); else - position = pos(1,:); %finding positions of non merged models - nexmod = find(~cellfun(@isempty,pos(1,:))); - toMerge = cell2mat(position(nexmod));%list of models still to merge - if (length(toMerge)) > 1 %more than 1 model was not pairwise merged - for k=2:(length(toMerge)+1) - if k==2 - [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},modelStorage{toMerge(1,k),(nexmod(k))-1},1,false,false); - elseif k > 3 - [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},model,1,false,false); - end - end - [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},model,1,false,false); - end - if (length(toMerge)) == 1 %1 model was not pairwise merged - [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},modelStorage{toMerge(1,1),(nexmod-1)},1,false,false); - end - end - -else - % merge in non-parallel way - for i = 2:size(microbeNames, 1) - if i==2 - model1 = readCbModel([modelStoragePath filesep microbeNames{1,1} '.mat']); - modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']); - model = 
mergeTwoModels(model1,modelNew,1,false,false); - else - modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']); - model = mergeTwoModels(model,modelNew,1,false,false); - end + modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']); + model = mergeTwoModels(model,modelNew,1,false,false); end end -% Merging with host if present +%% Merging with host if present % temp fix if isfield(model,'C') diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m index fbb6176b4c..d6df940f2d 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m @@ -23,10 +23,6 @@ % hostBiomassRxnFlux: double with the desired upper bound on flux through the host % biomass reaction (default: 1) % objre: char with reaction name of objective function of organisms -% buildSetupAll: boolean indicating the strategy that should be used to -% build personalized models: if true, build a global setup model -% containing all organisms in at least model (default), false: create -% models one by one (recommended for more than ~500 organisms total) % saveConstrModels: boolean indicating if models with imposed % constraints are saved externally % numWorkers: integer indicating the number of cores to use for parallelization @@ -75,7 +71,6 @@ parser.addParameter('hostBiomassRxn', '', @ischar); parser.addParameter('hostBiomassRxnFlux', 1, @isnumeric); parser.addParameter('objre', '', @ischar); -parser.addParameter('buildSetupAll', true, @islogical); parser.addParameter('saveConstrModels', false, @islogical); parser.addParameter('numWorkers', 2, @isnumeric); parser.addParameter('rDiet', false, @islogical); @@ -97,7 +92,6 @@ hostBiomassRxn = parser.Results.hostBiomassRxn; hostBiomassRxnFlux = parser.Results.hostBiomassRxnFlux; objre = parser.Results.objre; -buildSetupAll 
= parser.Results.buildSetupAll; saveConstrModels = parser.Results.saveConstrModels; numWorkers = parser.Results.numWorkers; rDiet = parser.Results.rDiet; @@ -189,7 +183,7 @@ init = true; -[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, buildSetupAll, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium); +[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium); cd(currentDir) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 6b821d2ea1..018a644bf4 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -1,4 +1,4 @@ -function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, buildSetupAll, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) +function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) % mgPipe is a MATLAB based pipeline to integrate microbial abundances % (coming from metagenomic data) 
with constraint based modeling, creating % individuals' personalized models. @@ -14,7 +14,7 @@ % into a folder. % % USAGE: -% [netSecretionFluxes, netUptakeFluxes, Y, modelStats,summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, buildSetupAll, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) +% [netSecretionFluxes, netUptakeFluxes, Y, modelStats,summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) % % INPUTS: % modPath: char with path of directory where models are stored @@ -29,10 +29,6 @@ % hostBiomassRxnFlux: double with the desired flux through the host % biomass reaction (default: zero) % objre: char with reaction name of objective function -% buildSetupAll: boolean indicating the strategy that should be used to -% build personalized models: if true, build a global setup model -% containing all organisms in at least model (default), false: create -% models one by one (recommended for more than ~500 organisms total) % saveConstrModels: boolean indicating if models with imposed % constraints are saved externally % figForm: format to use for saving figures @@ -209,51 +205,38 @@ end end -% if there is 500 reconstruction total or less, use fast setup creator to -% carve each personalized model from one large setup model. 
-if buildSetupAll - if modbuild == 1 - setup=fastSetupCreator(exch, modelStoragePath, microbeNames, host, objre, buildSetupAll); - setup.name='Global reconstruction with lumen / fecal compartments no host'; - setup.recon=0; - if ~isempty(host) - save(strcat(resPath,'Setup_host_allbacs.mat'), 'setup') - else - save(strcat(resPath,'Setup_allbacs.mat'), 'setup') - end +% create a separate setup model for each sample +% define what counts as zero abundance +tol=0.0000001; + +clear('microbeNames','exMets','abundance') + +if length(sampNames)>50 + steps=50; +else + steps=length(sampNames); +end +% proceed in batches for improved effiency +for j=1:steps:length(sampNames) + if length(sampNames)-j>=steps-1 + endPnt=steps-1; + else + endPnt=length(sampNames)-j; end - if modbuild==0 + parfor i=j:j+endPnt + % Each personalized model will be created separately. + % get the list of models for each sample and remove the ones not in + % this sample + + % check if model already exists if ~isempty(host) - load(strcat(resPath,'Setup_host_allbacs.mat')) + mId = strcat('host_microbiota_model_samp_', sampNames{i,1}, '.mat'); else - load(strcat(resPath,'Setup_allbacs.mat')) + mId = strcat('microbiota_model_samp_', sampNames{i,1}, '.mat'); end - end - - [createdModels]=createPersonalizedModel(abundance,resPath,setup,sampNames,microbeNames,couplingMatrix,host,hostBiomassRxn); - -else - % create a separate setup model for each sample - % define what counts as zero abundance - tol=0.0000001; - - clear('microbeNames','exMets','abundance') - - steps=50; - % proceed in batches for improved effiency - for j=1:steps:length(sampNames) - if length(sampNames)-j>=steps-1 - endPnt=steps-1; - else - endPnt=length(sampNames)-j; - end - - parfor i=j:j+endPnt - % Here, we will not be starting from one joined model containing all - % reconstructions. Instead, each personalized model will be created separately.) 
- % get the list of models for each sample and remove the ones not in - % this sample + if ~isfile(mId) + mappingData=load([resPath filesep 'mapInfo.mat']) microbeNamesSample = mappingData.microbeNames; couplingMatrixSample = mappingData.couplingMatrix; @@ -262,7 +245,7 @@ microbeNamesSample(cell2mat(abunRed(:,2)) < tol,:)=[]; couplingMatrixSample(cell2mat(abunRed(:,2)) < tol,:)=[]; abunRed(cell2mat(abunRed(:,2)) < tol,:)=[]; - setupModel = fastSetupCreator(exch, modelStoragePath, microbeNamesSample, host, objre, buildSetupAll); + setupModel = fastSetupCreator(exch, modelStoragePath, microbeNamesSample, host, objre); % create personalized models for the batch createdModel=createPersonalizedModel(abunRed,resPath,setupModel,sampNames(i,1),microbeNamesSample,couplingMatrixSample,host,hostBiomassRxn); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 676a5d6132..036e0cecc1 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -39,9 +39,9 @@ % OUTPUTS: % exchanges: cell array with list of all unique Exchanges to diet/ % fecal compartment -% netProduction: cell array containing FVA values for maximal uptake +% netProduction: cell array containing FVA values for maximal uptake % and secretion for setup lumen / diet exchanges -% netUptake: cell array containing FVA values for minimal uptake +% netUptake: cell array containing FVA values for minimal uptake % and secretion for setup lumen / diet exchanges % presol array containing values of microbiota models % objective function @@ -50,12 +50,22 @@ % .. 
Author: Federico Baldini, 2017-2018 % Almut Heinken, 03/2021: simplified inputs -% set a solver if not done yet +% initialize COBRA Toolbox and parallel pool global CBT_LP_SOLVER +if isempty(CBT_LP_SOLVER) + initCobraToolbox +end solver = CBT_LP_SOLVER; -if isempty(solver) - initCobraToolbox(false); %Don't update the toolbox automatically + +if numWorkers>0 && ~isempty(ver('parallel')) + % with parallelization + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end end +environment = getEnvironment(); +% for i=1:length(exMets) exchanges{i,1} = ['EX_' exMets{i}]; @@ -106,28 +116,6 @@ inFesMat = {}; presol = {}; - % Auto load for crashed simulations if desired - if repeatSim==0 - mapP = detectOutput(resPath, 'intRes.mat'); - if isempty(mapP) - startIter = 1; - else - s = 'simulation checkpoint file found: recovering crashed simulation'; - disp(s) - load(strcat(resPath, 'intRes.mat')) - - % Detecting when execution halted - for o = 1:length(netProduction(2, :)) - if isempty(netProduction{2, o}) == 0 - t = o; - end - end - startIter = t + 2; - end - elseif repeatSim==1 - startIter = 1; - end - % if simRes file already exists: some simulations may have been % incorrectly executed and need to repeat if isfile(strcat(resPath, 'simRes.mat')) @@ -146,243 +134,286 @@ HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; end - % Starting personalized simulations - for k = startIter:length(sampNames) - doSim=1; - % check first if simulations already exist and were done properly - if ~isempty(netProduction{2,k}) - vals=netProduction{2,k}(find(~cellfun(@isempty,(netProduction{2,k}(:,2)))),2); - if abs(sum(cell2mat(vals)))> 0.1 - doSim=0; - end + %% start the simulations + + 
if length(sampNames)>50 + steps=50; + else + steps=length(sampNames); + end + % proceed in batches for improved effiency + for s=1:steps:length(sampNames) + if length(sampNames)-j>=steps-1 + endPnt=steps-1; + else + endPnt=length(sampNames)-j; end - if doSim==1 - % simulations either not done yet or done incorrectly -> go - sampleID = sampNames{k,1}; - if ~isempty(hostPath) - microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); - else - microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); - end - model = microbiota_model; - for j = 1:length(model.rxns) - if strfind(model.rxns{j}, 'biomass') - model.lb(j) = 0; - end - end - - % adapt constraints - BiomassNumber=find(strcmp(model.rxns,'communityBiomass')); - Components = model.mets(find(model.S(:, BiomassNumber))); - Components = strrep(Components,'_biomass[c]',''); - for j=1:length(Components) - % remove constraints on demand reactions to prevent infeasibilities - findDm= model.rxns(find(strncmp(model.rxns,[Components{j} '_DM_'],length([Components{j} '_DM_'])))); - model = changeRxnBounds(model, findDm, 0, 'l'); - % constrain flux through sink reactions - findSink= model.rxns(find(strncmp(model.rxns,[Components{j} '_sink_'],length([Components{j} '_sink_'])))); - model = changeRxnBounds(model, findSink, -1, 'l'); - end - - model = changeObjective(model, 'EX_microbeBiomass[fe]'); - AllRxn = model.rxns; - RxnInd = find(cellfun(@(x) ~isempty(strfind(x, '[d]')), AllRxn)); - EXrxn = model.rxns(RxnInd); - EXrxn = regexprep(EXrxn, 'EX_', 'Diet_EX_'); - model.rxns(RxnInd) = EXrxn; - model = changeRxnBounds(model, 'communityBiomass', lowerBMBound, 'l'); - model = changeRxnBounds(model, 'communityBiomass', 1, 'u'); - model=changeRxnBounds(model,model.rxns(strmatch('UFEt_',model.rxns)),1000000,'u'); - model=changeRxnBounds(model,model.rxns(strmatch('DUt_',model.rxns)),1000000,'u'); - model=changeRxnBounds(model,model.rxns(strmatch('EX_',model.rxns)),1000000,'u'); - - 
% set constraints on host exchanges if present - if ~isempty(hostBiomassRxn) - hostEXrxns=find(strncmp(model.rxns,'Host_EX_',8)); - model=changeRxnBounds(model,model.rxns(hostEXrxns),0,'l'); - % constrain blood exchanges but make exceptions for metabolites that should be taken up from - % blood - takeupExch={'h2o','hco3','o2'}; - takeupExch=strcat('Host_EX_', takeupExch, '[e]b'); - model=changeRxnBounds(model,takeupExch,-100,'l'); - % close internal exchanges except for human metabolites known - % to be found in the intestine - hostIEXrxns=find(strncmp(model.rxns,'Host_IEX_',9)); - model=changeRxnBounds(model,model.rxns(hostIEXrxns),0,'l'); - takeupExch={'gchola','tdchola','tchola','dgchol','34dhphe','5htrp','Lkynr','f1a','gncore1','gncore2','dsT_antigen','sTn_antigen','core8','core7','core5','core4','ha','cspg_a','cspg_b','cspg_c','cspg_d','cspg_e','hspg'}; - takeupExch=strcat('Host_IEX_', takeupExch, '[u]tr'); - model=changeRxnBounds(model,takeupExch,-1000,'l'); - % set a minimum and a limit for flux through host biomass - % reaction - model=changeRxnBounds(model,['Host_' hostBiomassRxn],0.001,'l'); - model=changeRxnBounds(model,['Host_' hostBiomassRxn],hostBiomassRxnFlux,'u'); - end + + presolTmp={}; + inFesMatTmp={}; + + if computeProfiles + netProdTmp1={}; + netUptTmp1={}; + netProdTmp2={}; + netUptTmp2={}; + netProdTmp3={}; + netUptTmp3={}; + end + + % Starting personalized simulations + parfor k=s:s+endPnt + restoreEnvironment(environment); + changeCobraSolver(solver, 'LP', 0, -1); - % set parallel pool if no longer active - if numWorkers > 1 - poolobj = gcp('nocreate'); - if isempty(poolobj) - parpool(numWorkers) + doSim=1; + % check first if simulations already exist and were done properly + if ~isempty(netProduction{k}) + vals=netProduction{k}(find(~cellfun(@isempty,(netProduction{k}(:,2)))),2); + if abs(sum(cell2mat(vals)))> 0.1 + doSim=0; end end - - solution_allOpen = solveCobraLP(buildLPproblemFromModel(model)); - % 
solution_allOpen=solveCobraLPCPLEX(model,2,0,0,[],0); - if solution_allOpen.stat==0 - warning('Presolve detected one or more infeasible models. Please check InFesMat object !') - inFesMat{k, 1} = model.name; - else - presol{k, 1} = solution_allOpen.obj; - AllRxn = model.rxns; - FecalInd = find(cellfun(@(x) ~isempty(strfind(x,'[fe]')),AllRxn)); - DietInd = find(cellfun(@(x) ~isempty(strfind(x,'[d]')),AllRxn)); - FecalRxn = AllRxn(FecalInd); - FecalRxn=setdiff(FecalRxn,'EX_microbeBiomass[fe]','stable'); - DietRxn = AllRxn(DietInd); - if rDiet==1 && computeProfiles - [minFlux,maxFlux]=guidedSim(model,FecalRxn); - minFluxFecal = minFlux; - maxFluxFecal = maxFlux; - [minFlux,maxFlux]=guidedSim(model,DietRxn); - minFluxDiet = minFlux; - maxFluxDiet = maxFlux; - netProduction{1,k}=exchanges; - netUptake{1,k}=exchanges; - for i =1:length(FecalRxn) - [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProduction{1,k}{index,2} = minFluxDiet(i,1); - netProduction{1,k}{index,3} = maxFluxFecal(i,1); - netUptake{1,k}{index,2} = maxFluxDiet(i,1); - netUptake{1,k}{index,3} = minFluxFecal(i,1); - end - end - if rDiet==1 && saveConstrModels - microbiota_model=model; - mkdir([resPath filesep 'Rich']) - save([resPath filesep 'Rich' filesep 'microbiota_model_' sampleID '.mat'],'microbiota_model') - end - - % Using input diet - - model_sd=model; - if adaptMedium - [diet] = adaptVMHDietToAGORA(dietFilePath,'Microbiota'); + if doSim==1 || repeatSim==1 + % simulations either not done yet or done incorrectly -> go + sampleID = sampNames{k,1}; + if ~isempty(hostPath) + microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); else - diet = readtable(dietFilePath, 'Delimiter', '\t'); % load the text file with the diet - diet = table2cell(diet); - for j = 1:length(diet) - diet{j, 2} = num2str(-(diet{j, 2})); + microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + end + model = microbiota_model; + for j = 1:length(model.rxns) + if 
strfind(model.rxns{j}, 'biomass') + model.lb(j) = 0; end end - [model_sd] = useDiet(model_sd, diet,0); - if includeHumanMets - % add the human metabolites - for l=1:length(HumanMets) - model_sd=changeRxnBounds(model_sd,strcat('Diet_EX_',HumanMets{l},'[d]'),str2num(HumanMets{l,2}),'l'); - end + % adapt constraints + BiomassNumber=find(strcmp(model.rxns,'communityBiomass')); + Components = model.mets(find(model.S(:, BiomassNumber))); + Components = strrep(Components,'_biomass[c]',''); + for j=1:length(Components) + % remove constraints on demand reactions to prevent infeasibilities + findDm= model.rxns(find(strncmp(model.rxns,[Components{j} '_DM_'],length([Components{j} '_DM_'])))); + model = changeRxnBounds(model, findDm, 0, 'l'); + % constrain flux through sink reactions + findSink= model.rxns(find(strncmp(model.rxns,[Components{j} '_sink_'],length([Components{j} '_sink_'])))); + model = changeRxnBounds(model, findSink, -1, 'l'); end - if exist('unfre') ==1 %option to directly add other essential nutrients - warning('Feasibility forced with addition of essential nutrients') - model_sd=changeRxnBounds(model_sd, unfre,-0.1,'l'); + model = changeObjective(model, 'EX_microbeBiomass[fe]'); + AllRxn = model.rxns; + RxnInd = find(cellfun(@(x) ~isempty(strfind(x, '[d]')), AllRxn)); + EXrxn = model.rxns(RxnInd); + EXrxn = regexprep(EXrxn, 'EX_', 'Diet_EX_'); + model.rxns(RxnInd) = EXrxn; + model = changeRxnBounds(model, 'communityBiomass', lowerBMBound, 'l'); + model = changeRxnBounds(model, 'communityBiomass', 1, 'u'); + model=changeRxnBounds(model,model.rxns(strmatch('UFEt_',model.rxns)),1000000,'u'); + model=changeRxnBounds(model,model.rxns(strmatch('DUt_',model.rxns)),1000000,'u'); + model=changeRxnBounds(model,model.rxns(strmatch('EX_',model.rxns)),1000000,'u'); + + % set constraints on host exchanges if present + if ~isempty(hostBiomassRxn) + hostEXrxns=find(strncmp(model.rxns,'Host_EX_',8)); + model=changeRxnBounds(model,model.rxns(hostEXrxns),0,'l'); + % constrain 
blood exchanges but make exceptions for metabolites that should be taken up from + % blood + takeupExch={'h2o','hco3','o2'}; + takeupExch=strcat('Host_EX_', takeupExch, '[e]b'); + model=changeRxnBounds(model,takeupExch,-100,'l'); + % close internal exchanges except for human metabolites known + % to be found in the intestine + hostIEXrxns=find(strncmp(model.rxns,'Host_IEX_',9)); + model=changeRxnBounds(model,model.rxns(hostIEXrxns),0,'l'); + takeupExch={'gchola','tdchola','tchola','dgchol','34dhphe','5htrp','Lkynr','f1a','gncore1','gncore2','dsT_antigen','sTn_antigen','core8','core7','core5','core4','ha','cspg_a','cspg_b','cspg_c','cspg_d','cspg_e','hspg'}; + takeupExch=strcat('Host_IEX_', takeupExch, '[u]tr'); + model=changeRxnBounds(model,takeupExch,-1000,'l'); + % set a minimum and a limit for flux through host biomass + % reaction + model=changeRxnBounds(model,['Host_' hostBiomassRxn],0.001,'l'); + model=changeRxnBounds(model,['Host_' hostBiomassRxn],hostBiomassRxnFlux,'u'); end - solution_sDiet=solveCobraLP(buildLPproblemFromModel(model_sd)); - % solution_sDiet=solveCobraLPCPLEX(model_sd,2,0,0,[],0); - presol{k,2}=solution_sDiet.obj; - if solution_sDiet.stat==0 + + solution_allOpen = solveCobraLP(buildLPproblemFromModel(model)); + % solution_allOpen=solveCobraLPCPLEX(model,2,0,0,[],0); + if solution_allOpen.stat==0 warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') - inFesMat{k,2}= model.name; + inFesMatTmp{k}(1) = model.name; else - if computeProfiles - [minFlux,maxFlux]=guidedSim(model_sd,FecalRxn); - sma=maxFlux; - sma2=minFlux; - [minFlux,maxFlux]=guidedSim(model_sd,DietRxn); - smi=minFlux; - smi2=maxFlux; - maxFlux=sma; - minFlux=smi; - - netProduction{2,k}=exchanges; - netUptake{2,k}=exchanges; + presolTmp{k}(1) = solution_allOpen.obj; + AllRxn = model.rxns; + FecalInd = find(cellfun(@(x) ~isempty(strfind(x,'[fe]')),AllRxn)); + DietInd = find(cellfun(@(x) ~isempty(strfind(x,'[d]')),AllRxn)); + FecalRxn = AllRxn(FecalInd); + FecalRxn=setdiff(FecalRxn,'EX_microbeBiomass[fe]','stable'); + DietRxn = AllRxn(DietInd); + if rDiet==1 && computeProfiles + [minFlux,maxFlux]=guidedSim(model,FecalRxn); + minFluxFecal = minFlux; + maxFluxFecal = maxFlux; + [minFlux,maxFlux]=guidedSim(model,DietRxn); + minFluxDiet = minFlux; + maxFluxDiet = maxFlux; + netProdTmp{1,k}=exchanges; + netUptTmp{1,k}=exchanges; for i =1:length(FecalRxn) [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProduction{2,k}{index,2}=minFlux(i,1); - netProduction{2,k}{index,3}=maxFlux(i,1); - netUptake{2,k}{index,2}=smi2(i,1); - netUptake{2,k}{index,3}=sma2(i,1); + netProdTmp1{k}{index,2} = minFluxDiet(i,1); + netProdTmp1{k}{index,3} = maxFluxFecal(i,1); + netUptTmp1{k}{index,2} = maxFluxDiet(i,1); + netUptTmp1{k}{index,3} = minFluxFecal(i,1); end end - - if saveConstrModels - microbiota_model=model_sd; - mkdir([resPath filesep 'Diet']) - save([resPath filesep 'Diet' filesep 'microbiota_model_diet_' sampleID '.mat'],'microbiota_model') + if rDiet==1 && saveConstrModels + microbiota_model=model; + mkdir([resPath filesep 'Rich']) + parsave([resPath filesep 'Rich' filesep 'microbiota_model_' sampleID '.mat'],'microbiota_model') end - save(strcat(resPath,'intRes.mat'),'netProduction','presol','inFesMat', 'netUptake') + % Using input diet + + model_sd=model; + if adaptMedium + [diet] = 
adaptVMHDietToAGORA(dietFilePath,'Microbiota'); + else + diet = readtable(dietFilePath, 'Delimiter', '\t'); % load the text file with the diet + diet = table2cell(diet); + for j = 1:length(diet) + diet{j, 2} = num2str(-(diet{j, 2})); + end + end + [model_sd] = useDiet(model_sd, diet,0); - % Using personalized diet not documented in MgPipe and bug checked yet!!!! + if includeHumanMets + % add the human metabolites + for l=1:length(HumanMets) + model_sd=changeRxnBounds(model_sd,strcat('Diet_EX_',HumanMets{l},'[d]'),str2num(HumanMets{l,2}),'l'); + end + end - if pDiet==1 - model_pd=model; - [Numbers, Strings] = xlsread(strcat(abundancepath,fileNameDiets)); - % diet exchange reactions - DietNames = Strings(2:end,1); - % Diet exchanges for all individuals - Diets(:,k) = cellstr(num2str((Numbers(1:end,k)))); - Dietexchanges = {DietNames{:,1} ; Diets{:,k}}'; - Dietexchanges = regexprep(Dietexchanges,'EX_','Diet_EX_'); - Dietexchanges = regexprep(Dietexchanges,'\(e\)','\[d\]'); - - model_pd = setDietConstraints(model_pd,Dietexchanges); - - if includeHumanMets - % add the human metabolites - for l=1:length(HumanMets) - model_pd=changeRxnBounds(model_pd,strcat('Diet_EX_',HumanMets{l},'[d]'),str2num(HumanMets{l,2}),'l'); + solution_sDiet=solveCobraLP(buildLPproblemFromModel(model_sd)); + % solution_sDiet=solveCobraLPCPLEX(model_sd,2,0,0,[],0); + presolTmp{k}(2)=solution_sDiet.obj; + if solution_sDiet.stat==0 + warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') + inFesMatTmp{k}(2) = model.name; + else + if computeProfiles + [minFlux,maxFlux]=guidedSim(model_sd,FecalRxn); + sma=maxFlux; + sma2=minFlux; + [minFlux,maxFlux]=guidedSim(model_sd,DietRxn); + smi=minFlux; + smi2=maxFlux; + maxFlux=sma; + minFlux=smi; + + netProdTmp2{k}=exchanges; + netUptTmp2{k}=exchanges; + for i =1:length(FecalRxn) + [truefalse, index] = ismember(FecalRxn(i), exchanges); + netProdTmp2{k}{index,2}=minFlux(i,1); + netProdTmp2{k}{index,3}=maxFlux(i,1); + netUptTmp2{k}{index,2}=smi2(i,1); + netUptTmp2{k}{index,3}=sma2(i,1); end end - solution_pdiet=solveCobraLP(buildLPproblemFromModel(model_pd)); - %solution_pdiet=solveCobraLPCPLEX(model_pd,2,0,0,[],0); - presol{k,3}=solution_pdiet.obj; - if isnan(solution_pdiet.obj) - warning('Presolve detected one or more infeasible models. Please check InFesMat object !') - inFesMat{k,3}= model.name; - else + if saveConstrModels + microbiota_model=model_sd; + mkdir([resPath filesep 'Diet']) + parsave([resPath filesep 'Diet' filesep 'microbiota_model_diet_' sampleID '.mat'],'microbiota_model') + end + + % Using personalized diet not documented in MgPipe and bug checked yet!!!! 
+ + if pDiet==1 + model_pd=model; + [Numbers, Strings] = xlsread(strcat(abundancepath,fileNameDiets)); + % diet exchange reactions + DietNames = Strings(2:end,1); + % Diet exchanges for all individuals + Diets(:,k) = cellstr(num2str((Numbers(1:end,k)))); + Dietexchanges = {DietNames{:,1} ; Diets{:,k}}'; + Dietexchanges = regexprep(Dietexchanges,'EX_','Diet_EX_'); + Dietexchanges = regexprep(Dietexchanges,'\(e\)','\[d\]'); + + model_pd = setDietConstraints(model_pd,Dietexchanges); - if computeProfiles - [minFlux,maxFlux]=guidedSim(model_pd,FecalRxn); - sma=maxFlux; - [minFlux,maxFlux]=guidedSim(model_pd,DietRxn); - smi=minFlux; - maxFlux=sma; - minFlux=smi; - netProduction{3,k}=exchanges; - for i = 1:length(FecalRxn) - [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProduction{3,k}{index,2}=minFlux(i,1); - netProduction{3,k}{index,3}=maxFlux(i,1); + if includeHumanMets + % add the human metabolites + for l=1:length(HumanMets) + model_pd=changeRxnBounds(model_pd,strcat('Diet_EX_',HumanMets{l},'[d]'),str2num(HumanMets{l,2}),'l'); end end - if saveConstrModels - microbiota_model=model_pd; - mkdir(strcat(resPath,'Personalized')) - save([resPath filesep 'Personalized' filesep 'microbiota_model_pDiet_' sampleID '.mat'],'microbiota_model') + solution_pdiet=solveCobraLP(buildLPproblemFromModel(model_pd)); + %solution_pdiet=solveCobraLPCPLEX(model_pd,2,0,0,[],0); + presolTmp{k}(3)=solution_pdiet.obj; + if isnan(solution_pdiet.obj) + warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') + inFesMatTmp{k}(3) = model.name; + else + + if computeProfiles + [minFlux,maxFlux]=guidedSim(model_pd,FecalRxn); + sma=maxFlux; + [minFlux,maxFlux]=guidedSim(model_pd,DietRxn); + smi=minFlux; + maxFlux=sma; + minFlux=smi; + netProdTmp3{k}=exchanges; + for i = 1:length(FecalRxn) + [truefalse, index] = ismember(FecalRxn(i), exchanges); + netProdTmp3{k}{index,2}=minFlux(i,1); + netProdTmp3{k}{index,3}=maxFlux(i,1); + netUptTmp3{k}{index,2}=smi2(i,1); + netUptTmp3{k}{index,3}=sma2(i,1); + end + end + + if saveConstrModels + microbiota_model=model_pd; + mkdir(strcat(resPath,'Personalized')) + parsave([resPath filesep 'Personalized' filesep 'microbiota_model_pDiet_' sampleID '.mat'],'microbiota_model') + end + end - end end end end end - end - - % Saving all output of simulations - save(strcat(resPath,'simRes.mat'),'netProduction','presol','inFesMat', 'netUptake') + + for k=s:s+endPnt + if ~isempty(inFesMatTmp) + for l=1:size(inFesMatTmp{k},2) + inFesMat{k,l}=inFesMatTmp{k}(l); + end + end + for l=1:size(presolTmp{k},2) + presol{k,l}=presolTmp{k}(l); + end + + if computeProfiles + if ~isempty(netProdTmp1) + netProduction{1,k}=netProdTmp1{k}; + netUptake{1,k}=netUptTmp1{k}; + end + netProduction{2,k}=netProdTmp2{k}; + netUptake{2,k}=netUptTmp2{k}; + if ~isempty(netProdTmp3) + netProduction{3,k}=netProdTmp3{k}; + netUptake{3,k}=netUptTmp3{k}; + end + end + end + + % Saving all output of simulations + save(strcat(resPath,'simRes.mat'),'netProduction','presol','inFesMat', 'netUptake') + end end end diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m index a595266b9b..690bd47e08 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m @@ -32,6 +32,24 @@ coverage = 
table2cell(readtable(abunFilePath,'ReadVariableNames',false)); coverage{1,1}='ID'; +% summarize duplicate entries +[uniqueA,i,j] = unique(coverage(:,1)); +n = accumarray(j(:),1); +Dupes=uniqueA(find(n>1)); +delArray=[]; +cnt=1; +for i=1:length(Dupes) + indexToDupes = find(strcmp(coverage(:,1),Dupes{i})); + for j=2:length(indexToDupes) + for k=2:size(coverage,2) + coverage{indexToDupes(1),k}=num2str(str2double(coverage{indexToDupes(1),k})+str2double(coverage{indexToDupes(j),k})); + end + delArray(cnt,1)=indexToDupes(j); + cnt=cnt+1; + end +end +coverage(delArray,:)=[]; + % delete samples that are all zeros (if applies) totalAbun=sum(str2double(coverage(2:end,2:end)),1); allzero=find(totalAbun<0.0000001); diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index 785f57aaef..c5828cff88 100755 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -484,6 +484,9 @@ 'MAN6PI AND DCLMPDOH AND GDPGALP AND GDPMANNE AND HMR_7271',[],'GDPGALP','GDPGALPi' 'FE2DH AND FE3Ri AND NADH6 AND SUCD1 AND FRD7',[],'FE2DH','FE2DHi' 'GLBRAN AND GLDBRAN AND GLGNS1 AND GLPASE1 AND GPDDA1',[],'GLDBRAN',[] + 'CDPDPH AND CYTK1',[],'CDPDPH','CDPDPHi' + 'UMPK AND NDP7',[],'NDP7','NDP7i' + 'CLt4r AND r2137',[],'r2137','CLti' }; From ab8614a5264a8296651d59d534f57293fed75d15 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 17 Jun 2021 16:47:33 +0100 Subject: [PATCH 23/82] improved mgPipe --- .../mgPipe/buildModelStorage.m | 59 +++++++++++++++---- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 2 +- .../mgPipe/microbiotaModelSimulator.m | 2 +- 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m index d588dba6ef..ad90ad7234 100644 --- 
a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -1,10 +1,37 @@ -function [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath) +function [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers) +% This function builds the internal exchange space and the coupling +% constraints for models to join within mgPipe so they can be merged into +% microbiome models afterwards. +% +% USAGE +% [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers) +% +% INPUTS +% modPath: char with path of directory where models are stored +% microbeNames: list of microbe models included in the microbiome models +% numWorkers: integer indicating the number of cores to use for parallelization +% +% OUTPUTS +% exch: list of exchanges present in at least one microbe model +% modelStoragePath: path to the modified models to join afterwards +% couplingMatrix: matrix containing coupling constraints for each model to join +% +% AUTHOR: +% - Almut Heinken, 05/2021 currentDir=pwd; mkdir('modelStorage') cd('modelStorage') modelStoragePath = pwd; +if numWorkers>0 && ~isempty(ver('parallel')) + % with parallelization + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end +end + exch = {}; for j = 1:size(microbeNames, 1) model = readCbModel([modPath filesep microbeNames{j,1} '.mat']); @@ -23,24 +50,34 @@ if length(microbesNames)>0 %% create a new extracellular space [u] for microbes - for j = 1:size(microbeNames, 1) - model = readCbModel([modPath filesep microbeNames{j,1} '.mat']); + for i = 1:size(microbeNames, 1) + model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); % temp fix if isfield(model,'C') model=rmfield(model,'C'); model=rmfield(model,'d'); end - % % removing possible constraints of the bacs selExc = findExcRxns(model); Reactions2 = 
model.rxns(find(selExc)); allex = Reactions2(strmatch('EX', Reactions2)); - biomass = allex(find(strncmp(allex,'bio',3))); + biomass = allex(find(strncmp(allex,'EX_bio',6))); finrex = setdiff(allex, biomass); model = changeRxnBounds(model, finrex, -1000, 'l'); model = changeRxnBounds(model, finrex, 1000, 'u'); + % remove exchange reactions that cannot carry flux + try + [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',finrex); + catch + [minFlux,maxFlux]=fluxVariability(model,0,'max',finrex); + end + nominflux=find(abs(minFlux) < 0.00000001); + nomaxflux=find(abs(maxFlux) < 0.00000001); + noflux=intersect(nominflux,nomaxflux); + model=removeRxns(model,finrex(noflux)); + % removing blocked reactions from the bacs %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel); %model= removeRxns(model, BlockedRxns); @@ -51,17 +88,17 @@ eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); % exchanged metabolites dummyMicEU = createModel(); %dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1)); - dummyMicEUmets = [strcat(strcat(microbeNames{j, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')]; + dummyMicEUmets = [strcat(strcat(microbeNames{i, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')]; dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets); nMets = numel(eMets); S = [speye(nMets);-speye(nMets)]; lbs = repmat(-1000,nMets,1); ubs = repmat(1000,nMets,1); - names = strcat(strcat(microbeNames{j, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr'); + names = strcat(strcat(microbeNames{i, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr'); dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs); model = removeRxns(model, exmod); - model.rxns = strcat(strcat(microbeNames{j, 1}, '_'), model.rxns); - model.mets = strcat(strcat(microbeNames{j, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] + 
model.rxns = strcat(strcat(microbeNames{i, 1}, '_'), model.rxns); + model.mets = strcat(strcat(microbeNames{i, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] [model] = mergeTwoModels(dummyMicEU, model, 2, false, false); %finish up by A: removing duplicate reactions @@ -71,9 +108,9 @@ writeCbModel(model,'format','mat','fileName',[microbeNames{j,1} '.mat']); % store model % add coupling constraints and store them - IndRxns=find(strncmp(model.rxns,[microbeNames{j,1} '_'],length(microbeNames{j,1})+1));%finding indixes of specific reactions + IndRxns=find(strncmp(model.rxns,[microbeNames{j,1} '_'],length(microbeNames{i,1})+1));%finding indixes of specific reactions % find the name of biomass reaction in the microbe model - bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{j,1},'_bio'),length(char(strcat(microbeNames{j,1},'_bio')))))}; + bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{j,1},'_bio'),length(char(strcat(microbeNames{i,1},'_bio')))))}; model=coupleRxnList2Rxn(model,model.rxns(IndRxns(1:length(model.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions couplingMatrix{j,1}=model.C; couplingMatrix{j,2}=model.d; diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 018a644bf4..e37392d45a 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -96,7 +96,7 @@ % Extracellular spaces simulating the lumen are built and stored for % each microbe. 
- [exch,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath); + [exch,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath,numWorkers); % Computing reaction presence ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {}); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 036e0cecc1..2d3fc0ce7d 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -129,7 +129,7 @@ end % determine human-derived metabolites present in the gut: primary bile - % acexchangess, amines, mucins, host glycans + % amines, mucins, host glycans if includeHumanMets HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; end From f16deb55b3f7a5d9778f8f1b440d12b26841c6d6 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 18 Jun 2021 00:46:10 +0100 Subject: [PATCH 24/82] improved mgPipe --- .../mgPipe/buildModelStorage.m | 144 +++++++++++++----- .../mgPipe/initMgPipe.m | 12 +- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 12 +- .../mgPipe/microbiotaModelSimulator.m | 22 +-- 4 files changed, 137 insertions(+), 53 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m index ad90ad7234..3f5b17cba5 100644 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ 
b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -1,18 +1,29 @@ -function [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers) +function [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers) % This function builds the internal exchange space and the coupling % constraints for models to join within mgPipe so they can be merged into -% microbiome models afterwards. +% microbiome models afterwards. Exchanges that can never carry flux on the +% given diet are removed to reduce computation time. % % USAGE -% [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers) +% [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers) % % INPUTS -% modPath: char with path of directory where models are stored % microbeNames: list of microbe models included in the microbiome models +% modPath: char with path of directory where models are stored +% adaptMedium: boolean indicating if the medium should be adapted through the +% adaptVMHDietToAGORA function or used as is (default=true) +% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux +% under the given constraints should be removed (default=false). +% Recommended for large-scale simulation projects. 
+% dietFilePath: char with path of directory where the diet is saved +% includeHumanMets: boolean indicating if human-derived metabolites +% present in the gut should be provided to the models (default: true) +% adaptMedium: boolean indicating if the medium should be adapted through the +% adaptVMHDietToAGORA function or used as is (default=true) % numWorkers: integer indicating the number of cores to use for parallelization % % OUTPUTS -% exch: list of exchanges present in at least one microbe model +% exch: list of exchanged metabolites present in at least one microbe model % modelStoragePath: path to the modified models to join afterwards % couplingMatrix: matrix containing coupling constraints for each model to join % @@ -32,13 +43,68 @@ end end +% determine human-derived metabolites present in the gut: primary bile +% amines, mucins, host glycans +if includeHumanMets + HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; +end + +% load diet constraints +if adaptMedium + [diet] = adaptVMHDietToAGORA(dietFilePath,'AGORA'); +else + diet = readtable(dietFilePath, 'Delimiter', '\t'); % load the text file with the diet + diet = table2cell(diet); + for i = 1:length(diet) + diet{i, 2} = num2str(-(diet{i, 2})); + end +end + +% get all exchanges that can carry flux in at least one model on the given +% diet, including metabolites that can be secreted exch = {}; -for j = 1:size(microbeNames, 1) - model = readCbModel([modPath filesep microbeNames{j,1} '.mat']); - %exch = union(exch, model.mets(find(sum(model.S(:, strncmp('EX_', model.rxns, 3)), 2) ~= 0))); - exStruct = findSExRxnInd(model); - new_exch = findMetsFromRxns(model,model.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool)); - 
exch = union(exch,new_exch); +for i = 1:size(microbeNames, 1) + model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); + + exMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); + ex_rxns = {}; + for j=1:length(exMets) + ex_rxns{j}=['EX_' exMets{j}]; + ex_rxns{j}=strrep(ex_rxns{j},'[e]','(e)'); + end + % account for depracated nomenclature + ex_rxns=intersect(ex_rxns,model.rxns); + + if pruneModels + % Using input diet + model = useDiet(model, diet,0); + + if includeHumanMets + % add the human metabolites + for l=1:length(HumanMets) + model=changeRxnBounds(model,strcat('EX_',HumanMets{l},'(e)'),str2num(HumanMets{l,2}),'l'); + end + end + + % compute which exchanges can carry flux + try + [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',ex_rxns); + catch + [minFlux,maxFlux]=fluxVariability(model,0,'max',ex_rxns); + end + minflux=find(abs(minFlux) > 0.00000001); + maxflux=find(abs(maxFlux) > 0.00000001); + flux=union(minflux,maxflux); + + pruned_ex_rxns = ex_rxns(flux); + pruned_ex_rxns=strrep(pruned_ex_rxns,'EX_',''); + pruned_ex_rxns=strrep(pruned_ex_rxns,'(e)','[e]'); + exch = union(exch,pruned_ex_rxns); + else + ex_rxns=strrep(ex_rxns,'EX_',''); + ex_rxns=strrep(ex_rxns,'(e)','[e]'); + exch = union(exch,ex_rxns); + end end % get already built reconstructions @@ -46,9 +112,8 @@ modelList={dInfo.name}; modelList=modelList'; modelList=strrep(modelList,'.mat',''); -microbesNames=setdiff(microbeNames,modelList); -if length(microbesNames)>0 +if length(setdiff(microbeNames,modelList))>0 %% create a new extracellular space [u] for microbes for i = 1:size(microbeNames, 1) model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); @@ -67,21 +132,30 @@ model = changeRxnBounds(model, finrex, -1000, 'l'); model = changeRxnBounds(model, finrex, 1000, 'u'); - % remove exchange reactions that cannot carry flux - try - [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',finrex); - catch - [minFlux,maxFlux]=fluxVariability(model,0,'max',finrex); + 
if pruneModels + % remove exchange reactions that cannot carry flux + try + [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',finrex); + catch + [minFlux,maxFlux]=fluxVariability(model,0,'max',finrex); + end + nominflux=find(abs(minFlux) < 0.00000001); + nomaxflux=find(abs(maxFlux) < 0.00000001); + noflux=intersect(nominflux,nomaxflux); + model=removeRxns(model,finrex(noflux)); + + % additionally, remove exchanges that are neither consumed or + % secreted by any microbe on this diet + selExc = findExcRxns(model); + Reactions2 = model.rxns(find(selExc)); + allex = Reactions2(strmatch('EX', Reactions2)); + [C]=setdiff(allex,exch); + model=removeRxns(model,C); + +% % removing blocked reactions from the bacs +% BlockedRxns = identifyFastBlockedRxns(model,model.rxns); +% model= removeRxns(model, BlockedRxns); end - nominflux=find(abs(minFlux) < 0.00000001); - nomaxflux=find(abs(maxFlux) < 0.00000001); - noflux=intersect(nominflux,nomaxflux); - model=removeRxns(model,finrex(noflux)); - - % removing blocked reactions from the bacs - %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel); - %model= removeRxns(model, BlockedRxns); - %BlockedReaction = findBlockedReaction(model,'L2') model = convertOldStyleModel(model); exmod = model.rxns(strncmp('EX_', model.rxns, 3)); % find exchange reactions @@ -100,22 +174,22 @@ model.rxns = strcat(strcat(microbeNames{i, 1}, '_'), model.rxns); model.mets = strcat(strcat(microbeNames{i, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] [model] = mergeTwoModels(dummyMicEU, model, 2, false, false); - + %finish up by A: removing duplicate reactions %We will lose information here, but we will just remove the duplicates. 
[model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1); - writeCbModel(model,'format','mat','fileName',[microbeNames{j,1} '.mat']); % store model + writeCbModel(model,'format','mat','fileName',[microbeNames{i,1} '.mat']); % store model % add coupling constraints and store them - IndRxns=find(strncmp(model.rxns,[microbeNames{j,1} '_'],length(microbeNames{i,1})+1));%finding indixes of specific reactions + IndRxns=find(strncmp(model.rxns,[microbeNames{i,1} '_'],length(microbeNames{i,1})+1));%finding indixes of specific reactions % find the name of biomass reaction in the microbe model - bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{j,1},'_bio'),length(char(strcat(microbeNames{i,1},'_bio')))))}; + bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{i,1},'_bio'),length(char(strcat(microbeNames{i,1},'_bio')))))}; model=coupleRxnList2Rxn(model,model.rxns(IndRxns(1:length(model.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions - couplingMatrix{j,1}=model.C; - couplingMatrix{j,2}=model.d; - couplingMatrix{j,3}=model.dsense; - couplingMatrix{j,4}=model.ctrs; + couplingMatrix{i,1}=model.C; + couplingMatrix{i,2}=model.d; + couplingMatrix{i,3}=model.dsense; + couplingMatrix{i,4}=model.ctrs; end end diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m index d6df940f2d..059c4b8d19 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m @@ -33,9 +33,11 @@ % lowerBMBound: lower bound on community biomass (default=0.4) % repeatSim: boolean defining if simulations should be repeated and previous results % overwritten (default=false) -% adaptMedium: boolean indicating if the medium should be -% adapted through the adaptVMHDietToAGORA -% function or used as is (default=true) +% adaptMedium: boolean indicating if the 
medium should be adapted through the +% adaptVMHDietToAGORA function or used as is (default=true) +% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux +% under the given constraints should be removed (default=false). +% Recommended for large-scale simulation projects. % % OUTPUTS: % init: status of initialization @@ -79,6 +81,7 @@ parser.addParameter('lowerBMBound', 0.4, @isnumeric); parser.addParameter('repeatSim', false, @islogical); parser.addParameter('adaptMedium', true, @islogical); +parser.addParameter('pruneModels', false, @islogical); parser.parse(modPath, abunFilePath, computeProfiles, varargin{:}); @@ -100,6 +103,7 @@ lowerBMBound = parser.Results.lowerBMBound; repeatSim = parser.Results.repeatSim; adaptMedium = parser.Results.adaptMedium; +pruneModels = parser.Results.pruneModels; global CBT_LP_SOLVER if isempty(CBT_LP_SOLVER) @@ -183,7 +187,7 @@ init = true; -[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium); +[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); cd(currentDir) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index e37392d45a..b1aaf5e496 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -1,4 +1,4 @@ -function [netSecretionFluxes, netUptakeFluxes, Y, 
modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) +function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) % mgPipe is a MATLAB based pipeline to integrate microbial abundances % (coming from metagenomic data) with constraint based modeling, creating % individuals' personalized models. @@ -40,9 +40,11 @@ % lowerBMBound: lower bound on community biomass (default=0.4) % repeatSim: boolean defining if simulations should be repeated and previous results % overwritten (default=false) -% adaptMedium: boolean indicating if the medium should be -% adapted through the adaptVMHDietToAGORA -% function or used as is (default=true) +% adaptMedium: boolean indicating if the medium should be adapted through the +% adaptVMHDietToAGORA function or used as is (default=true) +% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux +% under the given constraints should be removed (default=false). +% Recommended for large-scale simulation projects. % % OUTPUTS: % init: status of initialization @@ -96,7 +98,7 @@ % Extracellular spaces simulating the lumen are built and stored for % each microbe. 
- [exch,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath,numWorkers); + [exch,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers); % Computing reaction presence ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {}); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 2d3fc0ce7d..7def000bbb 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -143,10 +143,10 @@ end % proceed in batches for improved effiency for s=1:steps:length(sampNames) - if length(sampNames)-j>=steps-1 + if length(sampNames)-s>=steps-1 endPnt=steps-1; else - endPnt=length(sampNames)-j; + endPnt=length(sampNames)-s; end presolTmp={}; @@ -162,7 +162,7 @@ end % Starting personalized simulations - parfor k=s:s+endPnt + for k=s:s+endPnt restoreEnvironment(environment); changeCobraSolver(solver, 'LP', 0, -1); @@ -240,7 +240,7 @@ % solution_allOpen=solveCobraLPCPLEX(model,2,0,0,[],0); if solution_allOpen.stat==0 warning('Presolve detected one or more infeasible models. Please check InFesMat object !') - inFesMatTmp{k}(1) = model.name; + inFesMatTmp{k}{1} = model.name; else presolTmp{k}(1) = solution_allOpen.obj; AllRxn = model.rxns; @@ -298,7 +298,7 @@ presolTmp{k}(2)=solution_sDiet.obj; if solution_sDiet.stat==0 warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') - inFesMatTmp{k}(2) = model.name; + inFesMatTmp{k}{2} = model.name; else if computeProfiles [minFlux,maxFlux]=guidedSim(model_sd,FecalRxn); @@ -393,8 +393,10 @@ inFesMat{k,l}=inFesMatTmp{k}(l); end end - for l=1:size(presolTmp{k},2) - presol{k,l}=presolTmp{k}(l); + if ~isempty(presolTmp) + for l=1:size(presolTmp{k},2) + presol{k,l}=presolTmp{k}(l); + end end if computeProfiles @@ -402,8 +404,10 @@ netProduction{1,k}=netProdTmp1{k}; netUptake{1,k}=netUptTmp1{k}; end - netProduction{2,k}=netProdTmp2{k}; - netUptake{2,k}=netUptTmp2{k}; + if ~isempty(netProdTmp2) + netProduction{2,k}=netProdTmp2{k}; + netUptake{2,k}=netUptTmp2{k}; + end if ~isempty(netProdTmp3) netProduction{3,k}=netProdTmp3{k}; netUptake{3,k}=netUptTmp3{k}; From 430048ff2e799bbaf0b021502e5c8b6a4ef36278 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 18 Jun 2021 16:47:13 +0100 Subject: [PATCH 25/82] improved mgPipe --- .../mgPipe/buildModelStorage.m | 2 + .../mgPipe/microbiotaModelSimulator.m | 382 +++++++++++------- 2 files changed, 244 insertions(+), 140 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m index 3f5b17cba5..7bf86da321 100644 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -149,6 +149,8 @@ selExc = findExcRxns(model); Reactions2 = model.rxns(find(selExc)); allex = Reactions2(strmatch('EX', Reactions2)); + allex=strrep(allex,'EX_',''); + allex=strrep(allex,'(e)','[e]'); [C]=setdiff(allex,exch); model=removeRxns(model,C); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 7def000bbb..56f81674b8 100755 --- 
a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -50,22 +50,16 @@ % .. Author: Federico Baldini, 2017-2018 % Almut Heinken, 03/2021: simplified inputs -% initialize COBRA Toolbox and parallel pool +% set a solver if not done yet global CBT_LP_SOLVER -if isempty(CBT_LP_SOLVER) - initCobraToolbox -end solver = CBT_LP_SOLVER; +if isempty(solver) + initCobraToolbox(false); %Don't update the toolbox automatically +end -if numWorkers>0 && ~isempty(ver('parallel')) - % with parallelization - poolobj = gcp('nocreate'); - if isempty(poolobj) - parpool(numWorkers) - end +if saveConstrModels + mkdir([resPath filesep 'Diet']) end -environment = getEnvironment(); -% for i=1:length(exMets) exchanges{i,1} = ['EX_' exMets{i}]; @@ -78,103 +72,98 @@ repeatSim=0; end -% define whether simulations should be skipped -skipSim=0; -if isfile(strcat(resPath, 'simRes.mat')) - load(strcat(resPath, 'simRes.mat')) - skipSim=1; - for i=1:size(presol,1) - % check for all feasible models that simulations were properly - % executed - if presol{i,2} > lowerBMBound - if isempty(netProduction{2,i}(:,2)) - % feasible model was skipped, repeat simulations - skipSim=0; - end - vals=netProduction{2,i}(find(~cellfun(@isempty,(netProduction{2,i}(:,2)))),2); - if abs(sum(cell2mat(vals)))<0.000001 - % feasible model was skipped, repeat simulations - skipSim=0; - end - end - end - % verify that every simulation result is correct -end - -% if repeatSim is true, simulations will be repeated in any case -if repeatSim==1 - skipSim=0; -end - -if skipSim==1 - s = 'simulations already done, file found: loading from resPath'; - disp(s) -else - % Cell array to store results - netProduction = cell(3, length(sampNames)); - netUptake = cell(3, length(sampNames)); - inFesMat = {}; - presol = {}; +if computeProfiles + %% start the simulations - % if simRes file already exists: some 
simulations may have been - % incorrectly executed and need to repeat + % define whether simulations should be skipped + skipSim=0; if isfile(strcat(resPath, 'simRes.mat')) load(strcat(resPath, 'simRes.mat')) + skipSim=1; + for i=1:size(presol,1) + % check for all feasible models that simulations were properly + % executed + if presol{i,2} > lowerBMBound + if isempty(netProduction{2,i}(:,2)) + % feasible model was skipped, repeat simulations + skipSim=0; + end + vals=netProduction{2,i}(find(~cellfun(@isempty,(netProduction{2,i}(:,2)))),2); + if abs(sum(cell2mat(vals)))<0.000001 + % feasible model was skipped, repeat simulations + skipSim=0; + end + end + end + % verify that every simulation result is correct end - % End of Auto load for crashed simulations - - if ~exist('lowerBMBound','var') - lowerBMBound=0.4; - end - - % determine human-derived metabolites present in the gut: primary bile - % amines, mucins, host glycans - if includeHumanMets - HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; + % if repeatSim is true, simulations will be repeated in any case + if repeatSim==1 + skipSim=0; end - %% start the simulations - - if length(sampNames)>50 - steps=50; + if skipSim==1 + s = 'simulations already done, file found: loading from resPath'; + disp(s) else - steps=length(sampNames); - end - % proceed in batches for improved effiency - for s=1:steps:length(sampNames) - if length(sampNames)-s>=steps-1 - endPnt=steps-1; - else - endPnt=length(sampNames)-s; + % Cell array to store results + netProduction = cell(3, length(sampNames)); + netUptake = cell(3, length(sampNames)); + inFesMat = {}; + presol = {}; + + % Auto load for crashed simulations if desired + if repeatSim==0 + mapP 
= detectOutput(resPath, 'intRes.mat'); + if isempty(mapP) + startIter = 1; + else + s = 'simulation checkpoint file found: recovering crashed simulation'; + disp(s) + load(strcat(resPath, 'intRes.mat')) + + % Detecting when execution halted + for o = 1:length(netProduction(2, :)) + if isempty(netProduction{2, o}) == 0 + t = o; + end + end + startIter = t + 2; + end + elseif repeatSim==1 + startIter = 1; end - presolTmp={}; - inFesMatTmp={}; + % if simRes file already exists: some simulations may have been + % incorrectly executed and need to repeat + if isfile(strcat(resPath, 'simRes.mat')) + load(strcat(resPath, 'simRes.mat')) + end + + % End of Auto load for crashed simulations + + if ~exist('lowerBMBound','var') + lowerBMBound=0.4; + end - if computeProfiles - netProdTmp1={}; - netUptTmp1={}; - netProdTmp2={}; - netUptTmp2={}; - netProdTmp3={}; - netUptTmp3={}; + % determine human-derived metabolites present in the gut: primary bile + % acexchangess, amines, mucins, host glycans + if includeHumanMets + HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; end % Starting personalized simulations - for k=s:s+endPnt - restoreEnvironment(environment); - changeCobraSolver(solver, 'LP', 0, -1); - + for k = startIter:length(sampNames) doSim=1; % check first if simulations already exist and were done properly - if ~isempty(netProduction{k}) - vals=netProduction{k}(find(~cellfun(@isempty,(netProduction{k}(:,2)))),2); + if ~isempty(netProduction{2,k}) + vals=netProduction{2,k}(find(~cellfun(@isempty,(netProduction{2,k}(:,2)))),2); if abs(sum(cell2mat(vals)))> 0.1 doSim=0; end end - if doSim==1 || repeatSim==1 + if doSim==1 % simulations either not done yet or done incorrectly -> go sampleID 
= sampNames{k,1}; if ~isempty(hostPath) @@ -236,13 +225,21 @@ model=changeRxnBounds(model,['Host_' hostBiomassRxn],hostBiomassRxnFlux,'u'); end + % set parallel pool if no longer active + if numWorkers > 1 + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end + end + solution_allOpen = solveCobraLP(buildLPproblemFromModel(model)); % solution_allOpen=solveCobraLPCPLEX(model,2,0,0,[],0); if solution_allOpen.stat==0 warning('Presolve detected one or more infeasible models. Please check InFesMat object !') - inFesMatTmp{k}{1} = model.name; + inFesMat{k, 1} = model.name; else - presolTmp{k}(1) = solution_allOpen.obj; + presol{k, 1} = solution_allOpen.obj; AllRxn = model.rxns; FecalInd = find(cellfun(@(x) ~isempty(strfind(x,'[fe]')),AllRxn)); DietInd = find(cellfun(@(x) ~isempty(strfind(x,'[d]')),AllRxn)); @@ -256,20 +253,20 @@ [minFlux,maxFlux]=guidedSim(model,DietRxn); minFluxDiet = minFlux; maxFluxDiet = maxFlux; - netProdTmp{1,k}=exchanges; - netUptTmp{1,k}=exchanges; + netProduction{1,k}=exchanges; + netUptake{1,k}=exchanges; for i =1:length(FecalRxn) [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProdTmp1{k}{index,2} = minFluxDiet(i,1); - netProdTmp1{k}{index,3} = maxFluxFecal(i,1); - netUptTmp1{k}{index,2} = maxFluxDiet(i,1); - netUptTmp1{k}{index,3} = minFluxFecal(i,1); + netProduction{1,k}{index,2} = minFluxDiet(i,1); + netProduction{1,k}{index,3} = maxFluxFecal(i,1); + netUptake{1,k}{index,2} = maxFluxDiet(i,1); + netUptake{1,k}{index,3} = minFluxFecal(i,1); end end if rDiet==1 && saveConstrModels microbiota_model=model; mkdir([resPath filesep 'Rich']) - parsave([resPath filesep 'Rich' filesep 'microbiota_model_' sampleID '.mat'],'microbiota_model') + save([resPath filesep 'Rich' filesep 'microbiota_model_' sampleID '.mat'],'microbiota_model') end % Using input diet @@ -293,12 +290,16 @@ end end + if exist('unfre') ==1 %option to directly add other essential nutrients + warning('Feasibility forced with addition of essential 
nutrients') + model_sd=changeRxnBounds(model_sd, unfre,-0.1,'l'); + end solution_sDiet=solveCobraLP(buildLPproblemFromModel(model_sd)); % solution_sDiet=solveCobraLPCPLEX(model_sd,2,0,0,[],0); - presolTmp{k}(2)=solution_sDiet.obj; + presol{k,2}=solution_sDiet.obj; if solution_sDiet.stat==0 warning('Presolve detected one or more infeasible models. Please check InFesMat object !') - inFesMatTmp{k}{2} = model.name; + inFesMat{k,2}= model.name; else if computeProfiles [minFlux,maxFlux]=guidedSim(model_sd,FecalRxn); @@ -310,23 +311,24 @@ maxFlux=sma; minFlux=smi; - netProdTmp2{k}=exchanges; - netUptTmp2{k}=exchanges; + netProduction{2,k}=exchanges; + netUptake{2,k}=exchanges; for i =1:length(FecalRxn) [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProdTmp2{k}{index,2}=minFlux(i,1); - netProdTmp2{k}{index,3}=maxFlux(i,1); - netUptTmp2{k}{index,2}=smi2(i,1); - netUptTmp2{k}{index,3}=sma2(i,1); + netProduction{2,k}{index,2}=minFlux(i,1); + netProduction{2,k}{index,3}=maxFlux(i,1); + netUptake{2,k}{index,2}=smi2(i,1); + netUptake{2,k}{index,3}=sma2(i,1); end end if saveConstrModels microbiota_model=model_sd; - mkdir([resPath filesep 'Diet']) - parsave([resPath filesep 'Diet' filesep 'microbiota_model_diet_' sampleID '.mat'],'microbiota_model') + save([resPath filesep 'Diet' filesep 'microbiota_model_diet_' sampleID '.mat'],'microbiota_model') end + save(strcat(resPath,'intRes.mat'),'netProduction','presol','inFesMat', 'netUptake') + % Using personalized diet not documented in MgPipe and bug checked yet!!!! if pDiet==1 @@ -351,10 +353,10 @@ solution_pdiet=solveCobraLP(buildLPproblemFromModel(model_pd)); %solution_pdiet=solveCobraLPCPLEX(model_pd,2,0,0,[],0); - presolTmp{k}(3)=solution_pdiet.obj; + presol{k,3}=solution_pdiet.obj; if isnan(solution_pdiet.obj) warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') - inFesMatTmp{k}(3) = model.name; + inFesMat{k,3}= model.name; else if computeProfiles @@ -364,20 +366,18 @@ smi=minFlux; maxFlux=sma; minFlux=smi; - netProdTmp3{k}=exchanges; + netProduction{3,k}=exchanges; for i = 1:length(FecalRxn) [truefalse, index] = ismember(FecalRxn(i), exchanges); - netProdTmp3{k}{index,2}=minFlux(i,1); - netProdTmp3{k}{index,3}=maxFlux(i,1); - netUptTmp3{k}{index,2}=smi2(i,1); - netUptTmp3{k}{index,3}=sma2(i,1); + netProduction{3,k}{index,2}=minFlux(i,1); + netProduction{3,k}{index,3}=maxFlux(i,1); end end if saveConstrModels microbiota_model=model_pd; mkdir(strcat(resPath,'Personalized')) - parsave([resPath filesep 'Personalized' filesep 'microbiota_model_pDiet_' sampleID '.mat'],'microbiota_model') + save([resPath filesep 'Personalized' filesep 'microbiota_model_pDiet_' sampleID '.mat'],'microbiota_model') end end @@ -387,37 +387,139 @@ end end - for k=s:s+endPnt - if ~isempty(inFesMatTmp) - for l=1:size(inFesMatTmp{k},2) - inFesMat{k,l}=inFesMatTmp{k}(l); - end - end - if ~isempty(presolTmp) - for l=1:size(presolTmp{k},2) - presol{k,l}=presolTmp{k}(l); - end + % Saving all output of simulations + save(strcat(resPath,'simRes.mat'),'netProduction','presol','inFesMat', 'netUptake') + end + +else + if saveConstrModels + %% just export the models with diet constraints + inFesMat = {}; + presol = {}; + + if length(sampNames)>50 + steps=50; + else + steps=length(sampNames); + end + % proceed in batches for improved effiency + for s=1:steps:length(sampNames) + if length(sampNames)-s>=steps-1 + endPnt=steps-1; + else + endPnt=length(sampNames)-s; end - if computeProfiles - if ~isempty(netProdTmp1) - netProduction{1,k}=netProdTmp1{k}; - netUptake{1,k}=netUptTmp1{k}; + presolTmp={}; + infesMatTmp={}; + + % Starting personalized simulations + parfor k=s:s+endPnt + restoreEnvironment(environment); + changeCobraSolver(solver, 'LP', 0, -1); + + if ~isempty(hostPath) + 
microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + else + microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + end + model = microbiota_model; + for j = 1:length(model.rxns) + if strfind(model.rxns{j}, 'biomass') + model.lb(j) = 0; + end + end + + % adapt constraints + BiomassNumber=find(strcmp(model.rxns,'communityBiomass')); + Components = model.mets(find(model.S(:, BiomassNumber))); + Components = strrep(Components,'_biomass[c]',''); + for j=1:length(Components) + % remove constraints on demand reactions to prevent infeasibilities + findDm= model.rxns(find(strncmp(model.rxns,[Components{j} '_DM_'],length([Components{j} '_DM_'])))); + model = changeRxnBounds(model, findDm, 0, 'l'); + % constrain flux through sink reactions + findSink= model.rxns(find(strncmp(model.rxns,[Components{j} '_sink_'],length([Components{j} '_sink_'])))); + model = changeRxnBounds(model, findSink, -1, 'l'); + end + + model = changeObjective(model, 'EX_microbeBiomass[fe]'); + AllRxn = model.rxns; + RxnInd = find(cellfun(@(x) ~isempty(strfind(x, '[d]')), AllRxn)); + EXrxn = model.rxns(RxnInd); + EXrxn = regexprep(EXrxn, 'EX_', 'Diet_EX_'); + model.rxns(RxnInd) = EXrxn; + model = changeRxnBounds(model, 'communityBiomass', lowerBMBound, 'l'); + model = changeRxnBounds(model, 'communityBiomass', 1, 'u'); + model=changeRxnBounds(model,model.rxns(strmatch('UFEt_',model.rxns)),1000000,'u'); + model=changeRxnBounds(model,model.rxns(strmatch('DUt_',model.rxns)),1000000,'u'); + model=changeRxnBounds(model,model.rxns(strmatch('EX_',model.rxns)),1000000,'u'); + + % set constraints on host exchanges if present + if ~isempty(hostBiomassRxn) + hostEXrxns=find(strncmp(model.rxns,'Host_EX_',8)); + model=changeRxnBounds(model,model.rxns(hostEXrxns),0,'l'); + % constrain blood exchanges but make exceptions for metabolites that should be taken up from + % blood + takeupExch={'h2o','hco3','o2'}; + takeupExch=strcat('Host_EX_', takeupExch, 
'[e]b'); + model=changeRxnBounds(model,takeupExch,-100,'l'); + % close internal exchanges except for human metabolites known + % to be found in the intestine + hostIEXrxns=find(strncmp(model.rxns,'Host_IEX_',9)); + model=changeRxnBounds(model,model.rxns(hostIEXrxns),0,'l'); + takeupExch={'gchola','tdchola','tchola','dgchol','34dhphe','5htrp','Lkynr','f1a','gncore1','gncore2','dsT_antigen','sTn_antigen','core8','core7','core5','core4','ha','cspg_a','cspg_b','cspg_c','cspg_d','cspg_e','hspg'}; + takeupExch=strcat('Host_IEX_', takeupExch, '[u]tr'); + model=changeRxnBounds(model,takeupExch,-1000,'l'); + % set a minimum and a limit for flux through host biomass + % reaction + model=changeRxnBounds(model,['Host_' hostBiomassRxn],0.001,'l'); + model=changeRxnBounds(model,['Host_' hostBiomassRxn],hostBiomassRxnFlux,'u'); end - if ~isempty(netProdTmp2) - netProduction{2,k}=netProdTmp2{k}; - netUptake{2,k}=netUptTmp2{k}; + % Using input diet + + model_sd=model; + if adaptMedium + [diet] = adaptVMHDietToAGORA(dietFilePath,'Microbiota'); + else + diet = readtable(dietFilePath, 'Delimiter', '\t'); % load the text file with the diet + diet = table2cell(diet); + for j = 1:length(diet) + diet{j, 2} = num2str(-(diet{j, 2})); + end + end + [model_sd] = useDiet(model_sd, diet,0); + + if includeHumanMets + % add the human metabolites + for l=1:length(HumanMets) + model_sd=changeRxnBounds(model_sd,strcat('Diet_EX_',HumanMets{l},'[d]'),str2num(HumanMets{l,2}),'l'); + end + end + + solution_sDiet=solveCobraLP(buildLPproblemFromModel(model_sd)); + % solution_sDiet=solveCobraLPCPLEX(model_sd,2,0,0,[],0); + presolTmp{k}=solution_sDiet.obj; + if solution_sDiet.stat==0 + warning('Presolve detected one or more infeasible models. 
Please check InFesMat object !') + infesMatTmp{k}= model.name; + end + microbiota_model=model_sd; + + parsave([resPath filesep 'Diet' filesep 'microbiota_model_diet_' sampleID '.mat'],microbiota_model) + end + for k=s:s+endPnt + if ~isempty(presolTmp) + presol{k,2} = presolTmp{k}; end - if ~isempty(netProdTmp3) - netProduction{3,k}=netProdTmp3{k}; - netUptake{3,k}=netUptTmp3{k}; + if ~isempty(infesMatTmp) + inFesMat{k,2} = infesMatTmp{k}; end + save([resPath filesep 'presol.mat'],presol) + save([resPath filesep 'inFesMat.mat'],inFesMat) end end - - % Saving all output of simulations - save(strcat(resPath,'simRes.mat'),'netProduction','presol','inFesMat', 'netUptake') - end + end end end From 008febf45f0063d161363f70881983c9a9856f0c Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Mon, 21 Jun 2021 16:27:18 +0100 Subject: [PATCH 26/82] bugfix in DEMETER --- .../mgPipe/microbiotaModelSimulator.m | 31 ++++++++++++------- .../src/refinement/refinementPipeline.m | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 56f81674b8..4d9cee8642 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -50,12 +50,21 @@ % .. 
Author: Federico Baldini, 2017-2018 % Almut Heinken, 03/2021: simplified inputs -% set a solver if not done yet +% initialize COBRA Toolbox and parallel pool global CBT_LP_SOLVER +if isempty(CBT_LP_SOLVER) + initCobraToolbox +end solver = CBT_LP_SOLVER; -if isempty(solver) - initCobraToolbox(false); %Don't update the toolbox automatically + +if numWorkers>0 && ~isempty(ver('parallel')) + % with parallelization + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end end +environment = getEnvironment(); if saveConstrModels mkdir([resPath filesep 'Diet']) @@ -72,6 +81,11 @@ repeatSim=0; end +% define human-derived metabolites present in the gut: primary bile acids, amines, mucins, host glycans +if includeHumanMets + HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; +end + if computeProfiles %% start the simulations @@ -147,12 +161,6 @@ lowerBMBound=0.4; end - % determine human-derived metabolites present in the gut: primary bile - % acexchangess, amines, mucins, host glycans - if includeHumanMets - HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; - end - % Starting personalized simulations for k = startIter:length(sampNames) doSim=1; @@ -418,6 +426,7 @@ restoreEnvironment(environment); changeCobraSolver(solver, 'LP', 0, -1); + sampleID = sampNames{k,1}; if ~isempty(hostPath) microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); else @@ -515,8 
+524,8 @@ if ~isempty(infesMatTmp) inFesMat{k,2} = infesMatTmp{k}; end - save([resPath filesep 'presol.mat'],presol) - save([resPath filesep 'inFesMat.mat'],inFesMat) + save([resPath filesep 'presol.mat'],'presol') + save([resPath filesep 'inFesMat.mat'],'inFesMat') end end end diff --git a/src/reconstruction/demeter/src/refinement/refinementPipeline.m b/src/reconstruction/demeter/src/refinement/refinementPipeline.m index fc8ed9810d..43ce8b36d1 100755 --- a/src/reconstruction/demeter/src/refinement/refinementPipeline.m +++ b/src/reconstruction/demeter/src/refinement/refinementPipeline.m @@ -360,7 +360,7 @@ %% add periplasmatic space if ~isempty(infoFilePath) - if ~any(strcmp(infoFile(:,1),microbeID)) + if any(strcmp(infoFile(:,1),microbeID)) model = createPeriplasmaticSpace(model,microbeID,infoFile); end end From e2cd7f49d3cbcc57b4b1c8d107a7a93bf886c3bb Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 22 Jun 2021 00:05:12 +0100 Subject: [PATCH 27/82] Improved performance of mgPipe --- .../mgPipe/createPersonalizedModel.m | 7 ++++++ .../mgPipe/initMgPipe.m | 6 +++-- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 23 +++++++++++++++---- .../mgPipe/microbiotaModelSimulator.m | 10 ++++++-- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m index 337fa1941e..ed5c0bb83a 100644 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m @@ -110,6 +110,11 @@ % allmod(k,1)={finam}; microbiota_model=pruned_model; microbiota_model.name=sampNames{k,1}; + + % remove unnecessary fields + toRemove={'citations';'comments';'grRules';'rxnConfidenceScores';'rxnECNumbers';'rxnKEGGID';'subSystems';'metHMDBID';'metInChIString';'metKEGGID';'metPubChemID';'metSmiles';'genes'}; + 
microbiota_model = rmfield(microbiota_model,toRemove); + sresPath=resPath(1:(length(resPath)-1)); cd(sresPath) % give a different name if host is present @@ -118,6 +123,8 @@ else parsave(sprintf(strcat('microbiota_model_samp_',sampNames{k,1},'%d.mat')),microbiota_model) end + + createdModels{k} = microbiota_model; else s= 'microbiota model file found: skipping model creation for this sample'; disp(s) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m index 059c4b8d19..5fe2cc0ab7 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m @@ -1,4 +1,4 @@ -function [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) +function [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) % This function initializes the mgPipe pipeline and sets the optional input % variables if not defined. % @@ -38,6 +38,8 @@ % pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux % under the given constraints should be removed (default=false). % Recommended for large-scale simulation projects. +% modelsWithErrors: List of created models that did not pass +% verifyModel. If empty, all models passed. 
% % OUTPUTS: % init: status of initialization @@ -187,7 +189,7 @@ init = true; -[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); +[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); cd(currentDir) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index b1aaf5e496..74adadaecd 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -1,4 +1,4 @@ -function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) +function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) % mgPipe is a MATLAB based pipeline to integrate microbial abundances % (coming from metagenomic data) with constraint based modeling, creating % 
individuals' personalized models. @@ -9,12 +9,9 @@ % integrating abundance data retrieved from metagenomics. For each organism, % reactions are coupled to the objective function. % [PART 3] Simulations under different diet regimes. -% mgPipe was created (and tested) for AGORA 1.0 please first download AGORA -% version 1.0 from https://www.vmh.life/#downloadview and place the mat files -% into a folder. % % USAGE: -% [netSecretionFluxes, netUptakeFluxes, Y, modelStats,summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) +% [netSecretionFluxes, netUptakeFluxes, Y, modelStats,summary, statistics, modelsWithErrors] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium) % % INPUTS: % modPath: char with path of directory where models are stored @@ -56,6 +53,8 @@ % reactions and metabolites % statistics: If info file with stratification is provided, will % determine if there is a significant difference. +% modelsWithErrors: List of created models that did not pass +% verifyModel. If empty, all models passed. % % AUTHORS: % - Federico Baldini, 2017-2018 @@ -219,12 +218,16 @@ steps=length(sampNames); end % proceed in batches for improved effiency +cnt=1; +modelsWithErrors={}; + for j=1:steps:length(sampNames) if length(sampNames)-j>=steps-1 endPnt=steps-1; else endPnt=length(sampNames)-j; end + getErrors={}; parfor i=j:j+endPnt % Each personalized model will be created separately. 
@@ -251,6 +254,16 @@ % create personalized models for the batch createdModel=createPersonalizedModel(abunRed,resPath,setupModel,sampNames(i,1),microbeNamesSample,couplingMatrixSample,host,hostBiomassRxn); + results=verifyModel(createdModel{1}); + getErrors{i} = results; + end + end + for i=j:j+endPnt + if length(getErrors) >= i + if ~isempty(getErrors{i}) + modelsWithErrors{cnt,1} = sampNames{i,1}; + cnt=cnt+1; + end end end end diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index 4d9cee8642..b70c177c79 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -175,9 +175,15 @@ % simulations either not done yet or done incorrectly -> go sampleID = sampNames{k,1}; if ~isempty(hostPath) - microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + % microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + modelStr=load(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + modelF=fieldnames(modelStr); + microbiota_model=modelStr.(modelF{1}); else - microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + % microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + modelStr=load(strcat('microbiota_model_samp_', sampleID,'.mat')); + modelF=fieldnames(modelStr); + microbiota_model=modelStr.(modelF{1}); end model = microbiota_model; for j = 1:length(model.rxns) From 281a3c6d0ea46891eb7fe67f19a2e1e0e48dbc3b Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 22 Jun 2021 15:54:30 +0100 Subject: [PATCH 28/82] Improved performance of mgPipe --- .../mgPipe/buildModelStorage.m | 67 +++++-------------- .../mgPipe/initMgPipe.m | 10 +-- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 25 
++++--- .../mgPipe/mgSimResCollect.m | 8 +-- .../mgPipe/microbiotaModelSimulator.m | 28 ++++++-- 5 files changed, 63 insertions(+), 75 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m index 7bf86da321..0915d56d5c 100644 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -1,20 +1,15 @@ -function [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers) +function [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers) % This function builds the internal exchange space and the coupling % constraints for models to join within mgPipe so they can be merged into -% microbiome models afterwards. Exchanges that can never carry flux on the +% microbiome models afterwards. exchanges that can never carry flux on the % given diet are removed to reduce computation time. % % USAGE -% [exch,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers) +% [exMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers) % % INPUTS % microbeNames: list of microbe models included in the microbiome models % modPath: char with path of directory where models are stored -% adaptMedium: boolean indicating if the medium should be adapted through the -% adaptVMHDietToAGORA function or used as is (default=true) -% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux -% under the given constraints should be removed (default=false). 
-% Recommended for large-scale simulation projects. % dietFilePath: char with path of directory where the diet is saved % includeHumanMets: boolean indicating if human-derived metabolites % present in the gut should be provided to the models (default: true) @@ -23,7 +18,8 @@ % numWorkers: integer indicating the number of cores to use for parallelization % % OUTPUTS -% exch: list of exchanged metabolites present in at least one microbe model +% activeExMets: list of exchanged metabolites present in at +% least one microbe model that can carry flux % modelStoragePath: path to the modified models to join afterwards % couplingMatrix: matrix containing coupling constraints for each model to join % @@ -62,20 +58,19 @@ % get all exchanges that can carry flux in at least one model on the given % diet, including metabolites that can be secreted -exch = {}; +activeExMets = {}; for i = 1:size(microbeNames, 1) model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); - exMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); + activeExMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); ex_rxns = {}; - for j=1:length(exMets) - ex_rxns{j}=['EX_' exMets{j}]; + for j=1:length(activeExMets) + ex_rxns{j}=['EX_' activeExMets{j}]; ex_rxns{j}=strrep(ex_rxns{j},'[e]','(e)'); end % account for depracated nomenclature ex_rxns=intersect(ex_rxns,model.rxns); - - if pruneModels + % Using input diet model = useDiet(model, diet,0); @@ -99,12 +94,7 @@ pruned_ex_rxns = ex_rxns(flux); pruned_ex_rxns=strrep(pruned_ex_rxns,'EX_',''); pruned_ex_rxns=strrep(pruned_ex_rxns,'(e)','[e]'); - exch = union(exch,pruned_ex_rxns); - else - ex_rxns=strrep(ex_rxns,'EX_',''); - ex_rxns=strrep(ex_rxns,'(e)','[e]'); - exch = union(exch,ex_rxns); - end + activeExMets = union(activeExMets,pruned_ex_rxns); end % get already built reconstructions @@ -122,42 +112,21 @@ model=rmfield(model,'C'); model=rmfield(model,'d'); end + % % removing possible constraints of the bacs selExc = 
findExcRxns(model); Reactions2 = model.rxns(find(selExc)); allex = Reactions2(strmatch('EX', Reactions2)); - biomass = allex(find(strncmp(allex,'EX_bio',6))); + biomass = allex(find(strncmp(allex,'bio',3))); finrex = setdiff(allex, biomass); model = changeRxnBounds(model, finrex, -1000, 'l'); model = changeRxnBounds(model, finrex, 1000, 'u'); - if pruneModels - % remove exchange reactions that cannot carry flux - try - [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',finrex); - catch - [minFlux,maxFlux]=fluxVariability(model,0,'max',finrex); - end - nominflux=find(abs(minFlux) < 0.00000001); - nomaxflux=find(abs(maxFlux) < 0.00000001); - noflux=intersect(nominflux,nomaxflux); - model=removeRxns(model,finrex(noflux)); - - % additionally, remove exchanges that are neither consumed or - % secreted by any microbe on this diet - selExc = findExcRxns(model); - Reactions2 = model.rxns(find(selExc)); - allex = Reactions2(strmatch('EX', Reactions2)); - allex=strrep(allex,'EX_',''); - allex=strrep(allex,'(e)','[e]'); - [C]=setdiff(allex,exch); - model=removeRxns(model,C); - -% % removing blocked reactions from the bacs -% BlockedRxns = identifyFastBlockedRxns(model,model.rxns); -% model= removeRxns(model, BlockedRxns); - end + % removing blocked reactions from the bacs + %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel); + %model= removeRxns(model, BlockedRxns); + %BlockedReaction = findBlockedReaction(model,'L2') model = convertOldStyleModel(model); exmod = model.rxns(strncmp('EX_', model.rxns, 3)); % find exchange reactions @@ -176,7 +145,7 @@ model.rxns = strcat(strcat(microbeNames{i, 1}, '_'), model.rxns); model.mets = strcat(strcat(microbeNames{i, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] [model] = mergeTwoModels(dummyMicEU, model, 2, false, false); - + %finish up by A: removing duplicate reactions %We will lose information here, but we will just remove the duplicates. 
[model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m index 5fe2cc0ab7..904b713c22 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m @@ -1,9 +1,9 @@ -function [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) +function [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) % This function initializes the mgPipe pipeline and sets the optional input % variables if not defined. % % USAGE -% [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) +% [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin) % % INPUTS: % modPath: char with path of directory where models are stored @@ -38,8 +38,6 @@ % pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux % under the given constraints should be removed (default=false). % Recommended for large-scale simulation projects. -% modelsWithErrors: List of created models that did not pass -% verifyModel. If empty, all models passed. % % OUTPUTS: % init: status of initialization @@ -51,6 +49,8 @@ % reactions and metabolites % statistics: If info file with stratification is provided, will % determine if there is a significant difference. +% modelsOK: Boolean indicating if the created microbiome models +% passed verifyModel. If true, all models passed. % % .. 
Author: Federico Baldini 2018 % - Almut Heinken 02/2020: removed unnecessary outputs @@ -189,7 +189,7 @@ init = true; -[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); +[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); cd(currentDir) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 74adadaecd..2a575cf7a5 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -1,4 +1,4 @@ -function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsWithErrors] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) +function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) % mgPipe is a MATLAB based pipeline to integrate microbial abundances % (coming from metagenomic 
data) with constraint based modeling, creating % individuals' personalized models. @@ -39,9 +39,6 @@ % overwritten (default=false) % adaptMedium: boolean indicating if the medium should be adapted through the % adaptVMHDietToAGORA function or used as is (default=true) -% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux -% under the given constraints should be removed (default=false). -% Recommended for large-scale simulation projects. % % OUTPUTS: % init: status of initialization @@ -53,8 +50,8 @@ % reactions and metabolites % statistics: If info file with stratification is provided, will % determine if there is a significant difference. -% modelsWithErrors: List of created models that did not pass -% verifyModel. If empty, all models passed. +% modelsOK: Boolean indicating if the created microbiome models +% passed verifyModel. If true, all models passed. % % AUTHORS: % - Federico Baldini, 2017-2018 @@ -97,7 +94,7 @@ % Extracellular spaces simulating the lumen are built and stored for % each microbe. 
- [exch,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath,pruneModels,dietFilePath, includeHumanMets, adaptMedium, numWorkers); + [activeExMets,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath, dietFilePath, includeHumanMets, adaptMedium, numWorkers); % Computing reaction presence ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {}); @@ -145,7 +142,7 @@ print(strcat(resPath, 'Subsystem_abundances'), figForm) % save mapping info - save([resPath filesep 'mapInfo.mat'], 'mapP', 'exMets', 'exch', 'sampNames', 'microbeNames', 'couplingMatrix', 'modelStoragePath','abundance','-v7.3') + save([resPath filesep 'mapInfo.mat'], 'mapP', 'exMets', 'activeExMets', 'sampNames', 'microbeNames', 'couplingMatrix', 'modelStoragePath','abundance','-v7.3') end %end of trigger for Autoload @@ -210,7 +207,7 @@ % define what counts as zero abundance tol=0.0000001; -clear('microbeNames','exMets','abundance') +clear('microbeNames','activeExMets','abundance') if length(sampNames)>50 steps=50; @@ -250,7 +247,7 @@ microbeNamesSample(cell2mat(abunRed(:,2)) < tol,:)=[]; couplingMatrixSample(cell2mat(abunRed(:,2)) < tol,:)=[]; abunRed(cell2mat(abunRed(:,2)) < tol,:)=[]; - setupModel = fastSetupCreator(exch, modelStoragePath, microbeNamesSample, host, objre); + setupModel = fastSetupCreator(exMets, modelStoragePath, microbeNamesSample, host, objre); % create personalized models for the batch createdModel=createPersonalizedModel(abunRed,resPath,setupModel,sampNames(i,1),microbeNamesSample,couplingMatrixSample,host,hostBiomassRxn); @@ -268,6 +265,12 @@ end end +if isempty(modelsWithErrors) + modelsOK = true; +else + modelsOK = false; +end + %% PIPELINE: [PART 3] % % In this phase, for each microbiota model, a diet, in the form of set constraints @@ -277,7 +280,7 @@ load([resPath filesep 'mapInfo.mat']) if computeProfiles || saveConstrModels - [exchanges, netProduction, netUptake, presol, inFesMat] = microbiotaModelSimulator(resPath, 
exMets, sampNames, dietFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, numWorkers, rDiet, pDiet, saveConstrModels, computeProfiles, includeHumanMets, lowerBMBound, repeatSim, adaptMedium); + [exchanges, netProduction, netUptake, presol, inFesMat] = microbiotaModelSimulator(resPath, activeExMets, sampNames, dietFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, numWorkers, rDiet, pDiet, saveConstrModels, computeProfiles, includeHumanMets, lowerBMBound, repeatSim, adaptMedium); % Finally, NMPCs (net maximal production capability) are computed in a metabolite % resolved manner and saved in a comma delimited file in the results folder. NMPCs % indicate the maximal production of each metabolite and are computing summing diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m index 8c8b696aa3..7ae69bdff8 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m @@ -12,15 +12,15 @@ % INPUTS: % resPath: char with path of directory where results are saved % sampNames: nx1 cell array cell array with names of individuals in the study -% exchanges: cell array with list of all unique Exchanges to diet/ -% fecal compartment +% exchanges: cell array with list of all unique exchanges to diet/ +% fecal compartment that were interrogated in simulations % rDiet: number (double) indicating if to simulate a rich diet % pDiet: number (double) indicating if a personalized diet % is available and should be simulated -% infoFilePath: char indicating, if stratification criteria are available, +% infoFilePath: char indicating, if stratification criteria are available, % full path and name to related documentation(default: no) % is available -% netProduction: cell array containing FVA values for maximal uptake +% netProduction: cell array containing FVA 
values for maximal uptake % figForm: char indicating the format of figures % % OUTPUTS: diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index b70c177c79..ac19175cf9 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -12,8 +12,8 @@ % % INPUTS: % resPath: char with path of directory where results are saved -% exMets: cell array with all unique extracellular metabolites -% contained in the models +% exMets: list of exchanged metabolites present in at least +% one microbe model that can carry flux % sampNames: cell array with names of individuals in the study % dietFilePath: path to and name of the text file with dietary information % hostPath: char with path to host model, e.g., Recon3D (default: empty) @@ -37,8 +37,8 @@ % adaptVMHDietToAGORA function or used as is (default=true) % % OUTPUTS: -% exchanges: cell array with list of all unique Exchanges to diet/ -% fecal compartment +% exchanges: cell array with list of all unique exchanges to diet/ +% fecal compartment that were interrogated in simulations % netProduction: cell array containing FVA values for maximal uptake % and secretion for setup lumen / diet exchanges % netUptake: cell array containing FVA values for minimal uptake @@ -76,6 +76,11 @@ exchanges = regexprep(exchanges, '\[e\]', '\[fe\]'); exchanges = setdiff(exchanges, 'EX_biomass[fe]', 'stable'); +allFecalExch = exchanges; +allDietExch = exchanges; +allDietExch = regexprep(allDietExch,'EX_','Diet_EX_'); +allDietExch = regexprep(allDietExch,'\[fe\]','\[d\]'); + % reload existing simulation results by default if ~exist('repeatSim', 'var') repeatSim=0; @@ -260,6 +265,11 @@ FecalRxn = AllRxn(FecalInd); FecalRxn=setdiff(FecalRxn,'EX_microbeBiomass[fe]','stable'); DietRxn = 
AllRxn(DietInd); + + % remove exchanges that cannot carry flux + FecalRxn=intersect(FecalRxn,allFecalExch); + DietRxn=intersect(DietRxn,allDietExch); + if rDiet==1 && computeProfiles [minFlux,maxFlux]=guidedSim(model,FecalRxn); minFluxFecal = minFlux; @@ -434,9 +444,15 @@ sampleID = sampNames{k,1}; if ~isempty(hostPath) - microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + % microbiota_model=readCbModel(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + modelStr=load(strcat('host_microbiota_model_samp_', sampleID,'.mat')); + modelF=fieldnames(modelStr); + microbiota_model=modelStr.(modelF{1}); else - microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + % microbiota_model=readCbModel(strcat('microbiota_model_samp_', sampleID,'.mat')); + modelStr=load(strcat('microbiota_model_samp_', sampleID,'.mat')); + modelF=fieldnames(modelStr); + microbiota_model=modelStr.(modelF{1}); end model = microbiota_model; for j = 1:length(model.rxns) From 1a006646cc86ddaf8f06a8f98f140390bb911dee Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 22 Jun 2021 16:19:19 +0100 Subject: [PATCH 29/82] Improved performance of mgPipe --- .../mgPipe/microbiotaModelSimulator.m | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m index ac19175cf9..cc461ff5b2 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -265,11 +265,8 @@ FecalRxn = AllRxn(FecalInd); FecalRxn=setdiff(FecalRxn,'EX_microbeBiomass[fe]','stable'); DietRxn = AllRxn(DietInd); - - % remove exchanges that cannot carry flux - FecalRxn=intersect(FecalRxn,allFecalExch); - DietRxn=intersect(DietRxn,allDietExch); - + + %% 
computing fluxes on the rich diet if rDiet==1 && computeProfiles [minFlux,maxFlux]=guidedSim(model,FecalRxn); minFluxFecal = minFlux; @@ -293,7 +290,11 @@ save([resPath filesep 'Rich' filesep 'microbiota_model_' sampleID '.mat'],'microbiota_model') end - % Using input diet + %% Computing fluxes on the input diet + + % remove exchanges that cannot carry flux on the diet + FecalRxn=intersect(FecalRxn,allFecalExch); + DietRxn=intersect(DietRxn,allDietExch); model_sd=model; if adaptMedium @@ -353,7 +354,7 @@ save(strcat(resPath,'intRes.mat'),'netProduction','presol','inFesMat', 'netUptake') - % Using personalized diet not documented in MgPipe and bug checked yet!!!! + %% Using personalized diet not documented in MgPipe and bug checked yet!!!! if pDiet==1 model_pd=model; From 1f4cd05e427e12958fb3b607d2f65c6f387dfcff Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Wed, 23 Jun 2021 10:18:10 +0100 Subject: [PATCH 30/82] Improved performance of mgPipe --- .../mgPipe/buildModelStorage.m | 26 ++++++++++++++++++- .../mgPipe/initMgPipe.m | 11 ++++---- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 6 +++-- .../src/integration/prepareInputData.m | 2 +- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m index 0915d56d5c..c5cac19dea 100644 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -1,4 +1,4 @@ -function [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers) +function [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers,removeBlockedRxns) % This function builds the internal exchange space and the 
coupling % constraints for models to join within mgPipe so they can be merged into % microbiome models afterwards. exchanges that can never carry flux on the @@ -16,6 +16,8 @@ % adaptMedium: boolean indicating if the medium should be adapted through the % adaptVMHDietToAGORA function or used as is (default=true) % numWorkers: integer indicating the number of cores to use for parallelization +% removeBlockedRxns: Remove reactions blocked on the input diet to +% reduce computation time (optional) % % OUTPUTS % activeExMets: list of exchanged metabolites present in at @@ -123,6 +125,28 @@ model = changeRxnBounds(model, finrex, -1000, 'l'); model = changeRxnBounds(model, finrex, 1000, 'u'); + % optional: remove blocked reactions on the diet from the models + if removeBlockedRxns + modelDiet = useDiet(model, diet,0); + + if includeHumanMets + % add the human metabolites + for l=1:length(HumanMets) + modelDiet=changeRxnBounds(modelDiet,strcat('EX_',HumanMets{l},'(e)'),str2num(HumanMets{l,2}),'l'); + end + end + + try + [minFlux,maxFlux]=fastFVA(modelDiet,0,'max','ibm_cplex'); + catch + [minFlux,maxFlux]=fluxVariability(modelDiet,0,'max'); + end + nominflux=find(abs(minFlux) < 0.00000001); + nomaxflux=find(abs(maxFlux) < 0.00000001); + noflux=intersect(nominflux,nomaxflux); + model=removeRxns(model,model.rxns(noflux)); + end + % removing blocked reactions from the bacs %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel); %model= removeRxns(model, BlockedRxns); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m index 904b713c22..24b054135d 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m @@ -35,9 +35,8 @@ % overwritten (default=false) % adaptMedium: boolean indicating if the medium should be adapted through the % adaptVMHDietToAGORA 
function or used as is (default=true) -% pruneModels: boolean indicating whether exchanges and reactions that cannot carry flux -% under the given constraints should be removed (default=false). -% Recommended for large-scale simulation projects. +% removeBlockedRxns: Remove reactions blocked on the input diet to +% reduce computation time (default=false) % % OUTPUTS: % init: status of initialization @@ -83,7 +82,7 @@ parser.addParameter('lowerBMBound', 0.4, @isnumeric); parser.addParameter('repeatSim', false, @islogical); parser.addParameter('adaptMedium', true, @islogical); -parser.addParameter('pruneModels', false, @islogical); +parser.addParameter('removeBlockedRxns', false, @islogical); parser.parse(modPath, abunFilePath, computeProfiles, varargin{:}); @@ -105,7 +104,7 @@ lowerBMBound = parser.Results.lowerBMBound; repeatSim = parser.Results.repeatSim; adaptMedium = parser.Results.adaptMedium; -pruneModels = parser.Results.pruneModels; +removeBlockedRxns = parser.Results.removeBlockedRxns; global CBT_LP_SOLVER if isempty(CBT_LP_SOLVER) @@ -189,7 +188,7 @@ init = true; -[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, pruneModels); +[netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium, removeBlockedRxns); cd(currentDir) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 2a575cf7a5..c9c417fb0f 100755 --- 
a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -1,4 +1,4 @@ -function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,pruneModels) +function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics, modelsOK] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium,removeBlockedRxns) % mgPipe is a MATLAB based pipeline to integrate microbial abundances % (coming from metagenomic data) with constraint based modeling, creating % individuals' personalized models. @@ -39,6 +39,8 @@ % overwritten (default=false) % adaptMedium: boolean indicating if the medium should be adapted through the % adaptVMHDietToAGORA function or used as is (default=true) +% removeBlockedRxns: Remove reactions blocked on the input diet to +% reduce computation time (default=false) % % OUTPUTS: % init: status of initialization @@ -94,7 +96,7 @@ % Extracellular spaces simulating the lumen are built and stored for % each microbe. 
- [activeExMets,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath, dietFilePath, includeHumanMets, adaptMedium, numWorkers); + [activeExMets,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath, dietFilePath, includeHumanMets, adaptMedium, numWorkers, removeBlockedRxns); % Computing reaction presence ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {}); diff --git a/src/reconstruction/demeter/src/integration/prepareInputData.m b/src/reconstruction/demeter/src/integration/prepareInputData.m index 2ae32bf956..9653fa62e5 100755 --- a/src/reconstruction/demeter/src/integration/prepareInputData.m +++ b/src/reconstruction/demeter/src/integration/prepareInputData.m @@ -224,7 +224,7 @@ for j=1:length(C) sumData(j,1)=abs(nansum(nonzeros(str2double(inputData(IA(j),2:end))))); end - if any(sumData)>0 + if any(sumData>0) % if there is any data, propagate the experimental data from % the strain with the most data to the strains with no data % find the row with the most experimental data From f503a7956638c96c8e50a175228646fc37bdf65a Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Wed, 23 Jun 2021 18:36:44 +0100 Subject: [PATCH 31/82] QP objective check bypassed for large v --- .gitattributes | 32 +++++++++++++++++-- src/base/solvers/solveCobraQP.m | 10 +++--- .../preprocessing/selectGeneFromGPR.m | 2 +- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/.gitattributes b/.gitattributes index d20f44ae0c..69a9a4415e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,6 +2,32 @@ *.html linguist-vendored=true *.gms linguist-vendored=true -*.mat -crlf -diff -merge -*.mlx -crlf -diff -merge -*.xlsx -crlf -diff -merge +## https://nl.mathworks.com/help/matlab/matlab_prog/set-up-git-source-control.html#buhx2d6-1_3 +*.fig binary +*.mat binary +*.mdl binary +*.mdlp binary +*.mexa64 binary +*.mexw64 binary +*.mexmaci64 binary +*.mlapp binary +*.mldatx binary +*.mlproj binary +*.mlx binary +*.p binary +*.sfx binary 
+*.sldd binary +*.slreqx binary +*.slmx binary +*.sltx binary +*.slxc binary +*.slx binary merge=mlAutoMerge +*.slxp binary + +## Other common binary file types +*.docx binary +*.exe binary +*.jpg binary +*.pdf binary +*.png binary +*.xlsx binary diff --git a/src/base/solvers/solveCobraQP.m b/src/base/solvers/solveCobraQP.m index 473859c585..bdeab3aa65 100644 --- a/src/base/solvers/solveCobraQP.m +++ b/src/base/solvers/solveCobraQP.m @@ -1072,11 +1072,11 @@ if ~isempty(solution.full) %set the value of the objective solution.obj = c'*solution.full + 0.5*solution.full'*F*solution.full; - if norm(solution.obj - f) > getCobraSolverParams('LP', 'feasTol')*100 - warning('solveCobraQP: Objectives do not match. Switch to a different solver if you rely on the value of the optimal objective.') - fprintf('%s\n%g\n%s\n%g\n%s\n%g\n',['The optimal value of the objective from ' solution.solver ' is:'],f, ... - 'while the value constructed from c''*x + 0.5*x''*F*x:', solution.obj,... - 'while the value constructed from osense*(c''*x + x''*F*x) :', osense*(c'*solution.full + solution.full'*F*solution.full)) + %expect some variability if the norm of the optimal flux vector is large + %TODO how to scale this + if norm(solution.obj - f) > getCobraSolverParams('LP', 'feasTol')*100 && norm(solution.full)<1e2 + warning('solveCobraQP: Objectives do not match. 
Rescale problem if you rely on the exact value of the optimal objective.') + fprintf('%s%g\n','The difference between the optimal value of the solver objective and objective from c''*x + 0.5*x''*F*x is: ' ,f - solution.obj) end else solution.obj = NaN; diff --git a/src/dataIntegration/transcriptomics/preprocessing/selectGeneFromGPR.m b/src/dataIntegration/transcriptomics/preprocessing/selectGeneFromGPR.m index c4ba8c0cef..97f8209e79 100644 --- a/src/dataIntegration/transcriptomics/preprocessing/selectGeneFromGPR.m +++ b/src/dataIntegration/transcriptomics/preprocessing/selectGeneFromGPR.m @@ -25,7 +25,7 @@ % OUTPUTS: % expressionCol: reaction expression, corresponding to model.rxns. % No gene-expression data and orphan reactions will -% be given a value of -1. +% be given a value of NaN. % gene_used: gene identifier, corresponding to model.rxns, from GPRs % whose value (expression and/or significance) was chosen for that % reaction From b763a4cabdd3a9e716c9809da8c590eaf92ce238 Mon Sep 17 00:00:00 2001 From: Richelle Date: Thu, 24 Jun 2021 19:06:52 +0200 Subject: [PATCH 32/82] updating checkMetabolicTasks scripts --- external/analysis/RAVEN/checkMetabolicTasks.m | 363 ++++++++---------- external/analysis/RAVEN/essentialRxnsTasks.m | 21 +- .../analysis/RAVEN/generateTaskStructure.m | 91 ++--- 3 files changed, 212 insertions(+), 263 deletions(-) diff --git a/external/analysis/RAVEN/checkMetabolicTasks.m b/external/analysis/RAVEN/checkMetabolicTasks.m index 3e3b027a7d..a5f82919ac 100644 --- a/external/analysis/RAVEN/checkMetabolicTasks.m +++ b/external/analysis/RAVEN/checkMetabolicTasks.m @@ -1,4 +1,4 @@ -function [taskReport essentialRxns taskStructure] = checkMetabolicTasks(model,inputFile,printOutput,printOnlyFailed,getEssential,taskStructure) +function [taskReport, essentialRxns, taskStructure]=checkMetabolicTasks(model,inputFile,printOutput,printOnlyFailed,printDetails,getEssential,taskStructure) % Performs a set of simulations as defined in a task file % to check 
if the model is able to pass a list of metabolic tasks. % A metabolic task is defined as the capacity of producing a list of @@ -25,7 +25,7 @@ % getEssential: true if the minimal number of reactions that need to be % active to pass the task need to be computed % (default - false) -% taskStructure: structure with the tasks, as from `parseTaskList`. If +% taskStructure: structure with the tasks, as from `generateTaskStructure`. If % this is supplied then inputFile is ignored % % OUTPUTS: @@ -38,46 +38,37 @@ % essentialRxns: cell array containing the essential reactions required % to pass a task % -% taskStructure: structure with the tasks, as from `parseTaskList`. +% taskStructure: structure with the tasks, as from `generateTaskStructure`. % % .. Authors: % - Originally written for RAVEN toolbox by Rasmus Agren, 2013-11-17 % - Adapted for cobratoolbox and modified to rely only on flux constraints by Richelle Anne, 2017-05-18 - if nargin < 3 || isempty(printOutput) printOutput=true; end if nargin < 4 || isempty(printOnlyFailed) printOnlyFailed=false; end -if nargin < 5 || isempty(getEssential) +if nargin < 5 || isempty(printDetails) + printDetails=false; +end +if nargin < 6 || isempty(getEssential) getEssential=false; end % Generate a task structure from a list of task in excell format -if nargin < 6 || isempty(taskStructure) - taskStructure=generateTaskStructure(inputFile); +if nargin < 7 || isempty(taskStructure) + taskStructure=generateTaskStructure(inputFile); end - -%CHECK the format of the model +% Check the format of the model if size(model.rxns,2)>size(model.rxns,1) model.rxns=model.rxns'; end -if size(model.rxnNames,2)>size(model.rxnNames,1) - model.rxnNames=model.rxnNames'; -end -if size(model.rules,2)>size(model.rules,1) - model.rules=model.rules'; -end - -if isfield(model,'grRules') && size(model.grRules,2)>size(model.grRules,1) - model.grRules=model.grRules'; -end %Find all exchange/demand/sink reactions Exchange = {}; for k=1:length(model.rxns) - if 
sum(abs(model.S(:,k))) == 1 + if sum(abs(model.S(:,k))) == 1 Exchange(end+1) = model.rxns(k); end end @@ -87,152 +78,115 @@ model.lb(findRxnIDs(model,Exchange))=0; model.ub(findRxnIDs(model,Exchange))=0; - score=0; totalTask=0; notPresent=0; taskReport={numel(taskStructure),3}; essentialRxns={numel(taskStructure)}; +metabolites={}; + for i=1:numel(taskStructure) - clear tModel tModel=model; modelMets=upper(tModel.mets); - + %%SETUP of the input model %suppress objective function if any tModel.c(tModel.c==1)=0; - tModel.csense(1:length(model.b),1) = 'E'; - taskReport{i,1}=taskStructure(i).id; - taskReport{i,2}=taskStructure(i).description; + if isfield(model,'csense') + if size(tModel.csense,2)>size(tModel.csense,1) + tModel.csense=tModel.csense(:); + end + tModel.csense(length(model.b),1) = 'E'; + end + taskReport{i,1}=taskStructure(i).id; + taskReport{i,2}=taskStructure(i).system; + taskReport{i,3}=taskStructure(i).subsystem; + taskReport{i,4}=taskStructure(i).description; + %Set the inputs if ~isempty(taskStructure(i).inputs) - + rxn_Subs={}; for n=1:length(taskStructure(i).inputs) INPUT=taskStructure(i).inputs(n); + + metabolites(end+1)=INPUT; INPUT=INPUT{1}; match_INPUTS = strncmpi(INPUT,modelMets,length(INPUT(1:end-3))); match_INPUTS = modelMets(match_INPUTS==1); - + compSymbol={}; - for k=1:length(match_INPUTS) + for k=1:length(match_INPUTS) [tokens] = regexp(match_INPUTS{k},'(.+)\[(.+)\]','tokens'); Symb = tokens{1}{2}; compSymbol{end+1} = Symb; end + % Definition of the compartment for the exchange reaction + comp_used=INPUT(end-2:end); % Set the exchange reactions for the inputs - % If the metabolites already exist extracellularly AddExchange=0; - if ismember('E',compSymbol)==1 + if ismember(upper(INPUT(end-1)),compSymbol)==1 + Tsp_ID=findRxnIDs(tModel,findRxnsFromMets(tModel,INPUT)); Tsp_rxn = full(tModel.S(:,Tsp_ID)); Nb_React=sum(abs(Tsp_rxn),1); - % If an exchange reaction already exist - if isempty(Nb_React==1)==0 + if ~isempty(find(Nb_React==1)) + 
ID_exc=find(Nb_React==1); - % Remove the existing exchange reaction - tModel=removeRxns(tModel,tModel.rxns(Tsp_ID(ID_exc))); - AddExchange=1; - else + + % If the input is also member of the outputs, let the exchange reversible + if ismember(INPUT,taskStructure(i).outputs)==1 + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), -1000, 'l'); + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), 1000, 'u'); + rxn_Subs(end+1) = tModel.rxns(Tsp_ID(ID_exc)); + else + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), -taskStructure(i).UBin(n), 'l'); + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), -taskStructure(i).LBin(n), 'u'); + rxn_Subs(end+1) = tModel.rxns(Tsp_ID(ID_exc)); + end + + else AddExchange=1; end else AddExchange=1; end + % Add a temporary exchange reaction that allows the import of - % the metabolite + % the metabolite if AddExchange==1 + % If the input is also member of the outputs, let the exchange reversible - warning off - if ismember(INPUT,taskStructure(i).outputs)==1 - [tModel]=addReaction(tModel,['temporary_exchange_',INPUT(1:end-3)],[' <=> ',INPUT],[],[],-1000,1000,[], [], [], [], [], [],0); - else - [tModel]=addReaction(tModel,['temporary_exchange_',INPUT(1:end-3)],[' => ',INPUT],[],[],taskStructure(i).LBin(n),taskStructure(i).UBin(n),[], [], [], [], [], [],0); - end - warning on - rxn_Subs(end+1) = {['temporary_exchange_',INPUT(1:end-3)]}; - end - - % Definition of the compartment for the transport reaction - if ischar(taskStructure(i).COMP)==1 - comp_used=taskStructure(i).COMP; - if strcmpi(comp_used,'[e]')==1 - continue - end - elseif ismember('C',compSymbol)==1 - comp_used='[c]'; - elseif ismember('M',compSymbol)==1 - comp_used='[m]'; - elseif ismember('N',compSymbol)==1 - comp_used='[n]'; - elseif ismember('X',compSymbol)==1 - comp_used='[x]'; - elseif ismember('L',compSymbol)==1 - comp_used='[l]'; - elseif ismember('R',compSymbol)==1 - comp_used='[R]'; - end - - % Set the transport reactions for 
the inputs - % Find existing transporters associated with the input - AddTransport=0; - Tsp_ID=findRxnIDs(tModel,findRxnsFromMets(tModel,INPUT)); - Tsp_rxn = full(tModel.S(:,Tsp_ID)); - Nb_React=sum(abs(Tsp_rxn),1); - - % If free diffusion exist - if isempty(Nb_React==2)==0 - Tsp_ID2=Tsp_ID(Nb_React==2); - - % Choose the transport reaction related to the defined - % compartment (comp_used) - Tsp_ID=Tsp_ID2(tModel.S((strcmpi(tModel.mets,([INPUT(1:end-3),comp_used]))==1),Tsp_ID2)~=0); - if isempty(Tsp_ID)==0 - % Remove the existing transport reaction - tModel=removeRxns(tModel,tModel.rxns(Tsp_ID)); - AddTransport=1; - else - AddTransport=1; - end - else - AddTransport=1; - end - - %Create a transport reaction - if AddTransport==1 - warning off if ismember(INPUT,taskStructure(i).outputs)==1 - %if the input is also output make the reaction - %reversible - [tModel]=addReaction(tModel,['temporary_trsp_',INPUT(1:end-3)],[INPUT,' <=> ',INPUT(1:end-3),comp_used],[],[],-1000,1000,[], [], [], [], [], [],0); + [tModel]=addReaction(tModel,['temporary_exchange_',INPUT],[' <=> ',INPUT],[],[],-1000,1000); + taskStructure(i).inputs(n)={[INPUT]}; else - [tModel]=addReaction(tModel,['temporary_trsp_',INPUT(1:end-3)],[INPUT,' => ',INPUT(1:end-3),comp_used],[],[],taskStructure(i).LBin(n),taskStructure(i).UBin(n),[], [], [], [], [], [],0); - + [tModel]=addReaction(tModel,['temporary_exchange_',INPUT],[' => ',INPUT],[],[],taskStructure(i).LBin(n),taskStructure(i).UBin(n)); + taskStructure(i).inputs(n)={[INPUT]}; end - warning on - rxn_Subs(end+1) = {['temporary_trsp_',INPUT(1:end-3)]}; - end - + rxn_Subs(end+1) = {['temporary_exchange_',INPUT]}; + end end end modelMets=upper(tModel.mets); - [I J]=ismember(upper(taskStructure(i).inputs),modelMets); + [I, J]=ismember(upper(taskStructure(i).inputs),modelMets); J=J(I); - %Check that all metabolites are either real metabolites + + %Check that all metabolites exist and are defined only once if ~all(I) - fprintf(['ERROR: Could not find all 
inputs in "[' taskStructure(i).id '] ' taskStructure(i).description '"\n']); - taskReport{i,3}='Could not find all inputs'; - notPresent=notPresent+1; + disp(['ERROR: Could not find all inputs in "[' taskStructure(i).id '] ' taskStructure(i).description '"\n']); + taskReport{i,5}='Could not find all inputs'; + notPresent=notPresent+1; end if numel(J)~=numel(unique(J)) - dispEM(['The constraints on some input(s) in "[' taskStructure(i).id '] ' taskStructure(i).description '" are defined more than one time']); + disp(['The constraints on some input(s) in "[' taskStructure(i).id '] ' taskStructure(i).description '" are defined more than one time']); end %Set the outputs @@ -241,8 +195,9 @@ rxn_Prod={}; for n=1:length(taskStructure(i).outputs) OUTPUT=taskStructure(i).outputs(n); + metabolites(end+1)=OUTPUT; OUTPUT=OUTPUT{1}; - + %skip the setup if output is also input as it has already been %setup if ismember(upper(OUTPUT),upper(taskStructure(i).inputs))==1 @@ -258,115 +213,136 @@ compSymbol{end+1} = Symb; end + % Definition of the compartment for the exchange reaction + comp_used=OUTPUT(end-2:end); % Set the exchange reactions for the outputs - % If the metabolites already exist extracellularly AddExchange=0; - if ismember('E',compSymbol)==1 + if ismember(upper(OUTPUT(end-1)),compSymbol)==1 Tsp_ID=findRxnIDs(tModel,findRxnsFromMets(tModel,OUTPUT)); Tsp_rxn = full(tModel.S(:,Tsp_ID)); Nb_React=sum(abs(Tsp_rxn),1); + % If an exchange reaction already exist - if isempty(Nb_React==1)==0 + if ~isempty(find(Nb_React==1)) ID_exc=find(Nb_React==1); - % Remove the existing exchange reaction - tModel=removeRxns(tModel,tModel.rxns(Tsp_ID(ID_exc))); - AddExchange=1; + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), taskStructure(i).LBout(n), 'l'); + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), taskStructure(i).UBout(n), 'u'); + rxn_Prod(end+1)=tModel.rxns(Tsp_ID(ID_exc)); else AddExchange=1; end else + AddExchange=1; end - + % Add a temporary exchange 
reaction that allows the export of - % the metabolite + % the metabolite if AddExchange==1 - warning off - [tModel]=addReaction(tModel,['temporary_exchange_',OUTPUT(1:end-3)],[OUTPUT,' => '],[],[],taskStructure(i).LBout(n),taskStructure(i).UBout(n),[], [], [], [], [], [],0); - warning on - rxn_Prod(end+1) = {['temporary_exchange_',OUTPUT(1:end-3)]}; - end - - % Definition of the compartment for the transport reaction - if ischar(taskStructure(i).COMP)==1 - comp_used=taskStructure(i).COMP; - if strcmpi(comp_used,'[e]')==1 - continue - end - elseif ismember('C',compSymbol)==1 - comp_used='[c]'; - elseif ismember('M',compSymbol)==1 - comp_used='[m]'; - elseif ismember('N',compSymbol)==1 - comp_used='[n]'; - elseif ismember('X',compSymbol)==1 - comp_used='[x]'; - elseif ismember('L',compSymbol)==1 - comp_used='[l]'; - elseif ismember('R',compSymbol)==1 - comp_used='[R]'; - end - - % Set the transport reactions for the outputs - % Find existing transporters associated with the ouput - AddTransport=0; - Tsp_ID=findRxnIDs(tModel,findRxnsFromMets(tModel,OUTPUT)); - Tsp_rxn = full(tModel.S(:,Tsp_ID)); - Nb_React=sum(abs(Tsp_rxn),1); - - % If free diffusion exist - if isempty(Nb_React==2)==0 - Tsp_ID2=Tsp_ID(Nb_React==2); - - % Choose the transport reaction related to the defined - % compartment (comp_used) - Tsp_ID=Tsp_ID2(tModel.S((strcmpi(tModel.mets,([OUTPUT(1:end-3),comp_used]))==1),Tsp_ID2)~=0); - if isempty(Tsp_ID)==0 - % Remove the existing transport reaction - tModel=removeRxns(tModel,tModel.rxns(Tsp_ID)); - AddTransport=1; - else - AddTransport=1; - end - else - AddTransport=1; - end - - %Create a transport reaction - if AddTransport==1 - warning off - [tModel]=addReaction(tModel,['temporary_trsp_',OUTPUT(1:end-3)],[OUTPUT(1:end-3),comp_used,' => ',OUTPUT],[],[],taskStructure(i).LBout(n),taskStructure(i).UBout(n),[], [], [], [], [], [],0); - warning on - rxn_Prod(end+1) = {['temporary_trsp_',OUTPUT(1:end-3)]}; + 
[tModel]=addReaction(tModel,['temporary_exchange_',OUTPUT],[OUTPUT,' => '],[],[],taskStructure(i).LBout(n),taskStructure(i).UBout(n)); + taskStructure(i).outputs(n)={[OUTPUT]}; + rxn_Prod(end+1) = {['temporary_exchange_',OUTPUT]}; end end end modelMets=upper(tModel.mets); - [I J]=ismember(upper(taskStructure(i).outputs),modelMets); + [I, J]=ismember(upper(taskStructure(i).outputs),modelMets); J=J(I); - %Check that all metabolites are either real metabolites + %Check that all metabolites exist and are defined only once if ~all(I) - fprintf(['ERROR: Could not find all outputs in "[' taskStructure(i).id '] ' taskStructure(i).description '"\n']); - taskReport{i,3}='Could not find all outputs'; + disp(['ERROR: Could not find all outputs in "[' taskStructure(i).id '] ' taskStructure(i).description '"\n']); + taskReport{i,5}='Could not find all outputs'; notPresent=notPresent+1; end if numel(J)~=numel(unique(J)) - dispEM(['The constraints on some output(s) in "[' taskStructure(i).id '] ' taskStructure(i).description '" are defined more than one time']); + disp(['The constraints on some output(s) in "[' taskStructure(i).id '] ' taskStructure(i).description '" are defined more than one time']); end + %Define some metabolites with free exchange + open_exchange={['h',comp_used];['pi',comp_used];['h2o',comp_used];['na1',comp_used];['ppi',comp_used]}; + for n=1:5 + EXC=open_exchange(n); + EXC=EXC{1}; + match_EXCS = strncmpi(EXC,modelMets,length(EXC(1:end-3))); + match_EXCS = modelMets(match_EXCS==1); + + compSymbol={}; + for k=1:length(match_EXCS) + [tokens] = regexp(match_EXCS{k},'(.+)\[(.+)\]','tokens'); + Symb = tokens{1}{2}; + compSymbol{end+1} = Symb; + end + % Definition of the compartment for the exchange reaction + comp_used=EXC(end-2:end); + % Set the exchange reactions for the EXCs + AddExchange=0; + if ismember(upper(EXC(end-1)),compSymbol)==1 + Tsp_ID=findRxnIDs(tModel,findRxnsFromMets(tModel,EXC)); + Tsp_rxn = full(tModel.S(:,Tsp_ID)); + 
Nb_React=sum(abs(Tsp_rxn),1); + + % If an exchange reaction already exist + if ~isempty(find(Nb_React==1)) + ID_exc=find(Nb_React==1); + + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), -1000, 'l'); + tModel = changeRxnBounds(tModel,tModel.rxns(Tsp_ID(ID_exc)), 1000, 'u'); + rxn_Subs(end+1) = tModel.rxns(Tsp_ID(ID_exc)); + else + AddExchange=1; + end + else + AddExchange=1; + end + + % Add a temporary exchange reaction that allows the import of + % the metabolite + if AddExchange==1 + % If the EXC is also member of the outputs, let the exchange reversible + [tModel]=addReaction(tModel,['temporary_exchange_',EXC(1:end-3)],[' <=> ',EXC(1:end-3),comp_used],[],[],-1000,1000); + taskStructure(i).EXCs(n)={[EXC(1:end-3),comp_used]}; + rxn_Subs(end+1) = {['temporary_exchange_',EXC(1:end-3)]}; + end + end %Solve the constrained problem - tModel.csense(1:length(tModel.mets),1) = 'E'; + + if isfield(model,'csense') + if size(tModel.csense,2)>size(tModel.csense,1) + tModel.csense=tModel.csense(:); + end + tModel.csense(length(tModel.mets),1) = 'E'; + end tModel.osense = -1; tModel.A=tModel.S; sol=solveCobraLP(tModel); + + if printDetails==true + if sol.stat~=0 + SUBS=rxn_Subs; + PROD=rxn_Prod; + disp('Reactions associated with substrates') + printRxnFormula(tModel,rxn_Subs); + disp('Bounds of reactions associated with substrates') + [tModel.lb(findRxnIDs(tModel,rxn_Subs)) tModel.ub(findRxnIDs(tModel,rxn_Subs))] + disp('Flux values of reactions associated with substrates') + sol.full(findRxnIDs(tModel,rxn_Subs)) + disp('Reactions associated with products') + printRxnFormula(tModel,rxn_Prod); + disp('Bounds of reactions associated with products') + [tModel.lb(findRxnIDs(tModel,PROD)) tModel.ub(findRxnIDs(tModel,rxn_Prod))] + disp('Flux values of reactions associated with products') + sol.full(findRxnIDs(tModel,rxn_Prod)) + end + end + if ~isempty(sol.full) if sum(abs(sol.full))~=0 if taskStructure(i).shouldFail==0 - taskReport{i,3}='true'; + taskReport{i,5}='true'; 
if printOnlyFailed==false && printOutput==true fprintf(['PASS: [' taskStructure(i).id '] ' taskStructure(i).description '\n']); score=score+1; @@ -375,36 +351,35 @@ if getEssential==true [Rxns_taskEssential]=essentialRxnsTasks(tModel); essentialRxns{i}= Rxns_taskEssential'; - - end + end else - taskReport{i,3}='PASS (should fail)'; + taskReport{i,5}='PASS (should fail)'; if printOutput==true fprintf(['PASS (should fail): [' taskStructure(i).id '] ' taskStructure(i).description '\n']); end end else if taskStructure(i).shouldFail==0 - taskReport{i,3}='FAIL (should NOT fail)'; + taskReport{i,5}='FAIL (should NOT fail)'; if printOutput==true fprintf(['FAIL: [' taskStructure(i).id '] ' taskStructure(i).description '\n']); end else - taskReport{i,3}='FAIL (should fail)'; + taskReport{i,5}='FAIL (should fail)'; if printOnlyFailed==false && printOutput==true fprintf(['FAIL (should fail): [' taskStructure(i).id '] ' taskStructure(i).description '\n']); score=score+1; end end - end + end else if taskStructure(i).shouldFail==0 - taskReport{i,3}='FAIL (should NOT fail)'; + taskReport{i,5}='FAIL (should NOT fail)'; if printOutput==true fprintf(['FAIL: [' taskStructure(i).id '] ' taskStructure(i).description '\n']); end else - taskReport{i,3}='FAIL (should fail)'; + taskReport{i,5}='FAIL (should fail)'; if printOnlyFailed==false && printOutput==true fprintf(['FAIL (should fail): [' taskStructure(i).id '] ' taskStructure(i).description '\n']); score=score+1; @@ -417,6 +392,6 @@ fprintf(['Pass ',num2str(score),' over ',num2str(totalTask),' metabolic tasks tested','\n']); fprintf([num2str(notPresent),' failed metabolic task are due to absence of metabolites in the model','\n']); taskReport{end+1,1}='Final Score'; -taskReport{end,3}=[num2str(score),'/',num2str(i)]; +taskReport{end,5}=[num2str(score),'/',num2str(i)]; taskReport{end+1,1}='Not present in the model'; -taskReport{end,3}=num2str(notPresent); \ No newline at end of file +taskReport{end,5}=num2str(notPresent); diff --git 
a/external/analysis/RAVEN/essentialRxnsTasks.m b/external/analysis/RAVEN/essentialRxnsTasks.m index 0e49ad9b90..16575be837 100644 --- a/external/analysis/RAVEN/essentialRxnsTasks.m +++ b/external/analysis/RAVEN/essentialRxnsTasks.m @@ -22,16 +22,17 @@ % - Originally written for RAVEN toolbox by Rasmus Agren, 2013-11-17 % - Adapted for cobratoolbox and modified to rely on pFBA by Richelle Anne, 2017-05-18 - [solMin modelIrrevFM]= minimizeModelFlux(model); %Compute the minimal set of reactions - modelIrrevFM = changeRxnBounds(modelIrrevFM,'netFlux',solMin.f,'b'); + [solMin, modelIrrevFM]= minimizeModelFlux(model); %Compute the minimal set of reactions + modelIrrevFM_0 = changeRxnBounds(modelIrrevFM,'netFlux',solMin.f,'b'); %Define the list of reactions to test - rxnsToCheck=modelIrrevFM.rxns(abs(solMin.x)>10^-6); + rxnsToCheck=modelIrrevFM_0.rxns(abs(solMin.x)>10^-6); % Loop that set to 0 each reaction to test and check if the problem % still has a solution essentialRxns={}; for i=1:numel(rxnsToCheck) + modelIrrevFM=modelIrrevFM_0; modelIrrevFM.lb(findRxnIDs(modelIrrevFM,rxnsToCheck(i)))=0; modelIrrevFM.ub(findRxnIDs(modelIrrevFM,rxnsToCheck(i)))=0; modelIrrevFM.csense(1:length(modelIrrevFM.mets),1) = 'E'; @@ -50,14 +51,18 @@ %% Analysis part for i=1: length(rxns_kept) string=rxns_kept{i}; - if strcmp('_f', string(end-1:end))==1 - rxns_final{i}= string(1:end-2); + if strcmp('netFlux', string)==1 + continue + elseif length(string)>=10 && strcmp('temporary', string(1:9))==1 + continue + elseif strcmp('_f', string(end-1:end))==1 + rxns_final{end+1}= string(1:end-2); elseif strcmp('_b', string(end-1:end))==1 - rxns_final{i}= string(1:end-2); + rxns_final{end+1}= string(1:end-2); elseif strcmp('_r', string(end-1:end))==1 - rxns_final{i}= string(1:end-2); + rxns_final{end+1}= string(1:end-2); else - rxns_final{i}=string; + rxns_final{end+1}=string; end end essentialRxns=unique(rxns_final); diff --git a/external/analysis/RAVEN/generateTaskStructure.m 
b/external/analysis/RAVEN/generateTaskStructure.m index ea10baf6ab..36b629bddc 100644 --- a/external/analysis/RAVEN/generateTaskStructure.m +++ b/external/analysis/RAVEN/generateTaskStructure.m @@ -1,4 +1,4 @@ -function taskStruct = generateTaskStructure(inputFile) +function taskStruct=generateTaskStructure(inputFile) % Generates a task structure from a Excell sheet containing a list of tasks % % USAGE: @@ -41,8 +41,6 @@ % * SHOULD FAIL - 1 if the correct behavior of the model is to % not have a feasible solution given the constraints % (opt, default 0) -% * COMP - specify the compartment where occurs the task -% (defaut [c], cytosol) % % OUTPUT: % taskStruct: array of structures with the following fields: @@ -58,28 +56,19 @@ % * .outputs - cell array with output metabolites (in the form metName[comps]) % * .LBout - array with lower bounds on outputs (default, 1e-04) % * .UBout - array with upper bounds on outputs (default, 1000) -% * .COMP - compartment where occurs the task (default [c], cytosol) % -% NOTE: -% -% This function is used for defining a set of tasks for a model to -% perform. The tasks are defined by defining constraints on the exchange -% and transport reaction fluxes associated with the inputs and the outputs, -% and if the problem is feasible, then the task is considered successful. -% In general, each row can contain one constraint on uptakes, one -% constraint on outputs. % % .. 
Authors: % - Originally written for RAVEN toolbox by Rasmus Agren, 2013-08-01 -% - Adapted for cobratoolbox and modified to rely only on flux constraints by Richelle Anne, 2017-04-18 +% - Adapted for cobratoolbox by Richelle Anne, 2017-04-18 -[crap,crap,raw]=xlsread(inputFile,'TASKS'); %Load the tasks file +%Load the tasks file +[~,~,raw]=xlsread(inputFile,'TASKS'); %Captions of the column in the excell file -columns={'ID';'DESCRIPTION';'IN';'IN LB';'IN UB';'OUT';'OUT LB';'OUT UB';'SHOULD FAIL';'COMP';'SYSTEM';'SUBSYSTEM'}; +columns={'ID';'DESCRIPTION';'IN';'IN LB';'IN UB';'OUT';'OUT LB';'OUT UB';'SHOULD FAIL';'SYSTEM';'SUBSYSTEM'}; -[I colI]=ismember(columns,raw(1,1:end)); -colI=colI; +[I, colI]=ismember(columns,raw(1,1:end)); %Check that the ID field is present if I(1)==0 @@ -88,16 +77,15 @@ %Prepare the input file a little. Put NaN for missing strings and default %bounds where needed -for i=1:numel(colI) - I=cellfun(@isBad,raw(:,colI(i))); - if ~ismember(i,[4 5 7 8]) - raw(I,colI(i))={NaN}; - else - if i==5 || i==8 - raw(I,colI(i))={1000}; - else - raw(I,colI(i))={1e-04}; - end +for i=[4 5 7 8] + for j=1:length(raw(:,1)) + if isnan(raw{j,colI(i)}) + if i==5 || i==8 + raw{j,colI(i)}=10000; + else + raw{j,colI(i)}=1e-04; + end + end end end @@ -106,8 +94,6 @@ eTask.description=''; eTask.system=''; eTask.subsystem=''; - -%eTask.shouldFail=false; eTask.shouldFail=[]; eTask.inputs={}; eTask.LBin=[]; @@ -115,45 +101,44 @@ eTask.outputs={}; eTask.LBout=[]; eTask.UBout=[]; -eTask.COMP=''; %Main loop taskStruct=[]; task=eTask; + if isnumeric(raw{2,colI(1)}) - task.id=num2str(raw{2,colI(1)}); + task.id=num2str(raw{2,colI(1)}); else task.id=raw{2,colI(1)}; end task.description=raw{2,colI(2)}; -task.shouldFail=raw{2,colI(9)}; -task.COMP=raw{2,colI(10)}; -task.system=raw{2,colI(11)}; -task.subsystem=raw{2,colI(12)}; +task.shouldFail=(raw{2,colI(9)}); +task.system=raw{2,colI(10)}; +task.subsystem=raw{2,colI(11)}; for i=2:size(raw,1) %Set the inputs - if ischar(raw{i,colI(3)}) 
+ if ~isnan(raw{i,colI(3)}) inputs=regexp(raw{i,colI(3)},';','split'); task.inputs=[task.inputs;inputs(:)]; - task.LBin=[task.LBin;ones(numel(inputs),1)*raw{i,colI(4)}]; - task.UBin=[task.UBin;ones(numel(inputs),1)*raw{i,colI(5)}]; + task.LBin=[task.LBin;(raw{i,colI(4)})]; + task.UBin=[task.UBin;(raw{i,colI(5)})]; end %Set the outputs - if ischar(raw{i,colI(6)}) + if ~isnan(raw{i,colI(6)}) outputs=regexp(raw{i,colI(6)},';','split'); task.outputs=[task.outputs;outputs(:)]; - task.LBout=[task.LBout;ones(numel(outputs),1)*raw{i,colI(7)}]; - task.UBout=[task.UBout;ones(numel(outputs),1)*raw{i,colI(8)}]; + task.LBout=[task.LBout;(raw{i,colI(7)})]; + task.UBout=[task.UBout;(raw{i,colI(8)})]; end %Check if it should add more constraints - if i Date: Thu, 24 Jun 2021 19:31:30 +0200 Subject: [PATCH 33/82] adding description input --- external/analysis/RAVEN/checkMetabolicTasks.m | 3 +++ 1 file changed, 3 insertions(+) diff --git a/external/analysis/RAVEN/checkMetabolicTasks.m b/external/analysis/RAVEN/checkMetabolicTasks.m index a5f82919ac..bc4ad5b910 100644 --- a/external/analysis/RAVEN/checkMetabolicTasks.m +++ b/external/analysis/RAVEN/checkMetabolicTasks.m @@ -22,6 +22,9 @@ % (default - true) % printOnlyFailed: true if only tasks that failed should be displayed % (default - false) +% printDetails: true if the details of each metabolic tasks check +% should be displayed during the computation +% (default - false) % getEssential: true if the minimal number of reactions that need to be % active to pass the task need to be computed % (default - false) From 3546ad7070cf6984c80dee02c2002b83b5a6b68c Mon Sep 17 00:00:00 2001 From: ruoqi Date: Sat, 26 Jun 2021 16:15:55 -0400 Subject: [PATCH 34/82] add RHMC submodule --- .gitmodules | 3 +++ external/analysis/PolytopeSamplerMatlab | 1 + src/analysis/sampling/sampleCbModel.m | 13 +++++++++++++ 3 files changed, 17 insertions(+) create mode 160000 external/analysis/PolytopeSamplerMatlab diff --git a/.gitmodules b/.gitmodules index 
5f8345fb3c..44b365e72c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -49,3 +49,6 @@ [submodule "external/base/utilities/condalab"] path = external/base/utilities/condalab url = https://github.com/sg-s/condalab +[submodule "external/analysis/PolytopeSamplerMatlab"] + path = external/analysis/PolytopeSamplerMatlab + url = https://github.com/ConstrainedSampler/PolytopeSamplerMatlab diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab new file mode 160000 index 0000000000..ca67170ed8 --- /dev/null +++ b/external/analysis/PolytopeSamplerMatlab @@ -0,0 +1 @@ +Subproject commit ca67170ed8d20215d06e0bd8b218673a46b887f0 diff --git a/src/analysis/sampling/sampleCbModel.m b/src/analysis/sampling/sampleCbModel.m index 43559c5c2d..6ecd5f8814 100644 --- a/src/analysis/sampling/sampleCbModel.m +++ b/src/analysis/sampling/sampleCbModel.m @@ -228,6 +228,19 @@ modelSampling=[]; samples=[]; + + case 'RHMC' + + P = struct; + P.Aeq = model.S; + P.beq = model.b; + P.lb = model.lb; + P.ub = model.ub; + + opts = default_options(); + opts.maxTime = maxTime; + o = sample(P, nPointsReturned, opts); + samples = o.samples; otherwise error(['Unknown sampler: ' samplerName]); From 05694cb68386fd02dc94d4a46edf21df5495dabf Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 1 Jul 2021 17:20:30 +0100 Subject: [PATCH 35/82] Added optional deletion of blocked reactions in mgPipe --- .../exploration/identifyFastBlockedRxns.m | 11 +- .../mgPipe/buildModelStorage.m | 339 ++++++++---------- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 2 +- 3 files changed, 156 insertions(+), 196 deletions(-) mode change 100644 => 100755 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m diff --git a/src/analysis/exploration/identifyFastBlockedRxns.m b/src/analysis/exploration/identifyFastBlockedRxns.m index 4f9e318d99..7cf46f1915 100644 --- a/src/analysis/exploration/identifyFastBlockedRxns.m +++ 
b/src/analysis/exploration/identifyFastBlockedRxns.m @@ -1,14 +1,15 @@ -function [BlockedRxns] = identifyFastBlockedRxns(model,rxnList, printLevel) +function [BlockedRxns] = identifyFastBlockedRxns(model,rxnList,printLevel,sTol) % This function evaluates the presence of blocked reactions in a metabolic model % % USAGE: % -% [BlockedRxns] = identifyFastBlockedRxns(model,rxnList) +% [BlockedRxns] = identifyFastBlockedRxns(model,rxnList, printLevel,sTol) % % INPUTS: % organisms: model in COBRA model structure format % rxnList: nx1 cell array with reactions to test % printLevel: Verbose level (default: printLevel = 1) +% sTol: Solver tolerance for flux (default: 1e-6) % % OUTPUT: % BlockedRxns: nx1 cell array containing blocked reactions @@ -23,6 +24,10 @@ rxnList = model.rxns; end +if ~exist('sTol', 'var') + sTol = 1e-6; +end + Rxns2CheckF = rxnList; L = length(Rxns2CheckF); @@ -41,7 +46,7 @@ % model.LPBasis = LPProblem.LPBasis; Rxns2Check = model.rxns; %Find reactions that carry flux (above solver tolerance) - Rxns2Check(find(abs(solutionGF_O2.v)>1e-6))=[]; + Rxns2Check(find(abs(solutionGF_O2.v)>sTol))=[]; Rxns2CheckF = intersect(Rxns2CheckF,Rxns2Check); L = length(Rxns2CheckF); if printLevel > 0 diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m old mode 100644 new mode 100755 index c5cac19dea..4203508562 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -1,193 +1,148 @@ -function [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers,removeBlockedRxns) -% This function builds the internal exchange space and the coupling -% constraints for models to join within mgPipe so they can be merged into -% microbiome models afterwards. 
exchanges that can never carry flux on the -% given diet are removed to reduce computation time. -% -% USAGE -% [exMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,dietFilePath, includeHumanMets, adaptMedium, numWorkers) -% -% INPUTS -% microbeNames: list of microbe models included in the microbiome models -% modPath: char with path of directory where models are stored -% dietFilePath: char with path of directory where the diet is saved -% includeHumanMets: boolean indicating if human-derived metabolites -% present in the gut should be provided to the models (default: true) -% adaptMedium: boolean indicating if the medium should be adapted through the -% adaptVMHDietToAGORA function or used as is (default=true) -% numWorkers: integer indicating the number of cores to use for parallelization -% removeBlockedRxns: Remove reactions blocked on the input diet to -% reduce computation time (optional) -% -% OUTPUTS -% activeExMets: list of exchanged metabolites present in at -% least one microbe model that can carry flux -% modelStoragePath: path to the modified models to join afterwards -% couplingMatrix: matrix containing coupling constraints for each model to join -% -% AUTHOR: -% - Almut Heinken, 05/2021 - -currentDir=pwd; -mkdir('modelStorage') -cd('modelStorage') -modelStoragePath = pwd; - -if numWorkers>0 && ~isempty(ver('parallel')) - % with parallelization - poolobj = gcp('nocreate'); - if isempty(poolobj) - parpool(numWorkers) - end -end - -% determine human-derived metabolites present in the gut: primary bile -% amines, mucins, host glycans -if includeHumanMets - HumanMets={'gchola','-10';'tdchola','-10';'tchola','-10';'dgchol','-10';'34dhphe','-10';'5htrp','-10';'Lkynr','-10';'f1a','-1';'gncore1','-1';'gncore2','-1';'dsT_antigen','-1';'sTn_antigen','-1';'core8','-1';'core7','-1';'core5','-1';'core4','-1';'ha','-1';'cspg_a','-1';'cspg_b','-1';'cspg_c','-1';'cspg_d','-1';'cspg_e','-1';'hspg','-1'}; -end - -% load diet constraints 
-if adaptMedium - [diet] = adaptVMHDietToAGORA(dietFilePath,'AGORA'); -else - diet = readtable(dietFilePath, 'Delimiter', '\t'); % load the text file with the diet - diet = table2cell(diet); - for i = 1:length(diet) - diet{i, 2} = num2str(-(diet{i, 2})); - end -end - -% get all exchanges that can carry flux in at least one model on the given -% diet, including metabolites that can be secreted -activeExMets = {}; -for i = 1:size(microbeNames, 1) - model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); - - activeExMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); - ex_rxns = {}; - for j=1:length(activeExMets) - ex_rxns{j}=['EX_' activeExMets{j}]; - ex_rxns{j}=strrep(ex_rxns{j},'[e]','(e)'); - end - % account for depracated nomenclature - ex_rxns=intersect(ex_rxns,model.rxns); - - % Using input diet - model = useDiet(model, diet,0); - - if includeHumanMets - % add the human metabolites - for l=1:length(HumanMets) - model=changeRxnBounds(model,strcat('EX_',HumanMets{l},'(e)'),str2num(HumanMets{l,2}),'l'); - end - end - - % compute which exchanges can carry flux - try - [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',ex_rxns); - catch - [minFlux,maxFlux]=fluxVariability(model,0,'max',ex_rxns); - end - minflux=find(abs(minFlux) > 0.00000001); - maxflux=find(abs(maxFlux) > 0.00000001); - flux=union(minflux,maxflux); - - pruned_ex_rxns = ex_rxns(flux); - pruned_ex_rxns=strrep(pruned_ex_rxns,'EX_',''); - pruned_ex_rxns=strrep(pruned_ex_rxns,'(e)','[e]'); - activeExMets = union(activeExMets,pruned_ex_rxns); -end - -% get already built reconstructions -dInfo = dir(modelStoragePath); -modelList={dInfo.name}; -modelList=modelList'; -modelList=strrep(modelList,'.mat',''); - -if length(setdiff(microbeNames,modelList))>0 - %% create a new extracellular space [u] for microbes - for i = 1:size(microbeNames, 1) - model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); - % temp fix - if isfield(model,'C') - model=rmfield(model,'C'); - 
model=rmfield(model,'d'); - end - % - - % removing possible constraints of the bacs - selExc = findExcRxns(model); - Reactions2 = model.rxns(find(selExc)); - allex = Reactions2(strmatch('EX', Reactions2)); - biomass = allex(find(strncmp(allex,'bio',3))); - finrex = setdiff(allex, biomass); - model = changeRxnBounds(model, finrex, -1000, 'l'); - model = changeRxnBounds(model, finrex, 1000, 'u'); - - % optional: remove blocked reactions on the diet from the models - if removeBlockedRxns - modelDiet = useDiet(model, diet,0); - - if includeHumanMets - % add the human metabolites - for l=1:length(HumanMets) - modelDiet=changeRxnBounds(modelDiet,strcat('EX_',HumanMets{l},'(e)'),str2num(HumanMets{l,2}),'l'); - end - end - - try - [minFlux,maxFlux]=fastFVA(modelDiet,0,'max','ibm_cplex'); - catch - [minFlux,maxFlux]=fluxVariability(modelDiet,0,'max'); - end - nominflux=find(abs(minFlux) < 0.00000001); - nomaxflux=find(abs(maxFlux) < 0.00000001); - noflux=intersect(nominflux,nomaxflux); - model=removeRxns(model,model.rxns(noflux)); - end - - % removing blocked reactions from the bacs - %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel); - %model= removeRxns(model, BlockedRxns); - %BlockedReaction = findBlockedReaction(model,'L2') - - model = convertOldStyleModel(model); - exmod = model.rxns(strncmp('EX_', model.rxns, 3)); % find exchange reactions - eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); % exchanged metabolites - dummyMicEU = createModel(); - %dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1)); - dummyMicEUmets = [strcat(strcat(microbeNames{i, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')]; - dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets); - nMets = numel(eMets); - S = [speye(nMets);-speye(nMets)]; - lbs = repmat(-1000,nMets,1); - ubs = repmat(1000,nMets,1); - names = strcat(strcat(microbeNames{i, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr'); - 
dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs); - model = removeRxns(model, exmod); - model.rxns = strcat(strcat(microbeNames{i, 1}, '_'), model.rxns); - model.mets = strcat(strcat(microbeNames{i, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] - [model] = mergeTwoModels(dummyMicEU, model, 2, false, false); - - %finish up by A: removing duplicate reactions - %We will lose information here, but we will just remove the duplicates. - [model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1); - - writeCbModel(model,'format','mat','fileName',[microbeNames{i,1} '.mat']); % store model - - % add coupling constraints and store them - IndRxns=find(strncmp(model.rxns,[microbeNames{i,1} '_'],length(microbeNames{i,1})+1));%finding indixes of specific reactions - % find the name of biomass reaction in the microbe model - bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{i,1},'_bio'),length(char(strcat(microbeNames{i,1},'_bio')))))}; - model=coupleRxnList2Rxn(model,model.rxns(IndRxns(1:length(model.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions - couplingMatrix{i,1}=model.C; - couplingMatrix{i,2}=model.d; - couplingMatrix{i,3}=model.dsense; - couplingMatrix{i,4}=model.ctrs; - end -end - -cd(currentDir) - +function [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers,removeBlockedRxns) +% This function builds the internal exchange space and the coupling +% constraints for models to join within mgPipe so they can be merged into +% microbiome models afterwards. exchanges that can never carry flux on the +% given diet are removed to reduce computation time. 
+% +% USAGE +% [activeExMets,modelStoragePath,couplingMatrix] = buildModelStorage(microbeNames,modPath,numWorkers) +% +% INPUTS +% microbeNames: list of microbe models included in the microbiome models +% modPath: char with path of directory where models are stored +% numWorkers: integer indicating the number of cores to use for parallelization +% removeBlockedRxns: Remove reactions blocked on the input diet to +% reduce computation time (default=false) +% +% OUTPUTS +% activeExMets: list of exchanged metabolites present in at +% least one microbe model that can carry flux +% modelStoragePath: path to the modified models to join afterwards +% couplingMatrix: matrix containing coupling constraints for each model to join +% +% AUTHOR: +% - Almut Heinken, 05/2021 + +currentDir=pwd; +mkdir('modelStorage') +cd('modelStorage') +modelStoragePath = pwd; + +if numWorkers>0 && ~isempty(ver('parallel')) + % with parallelization + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end +end + +% get all exchanges that can carry flux in at least one model on the given +% diet, including metabolites that can be secreted +activeExMets = {}; +for i = 1:size(microbeNames, 1) + model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); + + activeExMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); + ex_rxns = {}; + for j=1:length(activeExMets) + ex_rxns{j}=['EX_' activeExMets{j}]; + ex_rxns{j}=strrep(ex_rxns{j},'[e]','(e)'); + end + % account for depracated nomenclature + ex_rxns=intersect(ex_rxns,model.rxns); + + % compute which exchanges can carry flux + try + tic + [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',ex_rxns); + toc + catch + [minFlux,maxFlux]=fluxVariability(model,0,'max',ex_rxns); + end + minflux=find(abs(minFlux) > 0.00000001); + maxflux=find(abs(maxFlux) > 0.00000001); + flux=union(minflux,maxflux); + + pruned_ex_rxns = ex_rxns(flux); + pruned_ex_rxns=strrep(pruned_ex_rxns,'EX_',''); + 
pruned_ex_rxns=strrep(pruned_ex_rxns,'(e)','[e]'); + activeExMets = union(activeExMets,pruned_ex_rxns); +end + +% get already built reconstructions +dInfo = dir(modelStoragePath); +modelList={dInfo.name}; +modelList=modelList'; +modelList=strrep(modelList,'.mat',''); + +if length(setdiff(microbeNames,modelList))>0 + %% create a new extracellular space [u] for microbes + for i = 1:size(microbeNames, 1) + model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); + % temp fix + if isfield(model,'C') + model=rmfield(model,'C'); + model=rmfield(model,'d'); + end + % + % make sure biomass reaction is the objective function + bio=model.rxns{find(strncmp(model.rxns,'bio',3)),1}; + model=changeObjective(model,bio); + + % removing possible constraints of the bacs + selExc = findExcRxns(model); + Reactions2 = model.rxns(find(selExc)); + allex = Reactions2(strmatch('EX', Reactions2)); + biomass = allex(find(strncmp(allex,'bio',3))); + finrex = setdiff(allex, biomass); + model = changeRxnBounds(model, finrex, -1000, 'l'); + model = changeRxnBounds(model, finrex, 1000, 'u'); + + if removeBlockedRxns + % remove blocked reactions from the models + tic + BlockedRxns = identifyFastBlockedRxns(model,model.rxns, 1,1e-8); + toc + model= removeRxns(model, BlockedRxns); + BlockedReaction = findBlockedReaction(model,'L2'); + model=removeRxns(model,BlockedReaction); + end + + model = convertOldStyleModel(model); + exmod = model.rxns(strncmp('EX_', model.rxns, 3)); % find exchange reactions + eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); % exchanged metabolites + dummyMicEU = createModel(); + %dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1)); + dummyMicEUmets = [strcat(strcat(microbeNames{i, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')]; + dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets); + nMets = numel(eMets); + S = [speye(nMets);-speye(nMets)]; + lbs = repmat(-1000,nMets,1); + ubs = 
repmat(1000,nMets,1); + names = strcat(strcat(microbeNames{i, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr'); + dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs); + model = removeRxns(model, exmod); + model.rxns = strcat(strcat(microbeNames{i, 1}, '_'), model.rxns); + model.mets = strcat(strcat(microbeNames{i, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u] + [model] = mergeTwoModels(dummyMicEU, model, 2, false, false); + + %finish up by A: removing duplicate reactions + %We will lose information here, but we will just remove the duplicates. + [model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1); + + writeCbModel(model,'format','mat','fileName',[microbeNames{i,1} '.mat']); % store model + + % add coupling constraints and store them + IndRxns=find(strncmp(model.rxns,[microbeNames{i,1} '_'],length(microbeNames{i,1})+1));%finding indixes of specific reactions + % find the name of biomass reaction in the microbe model + bioRxn=model.rxns{find(strncmp(model.rxns,strcat(microbeNames{i,1},'_bio'),length(char(strcat(microbeNames{i,1},'_bio')))))}; + model=coupleRxnList2Rxn(model,model.rxns(IndRxns(1:length(model.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions + couplingMatrix{i,1}=model.C; + couplingMatrix{i,2}=model.d; + couplingMatrix{i,3}=model.dsense; + couplingMatrix{i,4}=model.ctrs; + end + + cd(currentDir) + end \ No newline at end of file diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index c9c417fb0f..8b90a2fc94 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -96,7 +96,7 @@ % Extracellular spaces simulating the lumen are built and stored for % each microbe. 
- [activeExMets,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath, dietFilePath, includeHumanMets, adaptMedium, numWorkers, removeBlockedRxns); + [activeExMets,modelStoragePath,couplingMatrix]=buildModelStorage(microbeNames,modPath, numWorkers, removeBlockedRxns); % Computing reaction presence ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {}); From 875b72f520803b37ba2e2fe3f53acf4a89c7d073 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 2 Jul 2021 11:27:24 +0100 Subject: [PATCH 36/82] Added optional deletion of blocked reactions in mgPipe --- .../multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m | 1 + 1 file changed, 1 insertion(+) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 8b90a2fc94..9c420c0083 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -313,5 +313,6 @@ else [modelStats,summary,statistics]=retrieveModelStats(resPath, modelNames); end +save([resPath filesep 'modelStatistics.mat'],'modelStats','summary','statistics') end From b28b37f286d5093666b0852222f28d2aeee54451 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 2 Jul 2021 11:28:38 +0100 Subject: [PATCH 37/82] Added optional deletion of blocked reactions in mgPipe --- .../multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m index 9c420c0083..a9ef42bbf6 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -297,9 +297,9 @@ netSecretionFluxes={}; netUptakeFluxes={}; Y=[]; - delete('simRes.mat','intRes.mat') - 
rmdir([resPath filesep 'modelStorage'],'s') + delete('simRes.mat','intRes.mat') end +rmdir([resPath filesep 'modelStorage'],'s') % get stats on microbiome models-number of reactions and metabolites for i=1:length(sampNames) From d373ad78f62207645d303d4b91abd9ce2c92605b Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Sat, 3 Jul 2021 01:06:41 +0100 Subject: [PATCH 38/82] enabled anaerobic growth foor some cases --- .../demeter/src/refinement/anaerobicGrowthGapfill.m | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m b/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m index 42cb7bdfa8..fe2419d956 100755 --- a/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m +++ b/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m @@ -89,6 +89,8 @@ 'PYAM5POr', {'EX_pydx(e)', 'PYDXabc'} % some can only produce PYDX5P aerobically 'PYAM5POr', {'EX_pydxn(e)', 'PYDXNabc', 'PDX5PO2'} % some can only produce PYDX5P aerobically 'UNKENZ',{'ACCOAC','H2CO3D'} + 'SUCDi', {'EX_succ(e)','SUCCt','EX_q8(e)','Q8abc'} + 'DHFOR2', {'EX_fol(e)','FOLabc'} }; for i = 1:size(testFix, 1) if any(ismember(model.rxns, testFix{i, 1})) From 145adc7aa3238262f0cee712c5ec6513fed2d517 Mon Sep 17 00:00:00 2001 From: ruoqi Date: Sat, 3 Jul 2021 15:12:58 -0400 Subject: [PATCH 39/82] add error check and nworker option --- src/analysis/sampling/sampleCbModel.m | 34 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/analysis/sampling/sampleCbModel.m b/src/analysis/sampling/sampleCbModel.m index 6ecd5f8814..7568b1c736 100644 --- a/src/analysis/sampling/sampleCbModel.m +++ b/src/analysis/sampling/sampleCbModel.m @@ -28,6 +28,7 @@ % * .maxTime - Maximum time limit (Default = 36000 s). ACHR only. % * .toRound - Option to round the model before sampling (true). CHRR only. % * .lambda - the bias vector for exponential sampling. CHRR_EXP only. 
+% * .nWorkers - Number of parallel workers. RHMC only. % modelSampling: From a previous round of sampling the same % model. Input to avoid repeated preprocessing. % @@ -109,6 +110,9 @@ if (isfield(options,'optPercentage')) optPercentage = options.optPercentage; end + if (isfield(options,'nWorkers')) + nWorkers = options.nWorkers; + end end switch samplerName @@ -230,15 +234,31 @@ samples=[]; case 'RHMC' - - P = struct; - P.Aeq = model.S; - P.beq = model.b; - P.lb = model.lb; - P.ub = model.ub; + P = struct; + if (~isfield(model,'S') || ~isfield(model,'b')) + error('You need to define both P.A and P.b for a polytope {x | P.A*x = P.b}.'); + else + P.Aeq = model.S; + P.beq = model.b; + end + if isfield(model,'lb') + P.lb = model.lb; + end + if isfield(model,'ub') + P.ub = model.ub; + end opts = default_options(); - opts.maxTime = maxTime; + if isfield(options,'maxTime') + opts.maxTime = options.maxTime; + end + if isfield(options,'nWorkers') + opts.nWorkers = options.nWorkers; + end + if ~isfield(options,'nPointsReturned') + nPointsReturned = 3; + end + o = sample(P, nPointsReturned, opts); samples = o.samples; From 775cc343caaa1996e619839c2543e8c0ad1e7e8a Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 4 Jul 2021 10:05:44 -0700 Subject: [PATCH 40/82] Update sampleCbModel.m --- src/analysis/sampling/sampleCbModel.m | 30 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/analysis/sampling/sampleCbModel.m b/src/analysis/sampling/sampleCbModel.m index 7568b1c736..f9fd060e56 100644 --- a/src/analysis/sampling/sampleCbModel.m +++ b/src/analysis/sampling/sampleCbModel.m @@ -14,7 +14,7 @@ % % OPTIONAL INPUTS: % sampleFile: File names for sampling output files (only implemented for ACHR) -% samplerName: {('CHRR'), 'ACHR'} Name of the sampler to be used to +% samplerName: {('CHRR'), 'ACHR', 'RHMC'} Name of the sampler to be used to % sample the solution. 
% options: Options for sampling and pre/postprocessing (default values % in parenthesis). @@ -236,32 +236,38 @@ case 'RHMC' P = struct; if (~isfield(model,'S') || ~isfield(model,'b')) - error('You need to define both P.A and P.b for a polytope {x | P.A*x = P.b}.'); + error('You need to define both model.S and model.b'); else P.Aeq = model.S; P.beq = model.b; - end + end if isfield(model,'lb') P.lb = model.lb; end if isfield(model,'ub') P.ub = model.ub; end + if isfield(model,'dsense') + I = (model.dsense == 'E'); + P.Aeq = [P.Aeq; model.C(I,:)]; + P.beq = [P.beq; model.d(I)]; + P.Aineq = model.C(~I,:); + P.bineq = model.d(~I,:); + flip = 1-2*(model.dsense(~I) == 'G'); + P.Aineq = flip.*P.Aineq; + P.bineq = flip.*P.bineq; + end opts = default_options(); - if isfield(options,'maxTime') - opts.maxTime = options.maxTime; - end + opts.maxTime = maxTime; if isfield(options,'nWorkers') opts.nWorkers = options.nWorkers; end - if ~isfield(options,'nPointsReturned') - nPointsReturned = 3; - end - o = sample(P, nPointsReturned, opts); - samples = o.samples; - + samples = o.samples; + if size(samples,2) > nPointsReturned + samples = samples(:, ((size(samples,2)-nPointsReturned):end)); + end otherwise error(['Unknown sampler: ' samplerName]); end \ No newline at end of file From e983207d79eafd1fb4d062343525fdd265f7395a Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 4 Jul 2021 10:05:50 -0700 Subject: [PATCH 41/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index ca67170ed8..a1a5164c0e 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit ca67170ed8d20215d06e0bd8b218673a46b887f0 +Subproject commit a1a5164c0ed5617718a15b51d8f427bd5de1adc1 From 606cc26681dbe924ed633e50b9b9c524df570dda Mon Sep 17 00:00:00 2001 From: YinTat Date: 
Sun, 4 Jul 2021 10:16:19 -0700 Subject: [PATCH 42/82] Revert "Update PolytopeSamplerMatlab" This reverts commit e983207d79eafd1fb4d062343525fdd265f7395a. --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index a1a5164c0e..ca67170ed8 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit a1a5164c0ed5617718a15b51d8f427bd5de1adc1 +Subproject commit ca67170ed8d20215d06e0bd8b218673a46b887f0 From e2a2e622a91affb2363cb57dacdfa29870c4658f Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 4 Jul 2021 10:19:07 -0700 Subject: [PATCH 43/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index ca67170ed8..60dde05498 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit ca67170ed8d20215d06e0bd8b218673a46b887f0 +Subproject commit 60dde05498c64086b5afde0566776ab96240ca01 From 495f6e72008eb217365dd6ca2c1d09b0f9ec55d9 Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 4 Jul 2021 10:33:37 -0700 Subject: [PATCH 44/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index 60dde05498..5c0d9e76d3 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit 60dde05498c64086b5afde0566776ab96240ca01 +Subproject commit 5c0d9e76d3f925728671ad11134e7356646755bc From 1d8efbe621aebaf0c669c2ad24f3f92f5e9d785c Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 4 Jul 2021 10:39:45 -0700 Subject: [PATCH 45/82] 
Update sampleCbModel.m --- src/analysis/sampling/sampleCbModel.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/analysis/sampling/sampleCbModel.m b/src/analysis/sampling/sampleCbModel.m index f9fd060e56..f88db1c2a8 100644 --- a/src/analysis/sampling/sampleCbModel.m +++ b/src/analysis/sampling/sampleCbModel.m @@ -268,6 +268,8 @@ if size(samples,2) > nPointsReturned samples = samples(:, ((size(samples,2)-nPointsReturned):end)); end + + volume = 'Set samplerName = ''MFE'' to estimate volume.'; otherwise error(['Unknown sampler: ' samplerName]); -end \ No newline at end of file +end From ec61e7a471f74a55a0e9930a771f7218689a63d9 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 6 Jul 2021 18:19:43 +0100 Subject: [PATCH 46/82] debugged DEMETER functions --- .../src/debugging/removeFutileCycles.m | 49 ++++++++++++------- .../properties/compareDraftRefinedVersions.m | 4 +- .../demeter/src/properties/producetSNEPlots.m | 6 +-- 3 files changed, 35 insertions(+), 24 deletions(-) mode change 100644 => 100755 src/reconstruction/demeter/src/properties/compareDraftRefinedVersions.m diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index c5828cff88..567b08efa7 100755 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -487,6 +487,7 @@ 'CDPDPH AND CYTK1',[],'CDPDPH','CDPDPHi' 'UMPK AND NDP7',[],'NDP7','NDP7i' 'CLt4r AND r2137',[],'r2137','CLti' + 'DESAT16_3 AND FAOp_even AND FAO181E',[],'DESAT16_3','DESAT16_3i' }; @@ -517,6 +518,7 @@ 'EX_arg_L(e) AND ARGt2r' 'EX_ser_L(e) AND SERt2r' 'PPA' + 'EX_glyald[e] AND GLYALDt' }; for i = 2:size(reactionsToReplace, 1) @@ -587,6 +589,8 @@ end end + % sometimes oxygen uptake needs to be enabled + modelTest=changeRxnBounds(modelTest,'EX_o2(e)',-10,'l'); FBA = optimizeCbModel(modelTest, 'max'); if FBA.f > tol model = modelTest; @@ -638,29 
+642,36 @@ end FBA = optimizeCbModel(modelTest, 'max'); if FBA.f > tol - model = modelTest; - % add replaced reactions - if ~isempty(reactionsToReplace{i, 3}) - for j=1:length(toRemove) - deletedRxns{delCnt, 1} = toRemove{j}; - delCnt = delCnt + 1; + % ensure this does not add new futile cycles + modelATPBefore=changeObjective(model,'DM_atp_c_'); + fbaATPBefore=optimizeCbModel(modelATPBefore,'max'); + modelATPAfter=changeObjective(modelTest,'DM_atp_c_'); + fbaATPAfter=optimizeCbModel(modelATPAfter,'max'); + if fbaATPAfter.f-fbaATPBefore.f < 100 + model = modelTest; + % add replaced reactions + if ~isempty(reactionsToReplace{i, 3}) + for j=1:length(toRemove) + deletedRxns{delCnt, 1} = toRemove{j}; + delCnt = delCnt + 1; + end end - end - if ~isempty(reactionsToReplace{i, 4}) - if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 - addedRxns{addCnt, 1} = toRemove{1}; + if ~isempty(reactionsToReplace{i, 4}) + if ~isempty(reactionsToReplace{i, 3}) && length(toRemove)==1 + addedRxns{addCnt, 1} = toRemove{1}; + end + for j=1:length(rxns) + addedRxns{addCnt, j+1} = rxns{j}; + end + addCnt = addCnt + 1; end - for j=1:length(rxns) - addedRxns{addCnt, j+1} = rxns{j}; + % add growth-restoring gapfilled reactions + for j=1:length(ggrxns) + gfRxns{length(gfRxns)+1, 1} = ggrxns{j}; end - addCnt = addCnt + 1; - end - % add growth-restoring gapfilled reactions - for j=1:length(ggrxns) - gfRxns{length(gfRxns)+1, 1} = ggrxns{j}; + gf=0; + break end - gf=0; - break end modelTest=modelPrevious; end diff --git a/src/reconstruction/demeter/src/properties/compareDraftRefinedVersions.m b/src/reconstruction/demeter/src/properties/compareDraftRefinedVersions.m old mode 100644 new mode 100755 index 03cd98c79f..31732921ad --- a/src/reconstruction/demeter/src/properties/compareDraftRefinedVersions.m +++ b/src/reconstruction/demeter/src/properties/compareDraftRefinedVersions.m @@ -70,7 +70,7 @@ function compareDraftRefinedVersions(draftFolder,curatedFolder,propertiesFolder, 
load(['stats_' toCompare{j,1} '.mat']); % remove models that were already analyzed - modelsRenamed=strrep(modelList(:,1),'.mat',''); + modelsRenamed=strrep(models(:,1),'.mat',''); modelsRenamed=strrep(modelsRenamed,'.sbml',''); [C,IA]=intersect(modelsRenamed(:,1),stats(2:end,1)); models(IA,:)=[]; @@ -150,7 +150,7 @@ function compareDraftRefinedVersions(draftFolder,curatedFolder,propertiesFolder, for i=l:l+endPnt % grab all statistics onerowmore=size(stats,1)+1; - modelID=strrep(models{j},'.mat',''); + modelID=strrep(models{i},'.mat',''); modelID=strrep(modelID,'.sbml',''); stats{onerowmore,1}=modelID; for k=2:12 diff --git a/src/reconstruction/demeter/src/properties/producetSNEPlots.m b/src/reconstruction/demeter/src/properties/producetSNEPlots.m index f669f74510..68dcba6c20 100755 --- a/src/reconstruction/demeter/src/properties/producetSNEPlots.m +++ b/src/reconstruction/demeter/src/properties/producetSNEPlots.m @@ -172,9 +172,9 @@ function producetSNEPlots(propertiesFolder,infoFilePath,reconVersion,customFeatu cmarkers=[cmarkers '+o*xsdp']; end cmarkers=cmarkers(1:length(unique(taxa))); - gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,{},30); - % h=gscatter(Y(:,1),Y(:,2),taxa,cols,cmarkers); - % set(h,'MarkerSize',6) +% gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,{},30); + h=gscatter(Y(:,1),Y(:,2),taxa,cols,cmarkers); + set(h,'MarkerSize',6) hold on title(analyzedFiles{k,1}) plottitle=strrep(reconVersion,'_refined',''); From 6bb8e25df6c2b4119b7af2a5426a47443bb40fe1 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 6 Jul 2021 20:19:23 +0100 Subject: [PATCH 47/82] debugged DEMETER functions --- .../demeter/src/debugging/debugModel.m | 12 + .../src/refinement/anaerobicGrowthGapfill.m | 709 +++++++++--------- 2 files changed, 367 insertions(+), 354 deletions(-) diff --git a/src/reconstruction/demeter/src/debugging/debugModel.m b/src/reconstruction/demeter/src/debugging/debugModel.m index 6a6c6382c8..9b9f5c38ae 100755 --- 
a/src/reconstruction/demeter/src/debugging/debugModel.m +++ b/src/reconstruction/demeter/src/debugging/debugModel.m @@ -67,6 +67,18 @@ if AnaerobicGrowth(1,1) < tol % find reactions that are preventing the model from growing % anaerobically + % first gapfilling specialized for anaerobic growth + [model,oxGapfillRxns,anaerGrowthOK] = anaerobicGrowthGapfill(model, biomassReaction, database); + if ~isempty(oxGapfillRxns) + summary.condGF=union(summary.condGF,oxGapfillRxns); + + gapfilledReactions{cntGF,1}=microbeID; + gapfilledReactions{cntGF,2}='Enabling anaerobic growth'; + gapfilledReactions{cntGF,3}='Condition-specific gapfilling'; + gapfilledReactions(cntGF,4:length(oxGapfillRxns)+3)=oxGapfillRxns; + cntGF=cntGF+1; + end + % then less targeted gapfilling [model,condGF,targetGF,relaxGF] = runGapfillingFunctions(model,biomassReaction,biomassReaction,'max',database); % export the gapfilled reactions if ~isempty(condGF) diff --git a/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m b/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m index fe2419d956..4f0c07e646 100755 --- a/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m +++ b/src/reconstruction/demeter/src/refinement/anaerobicGrowthGapfill.m @@ -1,354 +1,355 @@ -function [model,oxGapfillRxns,anaerGrowthOK] = anaerobicGrowthGapfill(model, biomassReaction, database) -% Tests if the input microbe model can grow anaerobically and gap-fills -% by adding anaerobic co-factor utilizing reactions. -% -% USAGE -% [model,oxGapfillRxns,anaerGrowthOK] = anaerobicGrowthGapfill(model, biomassReaction, database) -% -% INPUT -% model COBRA model structure -% biomassReaction Biomass reaction abbreviation -% database rBioNet reaction database containing min. 3 columns: -% Column 1: reaction abbreviation, Column 2: reaction -% name, Column 3: reaction formula. -% -% OUTPUT -% model COBRA model structure -% -% .. 
Authors: -% Almut Heinken and Stefania Magnusdottir, 2016-2019 - -tol = 1e-6; -model_old=model; - -anaerGrowthOK=1; -% Test if model can grow anaerobically -model = changeRxnBounds(model, 'EX_o2(e)', 0, 'l'); - -% block internal O2-utilizing cytosolic reactions -if any(ismember(model.mets, 'o2[c]')) - o2rxns = find(any(model.S(ismember(model.mets, 'o2[c]'), :), 1)); - model = changeRxnBounds(model, model.rxns(o2rxns), 0, 'b'); -end - -% check anaerobic growth -model = changeObjective(model, biomassReaction); -FBA = optimizeCbModel(model, 'max'); -if FBA.f < tol - - % List oxygen-utilizing reactions and their anaerobic cofactor-utilizing - % partner reaction - anaerobicRxns = { - 'PDX5PO', {'PDX5PO2'} - 'ASPO6', {'ASPO5','EX_succ(e)','SUCCt'} - 'DHORDi', {'DHORDfum','EX_succ(e)','SUCCt'} - 'CPPPGO', {'CPPPGO2','5DOAN','DM_5DRIB'} - 'AHMMPS', {'AMPMS2'} - }; - - % add anaerobic reactions to model (if contains O2-using reaction) - for i = 1:length(anaerobicRxns) - if any(ismember(model.rxns, anaerobicRxns{i, 1})) - for j=1:length(anaerobicRxns{i, 2}) - formula = database.reactions{ismember(database.reactions(:, 1), anaerobicRxns{i, 2}{j}), 3}; - model = addReaction(model, anaerobicRxns{i, 2}{j}, 'reactionFormula', formula); - end - end - end - - % reactions for anaerobic quinone synthesis - anaerobicQuinone = { - 'OMMBLHX3' - 'DMQMT' - 'OMPHHX3' - 'OPHHX3' - 'OMBZLM' - }; - - % add anaerobic reactions for quinone synthesis if contains aerobic - % versions - if any(ismember(model.rxns, {'2OMMBOX', 'OMPHHX'})) - for i = 1:length(anaerobicQuinone) - formula = database.reactions{ismember(database.reactions(:, 1), anaerobicQuinone{i}), 3}; - model = addReaction(model, anaerobicQuinone{i}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); - end - end - - % test if can grow now - FBA = optimizeCbModel(model, 'max'); - if FBA.f < tol - - % List possible fixes if model contains reaction in column 1 - testFix = { - 'PHE4MO', {'EX_tyr_L(e)', 'TYRt2r'} % very unlikely 
tyrosine synthesis reaction - 'H2SO', {'EX_so4(e)', 'SO4t2'} % sulfate requirement-add a transporter instead - 'r0389', {'EX_pydx(e)', 'PYDXabc'} % could be replaced by reaction 1.1.1.65 but only found in few bacteria - 'ASPT', {'EX_asp_L(e)', 'ASPt2r'} % need aspartate to produce fumarate - 'QUILSYN', {'EX_nac(e)', 'NACt2r', 'NAPRT'} % can only produce NAD aerobically - 'CYTBD', {'EX_nac(e)', 'NACt2r', 'NAPRT'} % can only produce NAD aerobically - 'PYAM5POr', {'EX_pydx(e)', 'PYDXabc'} % some can only produce PYDX5P aerobically - 'PYAM5POr', {'EX_pydxn(e)', 'PYDXNabc', 'PDX5PO2'} % some can only produce PYDX5P aerobically - 'UNKENZ',{'ACCOAC','H2CO3D'} - 'SUCDi', {'EX_succ(e)','SUCCt','EX_q8(e)','Q8abc'} - 'DHFOR2', {'EX_fol(e)','FOLabc'} - }; - for i = 1:size(testFix, 1) - if any(ismember(model.rxns, testFix{i, 1})) - modelTest = model; - newRxns = testFix{i, 2}; - for j = 1:length(newRxns) - % add reactions - formula = database.reactions{ismember(database.reactions(:, 1), newRxns{j}), 3}; - modelTest = addReaction(modelTest, newRxns{j}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f > tol - model = modelTest; - break - end - end - end - end - - % List possible fixes if model does NOT contain reaction in column 1 - testFix = { - 'EX_sheme(e)', {'EX_sheme(e)', 'SHEMEabc'} % if it is due to inability to synthesize heme - 'EX_pheme(e)', {'EX_pheme(e)', 'HEMEti'} % if it is due to inability to synthesize heme - 'EX_ser_L(e)', {'EX_ser_L(e)', 'SERt2r'} % if due to deleting the Kegg gapfilled reaction R03472, which used to synthesize serine from glycolaldehyde (makes no sense), fill in serine transporter - }; - for i = 1:size(testFix, 1) - modelTest = model; - newRxns = testFix{i, 2}; - for j = 1:length(newRxns) - % add reactions - formula = database.reactions{ismember(database.reactions(:, 1), newRxns{j}), 3}; - modelTest = addReaction(modelTest, newRxns{j}, 'reactionFormula', 
formula); - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % make sure quinones can be regenerated from quinols - if any(ismember(model.rxns, 'AMMQT8')) - modelTest = model; - formula = database.reactions{ismember(database.reactions(:, 1), 'AMMQLT8'), 3}; - modelTest = addReaction(modelTest, 'AMMQLT8', 'reactionFormula', formula); - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % Try adding fumarate reductase reactions - rxns={ - 'FRD2' - 'FRD3' - 'FRD7' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - - % some models can't consume 5-Methylthio-D-ribose - if any(ismember(model.rxns, 'DKMPPD2')) - formula = database.reactions{ismember(database.reactions(:, 1), 'DM_5MTR'), 3}; - model = addReaction(model, 'DM_5MTR', 'reactionFormula', formula); - end - - % some models need oxygen to produce 3-methyl-2-oxopentanoate - if any(ismember(model.rxns, 'ILEDA')) && ~any(ismember(model.rxns, 'ILETA')) - rxns={ - 'EX_3mop(e)' - '3MOPt2r' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - - % some cases: demand reaction in thiamin biosynthesis pathway fixes it - if any(ismember(model.rxns, 'THZPSN')) - rxns={ - 'DM_4HBA' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = 
addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % rare case-some models cannot generate any ATP without oxygen - if ~any(ismember(model.rxns, 'ATPS4')) - rxns={ - 'ATPS4' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % rare case-acetyl-CoA biosynthesis blocked - if any(ismember(model.rxns, 'FAO181O')) - rxns={ - 'ACS' - 'H2CO3D' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - % rare case-acetyl-CoA biosynthesis blocked (acetate lacking) - if any(ismember(model.rxns, 'ACKr')) && any(ismember(model.rxns, 'PTAr')) && any(ismember(model.rxns, 'ACOAD20')) - rxns={ - 'EX_ac(e)' - 'ACtr' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % rare case-10-Formyltetrahydrofolate biosynthesis blocked - if any(ismember(model.rxns, '5MTHFCL')) - rxns={ - 'FTHFL' - }; - modelTest=model; - for i=1:length(rxns) - if isempty(find(ismember(model.rxns, 
rxns{i}))) - modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); - end - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - - % test again for anaerobic growth - FBA = optimizeCbModel(model, 'max'); - % add quinone transporters and exchanges - if FBA.f < tol - quinoneRxns = { - 'EX_q8(e)' - 'Q8abc' - }; - modelTest=model; - for i = 1:length(quinoneRxns) - formula = database.reactions{ismember(database.reactions(:, 1), quinoneRxns{i}), 3}; - modelTest = addReaction(modelTest, quinoneRxns{i}, 'reactionFormula', formula); - modelTest.rxnConfidenceScores(end,1)=1; - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - else - % add demethylmenaquinone transporters and exchanges - quinoneRxns = { - 'EX_2dmmq8(e)' - '2DMMQ8abc' - }; - modelTest=model; - for i = 1:length(quinoneRxns) - formula = database.reactions{ismember(database.reactions(:, 1), quinoneRxns{i}), 3}; - modelTest = addReaction(modelTest, quinoneRxns{i}, 'reactionFormula', formula); - modelTest.rxnConfidenceScores(end,1)=1; - end - % test growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f >= tol - model = modelTest; - end - end - end - - % final test for anaerobic growth - FBA = optimizeCbModel(modelTest, 'max'); - if FBA.f < tol - warning('Model cannot grow anaerobically after gap-filling.') - anaerGrowthOK=0; - end - - % get the reactions that were added - rxnsPreAnaerGapfill = model_old.rxns; - oxGapfillRxns = setdiff(model.rxns, rxnsPreAnaerGapfill); - - % add the reactions to the previous version of the model - model=model_old; - for i = 1:length(oxGapfillRxns) - formula = database.reactions{ismember(database.reactions(:, 1), oxGapfillRxns{i}), 3}; - model = addReaction(model, oxGapfillRxns{i}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); - end - -else - model=model_old; - 
oxGapfillRxns={}; -end - -% relax constraints-cause infeasibility problems -relaxConstraints=model.rxns(find(model.lb>0)); -model=changeRxnBounds(model,relaxConstraints,0,'l'); - -% change back to unlimited medium -% list exchange reactions -exchanges = model.rxns(strncmp('EX_', model.rxns, 3)); -% open all exchanges -model = changeRxnBounds(model, exchanges, -1000, 'l'); -model = changeRxnBounds(model, exchanges, 1000, 'u'); - -end +function [model,oxGapfillRxns,anaerGrowthOK] = anaerobicGrowthGapfill(model, biomassReaction, database) +% Tests if the input microbe model can grow anaerobically and gap-fills +% by adding anaerobic co-factor utilizing reactions. +% +% USAGE +% [model,oxGapfillRxns,anaerGrowthOK] = anaerobicGrowthGapfill(model, biomassReaction, database) +% +% INPUT +% model COBRA model structure +% biomassReaction Biomass reaction abbreviation +% database rBioNet reaction database containing min. 3 columns: +% Column 1: reaction abbreviation, Column 2: reaction +% name, Column 3: reaction formula. +% +% OUTPUT +% model COBRA model structure +% +% .. 
Authors: +% Almut Heinken and Stefania Magnusdottir, 2016-2019 + +tol = 1e-6; +model_old=model; + +anaerGrowthOK=1; +% Test if model can grow anaerobically +model = changeRxnBounds(model, 'EX_o2(e)', 0, 'l'); + +% block internal O2-utilizing cytosolic reactions +if any(ismember(model.mets, 'o2[c]')) + o2rxns = findRxnsFromMets(model,'o2[c]'); + model = changeRxnBounds(model, o2rxns, 0, 'b'); +end + +% check anaerobic growth +model = changeObjective(model, biomassReaction); +FBA = optimizeCbModel(model, 'max'); +if FBA.f < tol + + % List oxygen-utilizing reactions and their anaerobic cofactor-utilizing + % partner reaction + anaerobicRxns = { + 'PDX5PO', {'PDX5PO2'} + 'ASPO6', {'ASPO5','EX_succ(e)','SUCCt'} + 'DHORDi', {'DHORDfum','EX_succ(e)','SUCCt'} + 'CPPPGO', {'CPPPGO2','5DOAN','DM_5DRIB'} + 'AHMMPS', {'AMPMS2'} + % reaction with low confidence + 'UNKENZ',{'ACCOAC','H2CO3D'} + }; + + % add anaerobic reactions to model (if contains O2-using reaction) + for i = 1:length(anaerobicRxns) + if any(ismember(model.rxns, anaerobicRxns{i, 1})) + for j=1:length(anaerobicRxns{i, 2}) + formula = database.reactions{ismember(database.reactions(:, 1), anaerobicRxns{i, 2}{j}), 3}; + model = addReaction(model, anaerobicRxns{i, 2}{j}, 'reactionFormula', formula); + end + end + end + + % reactions for anaerobic quinone synthesis + anaerobicQuinone = { + 'OMMBLHX3' + 'DMQMT' + 'OMPHHX3' + 'OPHHX3' + 'OMBZLM' + }; + + % add anaerobic reactions for quinone synthesis if contains aerobic + % versions + if any(ismember(model.rxns, {'2OMMBOX', 'OMPHHX'})) + for i = 1:length(anaerobicQuinone) + formula = database.reactions{ismember(database.reactions(:, 1), anaerobicQuinone{i}), 3}; + model = addReaction(model, anaerobicQuinone{i}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); + end + end + + % test if can grow now + FBA = optimizeCbModel(model, 'max'); + if FBA.f < tol + + % List possible fixes if model contains reaction in column 1 + testFix = { + 'PHE4MO', 
{'EX_tyr_L(e)', 'TYRt2r'} % very unlikely tyrosine synthesis reaction + 'H2SO', {'EX_so4(e)', 'SO4t2'} % sulfate requirement-add a transporter instead + 'r0389', {'EX_pydx(e)', 'PYDXabc'} % could be replaced by reaction 1.1.1.65 but only found in few bacteria + 'ASPT', {'EX_asp_L(e)', 'ASPt2r'} % need aspartate to produce fumarate + 'QUILSYN', {'EX_nac(e)', 'NACt2r', 'NAPRT'} % can only produce NAD aerobically + 'CYTBD', {'EX_nac(e)', 'NACt2r', 'NAPRT'} % can only produce NAD aerobically + 'PYAM5POr', {'EX_pydx(e)', 'PYDXabc'} % some can only produce PYDX5P aerobically + 'PYAM5POr', {'EX_pydxn(e)', 'PYDXNabc', 'PDX5PO2'} % some can only produce PYDX5P aerobically + 'SUCDi', {'EX_succ(e)','SUCCt','EX_q8(e)','Q8abc'} + 'DHFOR2', {'EX_fol(e)','FOLabc'} + }; + for i = 1:size(testFix, 1) + if any(ismember(model.rxns, testFix{i, 1})) + modelTest = model; + newRxns = testFix{i, 2}; + for j = 1:length(newRxns) + % add reactions + formula = database.reactions{ismember(database.reactions(:, 1), newRxns{j}), 3}; + modelTest = addReaction(modelTest, newRxns{j}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f > tol + model = modelTest; + break + end + end + end + end + + % List possible fixes if model does NOT contain reaction in column 1 + testFix = { + 'EX_sheme(e)', {'EX_sheme(e)', 'SHEMEabc'} % if it is due to inability to synthesize heme + 'EX_pheme(e)', {'EX_pheme(e)', 'HEMEti'} % if it is due to inability to synthesize heme + 'EX_ser_L(e)', {'EX_ser_L(e)', 'SERt2r'} % if due to deleting the Kegg gapfilled reaction R03472, which used to synthesize serine from glycolaldehyde (makes no sense), fill in serine transporter + }; + for i = 1:size(testFix, 1) + modelTest = model; + newRxns = testFix{i, 2}; + for j = 1:length(newRxns) + % add reactions + formula = database.reactions{ismember(database.reactions(:, 1), newRxns{j}), 3}; + modelTest = addReaction(modelTest, newRxns{j}, 
'reactionFormula', formula); + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % make sure quinones can be regenerated from quinols + if any(ismember(model.rxns, 'AMMQT8')) + modelTest = model; + formula = database.reactions{ismember(database.reactions(:, 1), 'AMMQLT8'), 3}; + modelTest = addReaction(modelTest, 'AMMQLT8', 'reactionFormula', formula); + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % Try adding fumarate reductase reactions + rxns={ + 'FRD2' + 'FRD3' + 'FRD7' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + + % some models can't consume 5-Methylthio-D-ribose + if any(ismember(model.rxns, 'DKMPPD2')) + formula = database.reactions{ismember(database.reactions(:, 1), 'DM_5MTR'), 3}; + model = addReaction(model, 'DM_5MTR', 'reactionFormula', formula); + end + + % some models need oxygen to produce 3-methyl-2-oxopentanoate + if any(ismember(model.rxns, 'ILEDA')) && ~any(ismember(model.rxns, 'ILETA')) + rxns={ + 'EX_3mop(e)' + '3MOPt2r' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + + % some cases: demand reaction in thiamin biosynthesis pathway fixes it + if any(ismember(model.rxns, 'THZPSN')) + rxns={ + 'DM_4HBA' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + 
modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % rare case-some models cannot generate any ATP without oxygen + if ~any(ismember(model.rxns, 'ATPS4')) + rxns={ + 'ATPS4' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % rare case-acetyl-CoA biosynthesis blocked + if any(ismember(model.rxns, 'FAO181O')) + rxns={ + 'ACS' + 'H2CO3D' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + % rare case-acetyl-CoA biosynthesis blocked (acetate lacking) + if any(ismember(model.rxns, 'ACKr')) && any(ismember(model.rxns, 'PTAr')) && any(ismember(model.rxns, 'ACOAD20')) + rxns={ + 'EX_ac(e)' + 'ACtr' + }; + modelTest=model; + for i=1:length(rxns) + if isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % rare case-10-Formyltetrahydrofolate biosynthesis blocked + if any(ismember(model.rxns, '5MTHFCL')) + rxns={ + 'FTHFL' + }; + modelTest=model; + for i=1:length(rxns) + if 
isempty(find(ismember(model.rxns, rxns{i}))) + modelTest = addReaction(modelTest, rxns{i}, 'reactionFormula', database.reactions{find(ismember(database.reactions(:, 1), rxns{i})), 3}); + end + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + + % test again for anaerobic growth + FBA = optimizeCbModel(model, 'max'); + % add quinone transporters and exchanges + if FBA.f < tol + quinoneRxns = { + 'EX_q8(e)' + 'Q8abc' + }; + modelTest=model; + for i = 1:length(quinoneRxns) + formula = database.reactions{ismember(database.reactions(:, 1), quinoneRxns{i}), 3}; + modelTest = addReaction(modelTest, quinoneRxns{i}, 'reactionFormula', formula); + modelTest.rxnConfidenceScores(end,1)=1; + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + else + % add demethylmenaquinone transporters and exchanges + quinoneRxns = { + 'EX_2dmmq8(e)' + '2DMMQ8abc' + }; + modelTest=model; + for i = 1:length(quinoneRxns) + formula = database.reactions{ismember(database.reactions(:, 1), quinoneRxns{i}), 3}; + modelTest = addReaction(modelTest, quinoneRxns{i}, 'reactionFormula', formula); + modelTest.rxnConfidenceScores(end,1)=1; + end + % test growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f >= tol + model = modelTest; + end + end + end + + % final test for anaerobic growth + FBA = optimizeCbModel(modelTest, 'max'); + if FBA.f < tol + warning('Model cannot grow anaerobically after gap-filling.') + anaerGrowthOK=0; + end + + % get the reactions that were added + rxnsPreAnaerGapfill = model_old.rxns; + oxGapfillRxns = setdiff(model.rxns, rxnsPreAnaerGapfill); + + % add the reactions to the previous version of the model + model=model_old; + for i = 1:length(oxGapfillRxns) + formula = database.reactions{ismember(database.reactions(:, 1), oxGapfillRxns{i}), 3}; + model = addReaction(model, oxGapfillRxns{i}, 'reactionFormula', formula, 'geneRule', 'AnaerobicGapfill'); + end 
+ +else + model=model_old; + oxGapfillRxns={}; +end + +% relax constraints-cause infeasibility problems +relaxConstraints=model.rxns(find(model.lb>0)); +model=changeRxnBounds(model,relaxConstraints,0,'l'); + +% change back to unlimited medium +% list exchange reactions +exchanges = model.rxns(strncmp('EX_', model.rxns, 3)); +% open all exchanges +model = changeRxnBounds(model, exchanges, -1000, 'l'); +model = changeRxnBounds(model, exchanges, 1000, 'u'); + +end From f9b4ca9fb3204fdaaed8110088dbae14c977860c Mon Sep 17 00:00:00 2001 From: YinTat Date: Wed, 7 Jul 2021 09:18:20 -0700 Subject: [PATCH 48/82] Update sampleCbModel.m --- src/analysis/sampling/sampleCbModel.m | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/analysis/sampling/sampleCbModel.m b/src/analysis/sampling/sampleCbModel.m index f88db1c2a8..0b4a1f05b9 100644 --- a/src/analysis/sampling/sampleCbModel.m +++ b/src/analysis/sampling/sampleCbModel.m @@ -11,6 +11,9 @@ % * .b - Right hand side vector % * .lb - Lower bounds % * .ub - Upper bounds +% * .C - 'k x n' matrix of additional inequality constraints +% * .d - 'k x 1' rhs of the above constraints +% * .dsense - 'k x 1' the sense of the above constraints ('L' or 'G') % % OPTIONAL INPUTS: % sampleFile: File names for sampling output files (only implemented for ACHR) From 9a60c718805abc76773158b0575bc3c66c876fa8 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 9 Jul 2021 15:45:40 +0100 Subject: [PATCH 49/82] Enabled debugging of periplasmatic reactions in Demeter, save model stats in mgPipe as spreadsheet --- .../mgPipe/adaptVMHDietToAGORA.m | 2 +- .../microbiomeModelingToolbox/mgPipe/mgPipe.m | 10 +- .../mgPipe/mgSimResCollect.m | 8 - .../mgPipe/microbiotaModelSimulator.m | 40 ++-- .../demeter/src/debugging/debugModel.m | 20 +- .../src/debugging/removeFutileCycles.m | 18 +- .../demeter/src/debugging/runDebuggingTools.m | 7 +- .../src/refinement/createPeriplasmaticSpace.m | 209 +++++++++--------- .../src/refinement/refinementPipeline.m | 
7 +- 9 files changed, 181 insertions(+), 140 deletions(-) mode change 100755 => 100644 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m mode change 100755 => 100644 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m mode change 100755 => 100644 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m mode change 100755 => 100644 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m mode change 100755 => 100644 src/reconstruction/demeter/src/debugging/debugModel.m mode change 100755 => 100644 src/reconstruction/demeter/src/debugging/removeFutileCycles.m mode change 100755 => 100644 src/reconstruction/demeter/src/debugging/runDebuggingTools.m mode change 100755 => 100644 src/reconstruction/demeter/src/refinement/refinementPipeline.m diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m old mode 100755 new mode 100644 index a8bcf706da..e2aaf973c2 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m @@ -51,7 +51,7 @@ % Define the list of metabolites required by at least one AGORA model for % growth -essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 
'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'}; +essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 
'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'; 'EX_glymet(e)'}; % fix any exchange nomenclature issues adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)'); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m old mode 100755 new mode 100644 index a9ef42bbf6..db083bcf77 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m @@ -299,7 +299,9 @@ Y=[]; delete('simRes.mat','intRes.mat') end -rmdir([resPath filesep 'modelStorage'],'s') +if isdir([resPath filesep 'modelStorage']) + rmdir([resPath filesep 'modelStorage'],'s') +end % get stats on microbiome models-number of reactions and metabolites for i=1:length(sampNames) @@ -313,6 +315,10 @@ else [modelStats,summary,statistics]=retrieveModelStats(resPath, modelNames); end 
-save([resPath filesep 'modelStatistics.mat'],'modelStats','summary','statistics') +writetable(cell2table(modelStats),[resPath filesep 'ModelStatistics.csv'], 'WriteVariableNames', false); +writetable(cell2table(summary),[resPath filesep 'ModelStatsSummary.csv'], 'WriteVariableNames', false); +if ~isempty(statistics) + writetable(cell2table(statistics),[resPath filesep 'ModelStatsStratification.csv'], 'WriteVariableNames', false); +end end diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m old mode 100755 new mode 100644 index 7ae69bdff8..e1e906e20b --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgSimResCollect.m @@ -61,14 +61,6 @@ fl = 3; end -% find empty rows in input data-tmp fix -emptyRows=find(cellfun(@isempty,netProduction{2,1}(:,2))); -for i=1:size(netProduction,2) - netProduction{2,i}(emptyRows,:)=[]; - netUptake{2,i}(emptyRows,:)=[]; -end -exchanges(emptyRows,:)=[]; - names = {'rich', 'inputDiet', 'personalized'}; for j = init:fl diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m old mode 100755 new mode 100644 index cc461ff5b2..d3ffb5ad87 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -38,7 +38,7 @@ % % OUTPUTS: % exchanges: cell array with list of all unique exchanges to diet/ -% fecal compartment that were interrogated in simulations +% fecal compartment that were interrogated in simulations % netProduction: cell array containing FVA values for maximal uptake % and secretion for setup lumen / diet exchanges % netUptake: cell array containing FVA values for minimal 
uptake @@ -67,7 +67,7 @@ environment = getEnvironment(); if saveConstrModels - mkdir([resPath filesep 'Diet']) + mkdir([resPath filesep 'Diet']) end for i=1:length(exMets) @@ -98,23 +98,29 @@ skipSim=0; if isfile(strcat(resPath, 'simRes.mat')) load(strcat(resPath, 'simRes.mat')) - skipSim=1; - for i=1:size(presol,1) - % check for all feasible models that simulations were properly - % executed - if presol{i,2} > lowerBMBound - if isempty(netProduction{2,i}(:,2)) - % feasible model was skipped, repeat simulations - skipSim=0; - end - vals=netProduction{2,i}(find(~cellfun(@isempty,(netProduction{2,i}(:,2)))),2); - if abs(sum(cell2mat(vals)))<0.000001 - % feasible model was skipped, repeat simulations - skipSim=0; + + % if any simulations were infeasible, repeat simulations + if length(inFesMat)>0 + skipSim=0; + else + skipSim=1; + % verify that every simulation result is correct + for i=1:size(presol,1) + % check for all feasible models that simulations were properly + % executed + if presol{i,2} > lowerBMBound + if isempty(netProduction{2,i}(:,2)) + % feasible model was skipped, repeat simulations + skipSim=0; + end + vals=netProduction{2,i}(find(~cellfun(@isempty,(netProduction{2,i}(:,2)))),2); + if abs(sum(cell2mat(vals)))<0.000001 + % feasible model was skipped, repeat simulations + skipSim=0; + end end end end - % verify that every simulation result is correct end % if repeatSim is true, simulations will be repeated in any case @@ -265,7 +271,7 @@ FecalRxn = AllRxn(FecalInd); FecalRxn=setdiff(FecalRxn,'EX_microbeBiomass[fe]','stable'); DietRxn = AllRxn(DietInd); - + %% computing fluxes on the rich diet if rDiet==1 && computeProfiles [minFlux,maxFlux]=guidedSim(model,FecalRxn); diff --git a/src/reconstruction/demeter/src/debugging/debugModel.m b/src/reconstruction/demeter/src/debugging/debugModel.m old mode 100755 new mode 100644 index 9b9f5c38ae..275409ef57 --- a/src/reconstruction/demeter/src/debugging/debugModel.m +++ 
b/src/reconstruction/demeter/src/debugging/debugModel.m @@ -1,4 +1,4 @@ -function [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, inputDataFolder,microbeID,biomassReaction) +function [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, infoFilePath, inputDataFolder,microbeID,biomassReaction) % This function runs a suite of debugging functions on a refined % reconstruction produced by the DEMETER pipeline. Tests % are performed whether or not the models can produce biomass aerobically @@ -7,11 +7,12 @@ % % USAGE: % -% [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, inputDataFolder,microbeID,biomassReaction) +% [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, infoFilePath, inputDataFolder,microbeID,biomassReaction) % % INPUTS % model: COBRA model structure % testResults: Structure with results of test run +% infoFilePath: File with information on reconstructions to refine % inputDataFolder: Folder with input tables with experimental data % and databases that inform the refinement process % microbeID: ID of the reconstructed microbe that serves as @@ -36,6 +37,18 @@ model=changeObjective(model,biomassReaction); +% read the info file +try + infoFile = readtable(infoFilePath, 'ReadVariableNames', false, 'Delimiter', 'tab'); +catch + % if the input file is not a text file + infoFile = readtable(infoFilePath, 'ReadVariableNames', false); +end +infoFile = table2cell(infoFile); +if ~any(strcmp(infoFile(:,1),microbeID)) + warning('No organism information provided. 
The pipeline will not be able to curate the reconstruction based on gram status.') +end + % implement complex medium constraints = readtable('ComplexMedium.txt', 'Delimiter', 'tab'); constraints=table2cell(constraints); @@ -239,6 +252,9 @@ end end +% may need to rebuild periplasm compartment +[model] = createPeriplasmaticSpace(model,microbeID,infoFile); + % remove futile cycles if any exist [atpFluxAerobic, atpFluxAnaerobic] = testATP(model); if atpFluxAerobic > 200 || atpFluxAnaerobic > 150 diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m old mode 100755 new mode 100644 index 567b08efa7..e0deca66fc --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -81,7 +81,7 @@ 'CYTDt4 AND CYTDt2r',[],'CYTDt2r','CYTDt2' 'ASPt2_2 AND ASPt2r',[],'ASPt2_2','ASPt2_2i' 'ASPt2_3 AND ASPt2r',[],'ASPt2r','ASPt2' - 'FUMt2_2 AND FUMt2r',[],'FUMt2r','FUMt' + 'FUMt2_2 AND FUMt2r','FUMt','FUMt2r','FUMt' 'SUCCt2_2 AND SUCCt2r','SUCCt','SUCCt2r','SUCCt' 'SUCCt2_3r AND SUCCt2r',[],'SUCCt2r',[] 'MALFADO AND MDH',[],'MALFADO','MALFADOi' @@ -488,6 +488,7 @@ 'UMPK AND NDP7',[],'NDP7','NDP7i' 'CLt4r AND r2137',[],'r2137','CLti' 'DESAT16_3 AND FAOp_even AND FAO181E',[],'DESAT16_3','DESAT16_3i' + 'LDH_L2 AND LDH_L',[],'LDH_L',[] }; @@ -537,13 +538,26 @@ go = 1; present=strsplit(reactionsToReplace{i,1},' AND '); if ~(length(intersect(model.rxns,present))==length(present)) - go= 0; + % try periplasmatic reactions + for j=1:length(present) + present{j}=[present{j} 'pp']; + end + if ~(length(intersect(model.rxns,present))==length(present)) + go= 0; + end end if ~isempty(reactionsToReplace{i,2}) notpresent=strsplit(reactionsToReplace{i,2},' AND '); if length(intersect(model.rxns,notpresent))==length(notpresent) go= 0; end + % try periplasmatic reactions + for j=1:length(notpresent) + notpresent{j}=[notpresent{j} 'pp']; + end + if 
length(intersect(model.rxns,notpresent))==length(notpresent) + go= 0; + end end end if go == 1 diff --git a/src/reconstruction/demeter/src/debugging/runDebuggingTools.m b/src/reconstruction/demeter/src/debugging/runDebuggingTools.m old mode 100755 new mode 100644 index db66281fb2..db712fe649 --- a/src/reconstruction/demeter/src/debugging/runDebuggingTools.m +++ b/src/reconstruction/demeter/src/debugging/runDebuggingTools.m @@ -1,4 +1,4 @@ -function [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,inputDataFolder,reconVersion,varargin) +function [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,infoFilePath,inputDataFolder,reconVersion,varargin) % This function runs a suite of debugging functions on a set of refined % reconstructions produced by the DEMETER pipeline. Tests % are performed whether or not the models can produce biomass aerobically @@ -6,12 +6,13 @@ % produced on the complex medium. 
% % USAGE -% [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,inputDataFolder,numWorkers,reconVersion,varargin) +% [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,infoFilePath,inputDataFolder,numWorkers,reconVersion,varargin) % % INPUTS % refinedFolder Folder with refined COBRA models generated by % the refinement pipeline % testResultsFolder Folder where the test results are saved +% infoFilePath: File with information on reconstructions to refine % inputDataFolder Folder with experimental data and database files % reconVersion Name of the refined reconstruction resource % @@ -186,7 +187,7 @@ end % run the gapfilling suite - [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,Results, inputDataFolder,failedModels{j,1},biomassReaction); + [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,Results,infoFilePath,inputDataFolder,failedModels{j,1},biomassReaction); gapfilledReactionsTmp{j} = gapfilledReactions; replacedReactionsTmp{j} = replacedReactions; revisedModelTmp{j} = revisedModel; diff --git a/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m b/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m index f4dab45dc0..65f1144ba5 100644 --- a/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m +++ b/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m @@ -1,104 +1,105 @@ -function [model] = createPeriplasmaticSpace(model,microbeID,infoFile) -% Part of the DEMETER pipeline. This function creates a periplasmatic space -% for refined reconstructions if it is appropriate for the organism. The -% periplasmatic space is created by by retrieving all extracellular -% metabolites and adding a third compartment. 
-% -% USAGE -% [model] = createPeriplasmaticSpace(model,microbeID,infoFile) -% -% INPUT -% model COBRA model structure -% microbeID: ID of the reconstructed microbe that serves as the -% reconstruction name and to identify it in input tables -% infoFile: Table with taxonomic and gram staining information on -% microbes to reconstruct -% -% OUTPUT -% model COBRA model structure -% -% AUTHOR: -% - Almut Heinken, 03/2020 - -% get the information on taxonomy and gram staining to find out if a periplasmatic space -% should be added -phylCol=find(strcmp(infoFile(1,:),'Phylum')); -if ~isempty(find(strcmp(infoFile(:,1),microbeID))) - phylum=infoFile{find(strcmp(infoFile(:,1),microbeID)),phylCol}; - genCol=find(strcmp(infoFile(1,:),'Genus')); - genus=infoFile{find(strcmp(infoFile(:,1),microbeID)),genCol}; - - gramCol=find(strcmp(infoFile(1,:),'Gram Staining')); - gramStatus=infoFile(find(strcmp(infoFile(:,1),microbeID)),gramCol); - - if (strcmp(gramStatus,'Gram-') || strcmp(phylum,'Deinococcus-Thermus')) && ~any(strcmp(phylum,{'Euryarchaeota','Crenarchaeota','Thaumarchaeota','Tenericutes'})) && ~any(strcmp(genus,{'Acidaminobacter','Gracilibacter'})) - - % get all extracellular metabolites - exMets=model.mets(find(contains(model.mets,'[e]'))); - - % Add periplasmatic metabolites, metNames and metFormulas - metNames = {}; - metFormulas = {}; - metCharges = []; - - for i=1:length(exMets) - pMets{i} = strrep(exMets{i},'[e]','[p]'); - metNames{i} = model.metNames{find(strcmp(model.mets,exMets{i}))}; - metFormulas{i} = model.metFormulas{find(strcmp(model.mets,exMets{i}))}; - metCharges(i) = model.metCharges(find(strcmp(model.mets,exMets{i}))); - end - - modelNew=model; - % Add all new periplasmatic metabolites - for i=1:length(pMets) - modelNew = addMetabolite(modelNew, pMets{i}, 'metName', metNames{i}, 'metFormula', metFormulas{i}, 'Charge', metCharges(i)); - end - - % Get all transport reactions associated with the exchange metabolites - rxnsToAdd = {}; - rxnNames = {}; - 
rxnFormulas = {}; - rxnLB = []; - rxnUB = []; - rxnSubsystem = {}; - - % convert extracellular transport reactions to transport reactions from - % periplasmatic space to cytosol - for i=1:length(exMets) - transpRxns = findRxnsFromMets(model,exMets{i}); - % remove exchange reactions - transpRxns(find(strncmp(transpRxns,'EX_',3)))=[]; - for j=1:length(transpRxns) - rxnsToAdd{end+1} = [transpRxns{j} 'pp']; - rxnNames{end+1} = [model.rxnNames{find(strcmp(model.rxns,transpRxns{j}))} ', periplasmatic']; - rxnLB(end+1) = model.lb(find(strcmp(model.rxns,transpRxns{j}))); - rxnUB(end+1) = model.ub(find(strcmp(model.rxns,transpRxns{j}))); - form = printRxnFormula(model,transpRxns{j}); - rxnFormulas{end+1} = strrep(form{1},'[e]','[p]'); - rxnSubsystem{end+1} = 'Transport, periplasmatic'; - end - modelNew = removeRxns(modelNew,transpRxns, 'metFlag', false); - end - - % add transport reactions to transport reactions from - % extracellular to periplasmatic space - for i=1:length(exMets) - rxnsToAdd{end+1} = [upper(strrep(exMets{i},'[e]','')) 'tex']; - rxnNames{end+1} = [model.metNames{find(strcmp(model.mets,exMets{i}))} ' diffusion extracellular to periplasm']; - rxnLB(end+1) = -1000; - rxnUB(end+1) = 1000; - rxnFormulas{end+1} = [exMets{i} ' <=> ' strrep(exMets{i},'[e]','[p]')]; - rxnSubsystem{end+1} = 'Transport, extracellular'; - end - - % add all new reactions - for i=1:length(rxnsToAdd) - modelNew = addReaction(modelNew,rxnsToAdd{i},'reactionName',rxnNames{i},... - 'reactionFormula',rxnFormulas{i},'lowerBound',rxnLB(i),'upperBound',rxnUB(i),'subSystem',rxnSubsystem{i}); - end - - model = convertOldStyleModel(modelNew); - end -end - -end +function [model] = createPeriplasmaticSpace(model,microbeID,infoFile) +% Part of the DEMETER pipeline. This function creates a periplasmatic space +% for refined reconstructions if it is appropriate for the organism. The +% periplasmatic space is created by by retrieving all extracellular +% metabolites and adding a third compartment. 
+% +% USAGE +% [model] = createPeriplasmaticSpace(model,microbeID,infoFile) +% +% INPUT +% model COBRA model structure +% microbeID: ID of the reconstructed microbe that serves as the +% reconstruction name and to identify it in input tables +% infoFile: Table with taxonomic and gram staining information on +% microbes to reconstruct +% +% OUTPUT +% model COBRA model structure +% +% AUTHOR: +% - Almut Heinken, 03/2020 + +% get the information on taxonomy and gram staining to find out if a periplasmatic space +% should be added +phylCol=find(strcmp(infoFile(1,:),'Phylum')); +if ~isempty(find(strcmp(infoFile(:,1),microbeID))) + phylum=infoFile{find(strcmp(infoFile(:,1),microbeID)),phylCol}; + genCol=find(strcmp(infoFile(1,:),'Genus')); + genus=infoFile{find(strcmp(infoFile(:,1),microbeID)),genCol}; + + gramCol=find(strcmp(infoFile(1,:),'Gram Staining')); + gramStatus=infoFile(find(strcmp(infoFile(:,1),microbeID)),gramCol); + + if (strcmp(gramStatus,'Gram-') || strcmp(phylum,'Deinococcus-Thermus')) && ~any(strcmp(phylum,{'Euryarchaeota','Crenarchaeota','Thaumarchaeota','Tenericutes'})) && ~any(strcmp(genus,{'Acidaminobacter','Gracilibacter'})) + + % get all extracellular metabolites + exMets=model.mets(find(contains(model.mets,'[e]'))); + + % Add periplasmatic metabolites, metNames and metFormulas + metNames = {}; + metFormulas = {}; + metCharges = []; + + for i=1:length(exMets) + pMets{i} = strrep(exMets{i},'[e]','[p]'); + metNames{i} = model.metNames{find(strcmp(model.mets,exMets{i}))}; + metFormulas{i} = model.metFormulas{find(strcmp(model.mets,exMets{i}))}; + metCharges(i) = model.metCharges(find(strcmp(model.mets,exMets{i}))); + end + + modelNew=model; + % Add all new periplasmatic metabolites + pMets=setdiff(pMets,model.mets); + for i=1:length(pMets) + modelNew = addMetabolite(modelNew, pMets{i}, 'metName', metNames{i}, 'metFormula', metFormulas{i}, 'Charge', metCharges(i)); + end + + % Get all transport reactions associated with the exchange metabolites + 
rxnsToAdd = {}; + rxnNames = {}; + rxnFormulas = {}; + rxnLB = []; + rxnUB = []; + rxnSubsystem = {}; + + % convert extracellular transport reactions to transport reactions from + % periplasmatic space to cytosol + for i=1:length(exMets) + transpRxns = findRxnsFromMets(model,exMets{i}); + % remove exchange reactions + transpRxns(find(strncmp(transpRxns,'EX_',3)))=[]; + for j=1:length(transpRxns) + rxnsToAdd{end+1} = [transpRxns{j} 'pp']; + rxnNames{end+1} = [model.rxnNames{find(strcmp(model.rxns,transpRxns{j}))} ', periplasmatic']; + rxnLB(end+1) = model.lb(find(strcmp(model.rxns,transpRxns{j}))); + rxnUB(end+1) = model.ub(find(strcmp(model.rxns,transpRxns{j}))); + form = printRxnFormula(model,transpRxns{j}); + rxnFormulas{end+1} = strrep(form{1},'[e]','[p]'); + rxnSubsystem{end+1} = 'Transport, periplasmatic'; + end + modelNew = removeRxns(modelNew,transpRxns, 'metFlag', false); + end + + % add transport reactions to transport reactions from + % extracellular to periplasmatic space + for i=1:length(exMets) + rxnsToAdd{end+1} = [upper(strrep(exMets{i},'[e]','')) 'tex']; + rxnNames{end+1} = [model.metNames{find(strcmp(model.mets,exMets{i}))} ' diffusion extracellular to periplasm']; + rxnLB(end+1) = -1000; + rxnUB(end+1) = 1000; + rxnFormulas{end+1} = [exMets{i} ' <=> ' strrep(exMets{i},'[e]','[p]')]; + rxnSubsystem{end+1} = 'Transport, extracellular'; + end + + % add all new reactions + for i=1:length(rxnsToAdd) + modelNew = addReaction(modelNew,rxnsToAdd{i},'reactionName',rxnNames{i},... 
+ 'reactionFormula',rxnFormulas{i},'lowerBound',rxnLB(i),'upperBound',rxnUB(i),'subSystem',rxnSubsystem{i}); + end + + model = convertOldStyleModel(modelNew); + end +end + +end diff --git a/src/reconstruction/demeter/src/refinement/refinementPipeline.m b/src/reconstruction/demeter/src/refinement/refinementPipeline.m old mode 100755 new mode 100644 index 43ce8b36d1..7dcfbc7b16 --- a/src/reconstruction/demeter/src/refinement/refinementPipeline.m +++ b/src/reconstruction/demeter/src/refinement/refinementPipeline.m @@ -27,7 +27,6 @@ % if the input file is not a text file infoFile = readtable(infoFilePath, 'ReadVariableNames', false); end - infoFile = table2cell(infoFile); if ~any(strcmp(infoFile(:,1),microbeID)) warning('No organism information provided. The pipeline will not be able to curate the reconstruction based on gram status.') @@ -297,6 +296,12 @@ end %% perform growth gap-filling if still needed + +% tmp fix +if isfield(model,'C') + model=rmfield(model,'C'); + model=rmfield(model,'d'); +end [AerobicGrowth, AnaerobicGrowth] = testGrowth(model, biomassReaction); if AerobicGrowth(1,2) < tol % apply complex medium From bfce51d8570a454519a9628e9531255c15c65ead Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 9 Jul 2021 16:55:47 +0100 Subject: [PATCH 50/82] Enabled debugging of periplasmatic reactions in Demeter, save model stats in mgPipe as spreadsheet --- src/reconstruction/demeter/src/debugging/debugModel.m | 3 --- .../src/properties/computeStochiometricFluxConsistency.m | 2 +- .../demeter/src/refinement/createPeriplasmaticSpace.m | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/reconstruction/demeter/src/debugging/debugModel.m b/src/reconstruction/demeter/src/debugging/debugModel.m index 275409ef57..b84e238b1c 100644 --- a/src/reconstruction/demeter/src/debugging/debugModel.m +++ b/src/reconstruction/demeter/src/debugging/debugModel.m @@ -252,9 +252,6 @@ end end -% may need to rebuild periplasm compartment -[model] = 
createPeriplasmaticSpace(model,microbeID,infoFile); - % remove futile cycles if any exist [atpFluxAerobic, atpFluxAnaerobic] = testATP(model); if atpFluxAerobic > 200 || atpFluxAnaerobic > 150 diff --git a/src/reconstruction/demeter/src/properties/computeStochiometricFluxConsistency.m b/src/reconstruction/demeter/src/properties/computeStochiometricFluxConsistency.m index 22ed7f90c7..aa6ef32429 100644 --- a/src/reconstruction/demeter/src/properties/computeStochiometricFluxConsistency.m +++ b/src/reconstruction/demeter/src/properties/computeStochiometricFluxConsistency.m @@ -79,7 +79,7 @@ function computeStochiometricFluxConsistency(translDraftsFolder,refinedFolder,pr end for i=1:steps:length(models) - if length(models)>steps-1 && (length(models)-1)>=steps-1 + if length(models)-i>=steps-1 endPnt=steps-1; else endPnt=length(models)-i; diff --git a/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m b/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m index 65f1144ba5..a5b0b41e03 100644 --- a/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m +++ b/src/reconstruction/demeter/src/refinement/createPeriplasmaticSpace.m @@ -50,7 +50,6 @@ modelNew=model; % Add all new periplasmatic metabolites - pMets=setdiff(pMets,model.mets); for i=1:length(pMets) modelNew = addMetabolite(modelNew, pMets{i}, 'metName', metNames{i}, 'metFormula', metFormulas{i}, 'Charge', metCharges(i)); end From 03275d2304a6c98d93ba4a922706685c04e50d88 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Fri, 9 Jul 2021 23:21:54 +0100 Subject: [PATCH 51/82] save model stats in mgPipe as spreadsheet --- .../demeter/src/debugging/debugModel.m | 15 +-------- .../src/debugging/removeFutileCycles.m | 31 ++++++++++--------- .../demeter/src/debugging/runDebuggingTools.m | 7 ++--- .../src/integration/prepareInputData.m | 0 .../demeter/src/refinement/runPipeline.m | 17 ++++------ 5 files changed, 27 insertions(+), 43 deletions(-) mode change 100644 => 100755 
src/reconstruction/demeter/src/debugging/removeFutileCycles.m mode change 100755 => 100644 src/reconstruction/demeter/src/integration/prepareInputData.m mode change 100755 => 100644 src/reconstruction/demeter/src/refinement/runPipeline.m diff --git a/src/reconstruction/demeter/src/debugging/debugModel.m b/src/reconstruction/demeter/src/debugging/debugModel.m index b84e238b1c..88ecb33ab2 100644 --- a/src/reconstruction/demeter/src/debugging/debugModel.m +++ b/src/reconstruction/demeter/src/debugging/debugModel.m @@ -1,4 +1,4 @@ -function [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, infoFilePath, inputDataFolder,microbeID,biomassReaction) +function [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,testResults, inputDataFolder,microbeID,biomassReaction) % This function runs a suite of debugging functions on a refined % reconstruction produced by the DEMETER pipeline. Tests % are performed whether or not the models can produce biomass aerobically @@ -12,7 +12,6 @@ % INPUTS % model: COBRA model structure % testResults: Structure with results of test run -% infoFilePath: File with information on reconstructions to refine % inputDataFolder: Folder with input tables with experimental data % and databases that inform the refinement process % microbeID: ID of the reconstructed microbe that serves as @@ -37,18 +36,6 @@ model=changeObjective(model,biomassReaction); -% read the info file -try - infoFile = readtable(infoFilePath, 'ReadVariableNames', false, 'Delimiter', 'tab'); -catch - % if the input file is not a text file - infoFile = readtable(infoFilePath, 'ReadVariableNames', false); -end -infoFile = table2cell(infoFile); -if ~any(strcmp(infoFile(:,1),microbeID)) - warning('No organism information provided. 
The pipeline will not be able to curate the reconstruction based on gram status.') -end - % implement complex medium constraints = readtable('ComplexMedium.txt', 'Delimiter', 'tab'); constraints=table2cell(constraints); diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m old mode 100644 new mode 100755 index e0deca66fc..6da960e11a --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -473,8 +473,8 @@ 'METt2r AND METt3r',[],'METt2r','METt2' 'NTP9 AND NDPK4',[],'NTP9','NTP9i' 'MAN1PT2r',[],'MAN1PT2r','MAN1PT2' - 'HEX4 AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' - 'MANISO AND HMR_7271 AND MAN1PT2 AND MAN6PI AND PGM AND PMANM',[],'PMANM','PMANMi' + 'HMR_7271 AND MAN1PT2 AND MAN6PI AND PMANM',[],'PMANM','PMANMi' + 'HMR_7271 AND MAN1PT2 AND MANISO AND PMANM',[],'PMANM','PMANMi' 'PGMT AND GALU AND GLBRAN AND GLDBRAN AND GLGNS1 AND GLPASE1 AND NDPK2 AND PPA AND r1393',[],'NDPK2','NDPK2i' 'D_GLUMANt AND MANt2r AND GLU_Dt2r',[],'GLU_Dt2r','GLU_Dt2' 'NACUP AND NACSMCTte AND NAt3_1',[],'NAt3_1','NAt3' @@ -537,23 +537,26 @@ else go = 1; present=strsplit(reactionsToReplace{i,1},' AND '); - if ~(length(intersect(model.rxns,present))==length(present)) - % try periplasmatic reactions + if any(contains(model.mets,'[p]')) + % if a periplasmatic reaction exists, use that for j=1:length(present) - present{j}=[present{j} 'pp']; - end - if ~(length(intersect(model.rxns,present))==length(present)) - go= 0; + if ~isempty(intersect(database.reactions(:,1),[present{j} 'pp'])) + present{j}=[present{j} 'pp']; + end end end + if ~(length(intersect(model.rxns,present))==length(present)) + go= 0; + end if ~isempty(reactionsToReplace{i,2}) notpresent=strsplit(reactionsToReplace{i,2},' AND '); - if length(intersect(model.rxns,notpresent))==length(notpresent) - go= 0; - end - % try periplasmatic 
reactions - for j=1:length(notpresent) - notpresent{j}=[notpresent{j} 'pp']; + if any(contains(model.mets,'[p]')) + % if a periplasmatic reaction exists, use that + for j=1:length(notpresent) + if ~isempty(intersect(database.reactions(:,1),[notpresent{j} 'pp'])) + notpresent{j}=[notpresent{j} 'pp']; + end + end end if length(intersect(model.rxns,notpresent))==length(notpresent) go= 0; diff --git a/src/reconstruction/demeter/src/debugging/runDebuggingTools.m b/src/reconstruction/demeter/src/debugging/runDebuggingTools.m index db712fe649..6326c6cc42 100644 --- a/src/reconstruction/demeter/src/debugging/runDebuggingTools.m +++ b/src/reconstruction/demeter/src/debugging/runDebuggingTools.m @@ -1,4 +1,4 @@ -function [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,infoFilePath,inputDataFolder,reconVersion,varargin) +function [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,inputDataFolder,reconVersion,varargin) % This function runs a suite of debugging functions on a set of refined % reconstructions produced by the DEMETER pipeline. Tests % are performed whether or not the models can produce biomass aerobically @@ -6,13 +6,12 @@ % produced on the complex medium. 
% % USAGE -% [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,infoFilePath,inputDataFolder,numWorkers,reconVersion,varargin) +% [debuggingFolder,debuggingReport, fixedModels, failedModels]=runDebuggingTools(refinedFolder,testResultsFolder,inputDataFolder,numWorkers,reconVersion,varargin) % % INPUTS % refinedFolder Folder with refined COBRA models generated by % the refinement pipeline % testResultsFolder Folder where the test results are saved -% infoFilePath: File with information on reconstructions to refine % inputDataFolder Folder with experimental data and database files % reconVersion Name of the refined reconstruction resource % @@ -187,7 +186,7 @@ end % run the gapfilling suite - [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,Results,infoFilePath,inputDataFolder,failedModels{j,1},biomassReaction); + [revisedModel,gapfilledReactions,replacedReactions]=debugModel(model,Results,inputDataFolder,failedModels{j,1},biomassReaction); gapfilledReactionsTmp{j} = gapfilledReactions; replacedReactionsTmp{j} = replacedReactions; revisedModelTmp{j} = revisedModel; diff --git a/src/reconstruction/demeter/src/integration/prepareInputData.m b/src/reconstruction/demeter/src/integration/prepareInputData.m old mode 100755 new mode 100644 diff --git a/src/reconstruction/demeter/src/refinement/runPipeline.m b/src/reconstruction/demeter/src/refinement/runPipeline.m old mode 100755 new mode 100644 index 63a538aa84..2e47335b10 --- a/src/reconstruction/demeter/src/refinement/runPipeline.m +++ b/src/reconstruction/demeter/src/refinement/runPipeline.m @@ -24,9 +24,8 @@ % reconVersion Name of the refined reconstruction resource % (default: "Reconstructions") % numWorkers Number of workers in parallel pool (default: 2) -% sbmlFolder Folder where SBML files, if desired, will be saved -% overwriteModels Define whether already finished reconstructions -% should be overwritten (default: false) +% createSBML 
Defines whether refined reconstructions should +% be exported in SBML format (default: false) % % OUTPUTS % reconVersion Name of the refined reconstruction resource @@ -51,8 +50,7 @@ parser.addParameter('inputDataFolder', '', @ischar); parser.addParameter('numWorkers', 2, @isnumeric); parser.addParameter('reconVersion', 'Reconstructions', @ischar); -parser.addParameter('sbmlFolder', '', @ischar); -parser.addParameter('overwriteModels', false, @islogical); +parser.addParameter('createSBML', false, @islogical); parser.parse(draftFolder, varargin{:}); @@ -65,8 +63,7 @@ inputDataFolder = parser.Results.inputDataFolder; numWorkers = parser.Results.numWorkers; reconVersion = parser.Results.reconVersion; -sbmlFolder = parser.Results.sbmlFolder; -overwriteModels = parser.Results.overwriteModels; +createSBML = parser.Results.createSBML; if isempty(infoFilePath) % create a file with reconstruction names based on file names. Note: @@ -91,9 +88,6 @@ mkdir(refinedFolder) mkdir(translatedDraftsFolder) mkdir(summaryFolder) -if ~isempty(sbmlFolder) -mkdir(sbmlFolder) -end %% prepare pipeline run % Get all models from the input folder @@ -270,7 +264,8 @@ %% create SBML files (default=not created) -if ~isempty(sbmlFolder) +if createSBML + sbmlFolder = [pwd filesep refinedFolder '_SBML']; createSBMLFiles(refinedFolder, sbmlFolder) end From d339f2e84b76b78df9b47d1e9eaf92b4963a8766 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Sat, 10 Jul 2021 14:35:47 +0100 Subject: [PATCH 52/82] enabled soome microbiome mdodels to grow --- .../mgPipe/adaptVMHDietToAGORA.m | 2 +- .../demeter/src/refinement/runPipeline.m | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) mode change 100644 => 100755 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m mode change 100644 => 100755 src/reconstruction/demeter/src/refinement/runPipeline.m diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m 
b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m old mode 100644 new mode 100755 index e2aaf973c2..d79659bff1 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m @@ -51,7 +51,7 @@ % Define the list of metabolites required by at least one AGORA model for % growth -essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 
'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'; 'EX_glymet(e)'}; +essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 
'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'; 'EX_glymet(e)'; 'EX_glyleu(e)'}; % fix any exchange nomenclature issues adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)'); diff --git a/src/reconstruction/demeter/src/refinement/runPipeline.m b/src/reconstruction/demeter/src/refinement/runPipeline.m old mode 100644 new mode 100755 index 2e47335b10..def2c5083e --- a/src/reconstruction/demeter/src/refinement/runPipeline.m +++ b/src/reconstruction/demeter/src/refinement/runPipeline.m @@ -121,13 +121,11 @@ modelList(~contains(modelList(:,1),'.mat'),:)=[]; modelList(:,1)=strrep(modelList(:,1),'.mat',''); - if ~overwriteModels - % remove models that were already created - [C,IA]=intersect(outputNamesToTest(:,1),modelList(:,1)); - if ~isempty(C) - models(IA,:)=[]; - folders(IA,:)=[]; - end + % remove models that were already created + [C,IA]=intersect(outputNamesToTest(:,1),modelList(:,1)); + if ~isempty(C) + models(IA,:)=[]; + folders(IA,:)=[]; end end From 01cd15366d037dfbf388977705d261a21179cdf5 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 13 Jul 2021 22:18:27 +0100 Subject: [PATCH 53/82] added y-axis labels to plots --- .../mgPipe/createPanModels.m | 31 +++++--- .../src/debugging/removeFutileCycles.m | 75 ++++++++++--------- .../demeter/src/debugging/runDebuggingTools.m | 0 .../demeter/suite/tests/plotATPTestResults.m | 2 + .../suite/tests/plotBiomassTestResults.m | 4 + .../suite/tests/plotTestSuiteResults.m | 7 +- 6 files changed, 71 insertions(+), 48 deletions(-) mode change 100644 => 100755 src/reconstruction/demeter/src/debugging/runDebuggingTools.m mode change 100644 => 100755 src/reconstruction/demeter/suite/tests/plotATPTestResults.m mode change 100644 => 100755 src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m diff --git 
a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m index 404dae06ce..9761eab02c 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPanModels.m @@ -411,9 +411,16 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) '2S6HCC AND SHCHCS AND SSALxr AND OOR2r AND SUCOAS','SSALxr','SSALx' 'ADK8 AND NDPK7 AND NTP13','NTP13','NTP13i' 'ADK10 AND NDPK4 AND NTP9','NTP9','NTP9i' + 'ADK1 AND NDPK4 AND NTP9','NTP9','NTP9i' 'RE0583C AND SUCD1 AND ACOAD8f','ACOAD8f','ACOAD8fi' 'TYRAL AND 4HBZOR AND 4HBZCL AND TYRL','4HBZCL','4HBZCLi' 'NADH8 AND H2Ot AND SO3rDdmq AND SO3t AND H2St','SO3t','SO3ti' + 'PHE_Ltex AND PHEt2rpp AND PHEtec','PHEtec',[] + 'TYR_Ltex AND TYRt2rpp AND TYRt','TYRt',[] + 'LYSt3rpp AND LYS_Ltex AND LYSt2r','LYSt2r',[] + 'CITt2 AND CITt4_4','CITt4_4','CITt' + 'SUCD1 AND 5MTHFOR','5MTHFOR','5MTHFORi' + 'MLTHFTRU AND AMETRNAMT AND 5MTHCYST AND 5MTHGLUS','AMETRNAMT','AMETRNAMTi' }; % List Western diet constraints to test if the pan-model produces @@ -449,16 +456,18 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) rxns = strsplit(reactionsToReplace{j, 1}, ' AND '); go = true; for k = 1:size(rxns, 2) - RxForm = database.reactions{find(ismember(database.reactions(:, 1), rxns{k})), 3}; - if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) - newName=[rxns{k} 'pp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - rxns{k}=newName; + if isempty(intersect(model.rxns,rxns{k})) + RxForm = database.reactions{find(ismember(database.reactions(:, 1), rxns{k})), 3}; + if 
contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) + newName=[rxns{k} 'pp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + rxns{k}=newName; + end end end end @@ -478,7 +487,7 @@ function createPanModels(agoraPath, panPath, taxonLevel, numWorkers, taxTable) replacePP=1; end % Only make the change if biomass can still be produced - if replacePP + if replacePP && isempty(intersect(model.rxns,reactionsToReplace{j, 2})) modelTest = removeRxns(model, newName); else modelTest = removeRxns(model, reactionsToReplace{j, 2}); diff --git a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m index 6da960e11a..132144fcf1 100755 --- a/src/reconstruction/demeter/src/debugging/removeFutileCycles.m +++ b/src/reconstruction/demeter/src/debugging/removeFutileCycles.m @@ -489,6 +489,8 @@ 'CLt4r AND r2137',[],'r2137','CLti' 'DESAT16_3 AND FAOp_even AND FAO181E',[],'DESAT16_3','DESAT16_3i' 'LDH_L2 AND LDH_L',[],'LDH_L',[] + 'HXANtex AND HYXNtipp AND HXANt2r',[],'HXANt2r','HYXNtpp' + 'SUCCt2rpp AND SUCCtex AND SUCCt',[],'SUCCt',[] }; @@ -537,16 +539,18 @@ else go = 1; present=strsplit(reactionsToReplace{i,1},' AND '); - if any(contains(model.mets,'[p]')) - % if a periplasmatic reaction exists, use that - for j=1:length(present) - if ~isempty(intersect(database.reactions(:,1),[present{j} 'pp'])) - present{j}=[present{j} 'pp']; + if ~(length(intersect(model.rxns,present))==length(present)) + if any(contains(model.mets,'[p]')) + % if a periplasmatic reaction exists, use that + for j=1:length(present) + if ~isempty(intersect(database.reactions(:,1),[present{j} 'pp'])) + present{j}=[present{j} 'pp']; + end end end - end - if ~(length(intersect(model.rxns,present))==length(present)) - go= 
0; + if ~(length(intersect(model.rxns,present))==length(present)) + go= 0; + end end if ~isempty(reactionsToReplace{i,2}) notpresent=strsplit(reactionsToReplace{i,2},' AND '); @@ -567,16 +571,18 @@ % Only make the change if biomass can still be produced toRemove=strsplit(reactionsToReplace{i,3},' AND '); for k=1:length(toRemove) - RxForm = database.reactions{find(ismember(database.reactions(:, 1), toRemove{k})), 3}; - if contains(RxForm,'[e]') - newName=[toRemove{k} 'pp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - toRemove{k}=newName; + if isempty(intersect(model.rxns,toRemove{k})) + RxForm = database.reactions{find(ismember(database.reactions(:, 1), toRemove{k})), 3}; + if contains(RxForm,'[e]') + newName=[toRemove{k} 'pp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + toRemove{k}=newName; + end end end end @@ -585,25 +591,26 @@ if ~isempty(reactionsToReplace{i, 4}) rxns=strsplit(reactionsToReplace{i, 4},' AND '); for j=1:length(rxns) - % create a new formula - RxForm = database.reactions{find(ismember(database.reactions(:, 1), rxns{j})), 3}; - - if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) - newName=[rxns{j} 'ipp']; - % make sure we get the correct reaction - newForm=strrep(RxForm,'[e]','[p]'); - rxnInd=find(ismember(database.reactions(:, 1), {newName})); - if ~isempty(rxnInd) - dbForm=database.reactions{rxnInd, 3}; - if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) - RxForm=dbForm; + if isempty(intersect(model.rxns,rxns{j})) + % create a new formula + RxForm = 
database.reactions{find(ismember(database.reactions(:, 1), rxns{j})), 3}; + + if contains(RxForm,'[e]') && any(contains(model.mets,'[p]')) + newName=[rxns{j} 'ipp']; + % make sure we get the correct reaction + newForm=strrep(RxForm,'[e]','[p]'); + rxnInd=find(ismember(database.reactions(:, 1), {newName})); + if ~isempty(rxnInd) + dbForm=database.reactions{rxnInd, 3}; + if checkFormulae(newForm, dbForm) && any(contains(model.mets,'[p]')) + RxForm=dbForm; + end end + modelTest = addReaction(modelTest, newName, RxForm); + else + modelTest = addReaction(modelTest, rxns{j}, RxForm); end - modelTest = addReaction(modelTest, newName, RxForm); - else - modelTest = addReaction(modelTest, rxns{j}, RxForm); end - end end % sometimes oxygen uptake needs to be enabled diff --git a/src/reconstruction/demeter/src/debugging/runDebuggingTools.m b/src/reconstruction/demeter/src/debugging/runDebuggingTools.m old mode 100644 new mode 100755 diff --git a/src/reconstruction/demeter/suite/tests/plotATPTestResults.m b/src/reconstruction/demeter/suite/tests/plotATPTestResults.m old mode 100644 new mode 100755 index d349341bd0..35d6efdd03 --- a/src/reconstruction/demeter/suite/tests/plotATPTestResults.m +++ b/src/reconstruction/demeter/suite/tests/plotATPTestResults.m @@ -116,6 +116,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['ATP production on complex medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') @@ -175,6 +176,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['ATP production on complex medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') diff --git a/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m b/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m old mode 100644 new mode 100755 index f55f2496b5..61c96a11ae --- 
a/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m +++ b/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m @@ -123,6 +123,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['Growth on rich medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') @@ -147,6 +148,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['Growth on complex medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') @@ -246,6 +248,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['Growth on rich medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') @@ -265,6 +268,7 @@ box on maxval=max(data,[],'all'); ylim([0 maxval + maxval/10]) + ylabel('mmol *g dry weight-1 * hr-1') h=title(['Growth on complex medium, ' reconVersion]); set(h,'interpreter','none') set(gca,'TickLabelInterpreter','none') diff --git a/src/reconstruction/demeter/suite/tests/plotTestSuiteResults.m b/src/reconstruction/demeter/suite/tests/plotTestSuiteResults.m index e3425d453a..d6994ffba6 100755 --- a/src/reconstruction/demeter/suite/tests/plotTestSuiteResults.m +++ b/src/reconstruction/demeter/suite/tests/plotTestSuiteResults.m @@ -51,7 +51,7 @@ function plotTestSuiteResults(testResultsFolder,reconVersion) for k=1:length(data) plotdata(k,1)=0; end - label='Number of entries'; + label='Number of data points to test'; else if strcmp(fields{j},'growsOnDefinedMedium') plotdata=data(:,2); @@ -64,14 +64,14 @@ function plotTestSuiteResults(testResultsFolder,reconVersion) if ~any(strcmp(fields{j},{'Number_genes', 'Number_reactions', 'Number_metabolites'})) label='Flux (mmol*gDW-1*hr-1)'; else - label='Number of entries'; + label='Number of data points to test'; end else % count the non-empty data entries 
for k=1:size(data,1) plotdata(k,1)=length(find(~cellfun(@isempty,data(k,2:end)))); end - label='Number of entries'; + label='Number of data points to test'; end end end @@ -171,6 +171,7 @@ function plotTestSuiteResults(testResultsFolder,reconVersion) xtickangle(45) set(h,'interpreter','none') set(gca,'YTickLabel',[]) +ylabel('Total number of model predictions') legend('Number of false negatives','Number of true positives') set(gca,'TickLabelInterpreter','none') set(gca,'FontSize',14) From f7f9c3ffa47b34fee4e583990746e2c734042230 Mon Sep 17 00:00:00 2001 From: Valcarcel Date: Fri, 16 Jul 2021 11:15:40 +0200 Subject: [PATCH 54/82] update to ngMCS v1.0.0 --- src/analysis/gMCS/GPR2models.m | 29 +-- src/analysis/gMCS/buildGmatrix.m | 14 +- src/analysis/gMCS/calculateGeneMCS.m | 195 +++++++++--------- src/analysis/gMCS/calculateMCS.m | 75 +++---- .../gMCS/prepareModelNutrientGeneMCS.m | 89 ++++++++ .../analysis/testGeneMCS/testGeneMCS.m | 4 +- 6 files changed, 247 insertions(+), 159 deletions(-) create mode 100644 src/analysis/gMCS/prepareModelNutrientGeneMCS.m diff --git a/src/analysis/gMCS/GPR2models.m b/src/analysis/gMCS/GPR2models.m index 43bc9aded5..a94809ad2b 100644 --- a/src/analysis/gMCS/GPR2models.m +++ b/src/analysis/gMCS/GPR2models.m @@ -36,19 +36,24 @@ % - Francisco J. Planes, Aug 2017, University of Navarra, TECNUN School of Engineering. % - Inigo Apaolaza, April 2018, University of Navarra, TECNUN School of Engineering. 
-if (nargin < 5 || isempty(printLevel)) - printLevel = 1; % Default is show progress -end - -if (nargin < 4 || isempty(numWorkers)) - numWorkers = 0; % Default is gpc('nocreate') -end - -if (nargin < 3) - separate_transcript = ''; % default is empty -end -if (nargin < 2) +p = inputParser; +% check required arguments +addRequired(p, 'metabolic_model'); +addOptional(p, 'selected_rxns', [], @isnumeric); +addOptional(p, 'separate_transcript', '', @(x)ischar(x)); % default is empty +addOptional(p, 'numWorkers', 0, @(x)isnumeric(x)&&isscalar(x)); % Default is gpc('nocreate') +addOptional(p, 'printLevel', 1, @(x)isnumeric(x)&&isscalar(x)); +% extract variables from parser +parse(p, metabolic_model, selected_rxns, separate_transcript, numWorkers, printLevel); +metabolic_model = p.Results.metabolic_model; +selected_rxns = p.Results.selected_rxns; +separate_transcript = p.Results.separate_transcript; +numWorkers = p.Results.numWorkers; +printLevel = p.Results.printLevel; + +% fill with all reactions +if (isempty(selected_rxns)) selected_rxns = 1:length(metabolic_model.rxns); end diff --git a/src/analysis/gMCS/buildGmatrix.m b/src/analysis/gMCS/buildGmatrix.m index 40b8317dae..1a612360ae 100644 --- a/src/analysis/gMCS/buildGmatrix.m +++ b/src/analysis/gMCS/buildGmatrix.m @@ -176,12 +176,16 @@ DM = ~cellfun(@isempty, DM); n_DM = sum(DM); DM = rxns(find(DM)); - options.rxn_set = DM; - options.timelimit = timelimit; - options.target_b = target_b; - options.printLevel = 0; +% options.rxn_set = DM; +% % options.timelimit = timelimit; +% options.target_b = target_b; +% options.printLevel = 0; max_len_mcs = length(DM); - [act_mcs, act_mcs_time] = calculateMCS(act_model, n_mcs, max_len_mcs, options); + [act_mcs, act_mcs_time] = calculateMCS(act_model, n_mcs, max_len_mcs,... + 'rxn_set', DM,... + 'timelimit', timelimit,... + 'target_b', target_b,... 
+ 'printLevel', 0); mcs{i, 1} = act_mcs; mcs_time{i, 1} = act_mcs_time; save(search_filename_3, 'act_mcs', 'act_mcs_time'); diff --git a/src/analysis/gMCS/calculateGeneMCS.m b/src/analysis/gMCS/calculateGeneMCS.m index 8cf8454f51..8fa92f0c73 100644 --- a/src/analysis/gMCS/calculateGeneMCS.m +++ b/src/analysis/gMCS/calculateGeneMCS.m @@ -1,4 +1,4 @@ -function [gmcs, gmcs_time] = calculateGeneMCS(model_name, model_struct, n_gmcs, max_len_gmcs, options) +function [gmcs, gmcs_time] = calculateGeneMCS(model_name, model_struct, n_gmcs, max_len_gmcs, varargin) % Calculate genetic Minimal Cut Sets (gMCSs) using the warm-start strategy % available in CPLEX, namely cplex.populate(), with or without selecting a % given knockout, among all the genes included in the model or a given @@ -6,7 +6,7 @@ % % USAGE: % -% [gmcs, gmcs_time] = calculateGeneMCS(model_name, model_struct, n_gmcs, max_len_gmcs, options) +% [gmcs, gmcs_time] = calculateGeneMCS(model_name, model_struct, n_gmcs, max_len_gmcs, varargin) % % INPUTS: % model_name: Name of the metabolic model under study (in order to @@ -15,36 +15,47 @@ % n_gmcs: Number of gMCSs to calculate. % max_len_gmcs: Number of genes in the largest gMCS to be calculated. % -% OPTIONAL INPUT: -% options: Structure with fields: -% -% * .KO - Selected gene knockout. Default: []. -% * .gene_set - Cell array containing the set of -% genes among which the gMCSs are wanted to be calculated. -% Default: [] (all genes are included). -% * .timelimit - Time limit for the calculation of gMCSs -% each time the solver is called. Default: 1e75. -% * .target_b - Desired activity level of the metabolic -% task to be disrupted. Default: 1e-3; -% * .separate_transcript - Character used to discriminate -% different transcripts of a gene. Default: ''. -% Example: separate_transcript = '' -% gene 10005.1 ==> gene 10005.1 -% gene 10005.2 ==> gene 10005.2 -% gene 10005.3 ==> gene 10005.3 -% separate_transcript = '.' 
-% gene 10005.1 -% gene 10005.2 ==> gene 10005 -% gene 10005.3 -% * .forceLength - 1 if the constraint limiting the -% length of the gMCSs is to be active (recommended for -% enumerating low order gMCSs), 0 otherwise. -% Default: 1. -% * .numWorkers - is the maximun number of workers -% used by Cplex and GPR2models. 0 = automatic, 1 = -% sequential, >1 = parallel. Default = 0; -% * .printLevel - 1 if the process is wanted to be -% shown on the screen, 0 otherwise. Default: 1. +% OPTIONAL INPUTS: +% KO: Selected gene knockout. (default = []) +% gene_set: Cell array containing the set of genes among which +% the gMCSs are wanted to be calculated. +% (default = [], all genes) +% target_b: Desired activity level of the metabolic task to be +% disrupted. (default = 1e-3) +% nutrientGMCS: Boolean variable. 0 to calculate GeneMCS, 1 to +% calculate MCS containing genes and nutrients, +% known as ngMCS. (default = false) +% exchangeRxns: Cell array containing the set of reactions to be +% included as inputs of nutrients from the cell +% environment / culture medium. (default = [], which +% are all reactions with only one 1 metabolite +% consiedered as input for the model) +% onlyNutrients: Boolean variable. 1 to calculate MCS only using +% selected KO and nutrients, 0 to use everything. +% If there is no KO selected, it is set to false. +% (default = false) +% separate_transcript:Character used to separate +% different transcripts of a gene. (default = '') +% Examples: +% - separate_transcript = '' +% - gene 10005.1 ==> gene 10005.1 +% - gene 10005.2 ==> gene 10005.2 +% - gene 10005.3 ==> gene 10005.3 +% - separate_transcript = '.' +% - gene 10005.1 +% - gene 10005.2 ==> gene 10005 +% - gene 10005.3 +% forceLength: 1 if the constraint limiting the length of the +% gMCSs is to be active (recommended for +% enumerating low order gMCSs), 0 otherwise +% (default = 1) +% timelimit: Time limit for the calculation of gMCSs each time +% the solver is called. 
(default = 1e75) +% numWorkers: Integer: is the maximun number of workers used +% by Cplex and GPR2models. 0 = automatic, +% 1 = sequential, > 1 = parallel. (default = 0) +% printLevel: Integer. 1 if the process is wanted to be shown +% on the screen, 0 otherwise. (default = 1) % % OUTPUTS: % gmcs: Cell array containing the calculated gMCSs. @@ -53,15 +64,14 @@ % % EXAMPLE: % %With optional values -% [gmcs, gmcs_time] = calculateGeneMCS('Recon2.v04', modelR204, 100, 10, options) -% %Being: -% %options.KO = '6240' -% %options.gene_set = {'2987'; '6241'} -% %options.timelimit = 300 -% %options.target_b = 1e-4 -% %options.separate_transcript = '.'; -% %options.forceLength = 0 -% %options.printLevel = 0 +% [gmcs, gmcs_time] = calculateGeneMCS('Recon2.v04', modelR204, 100, 10, ... +% 'KO' = '6240', ... +% 'gene_set' = {'2987'; '6241'}, ... +% 'timelimit' = 300, ... +% 'target_b' = 1e-4, +% 'separate_transcript' = '.', ... +% 'forceLength' = 0, ... +% 'printLevel' = 0) % % %Without optional values % [gmcs, gmcs_time] = calculateGeneMCS('ecoli_core_model', model, 100, 10) @@ -73,6 +83,7 @@ % .. Revisions: % - Inigo Apaolaza, 10/04/2018, University of Navarra, TECNUN School of Engineering. % - Luis V. Valcarcel, 17/04/2018, University of Navarra, TECNUN School of Engineering. +% - Luis V. Valcarcel, 20/04/2021, University of Navarra, TECNUN School of Engineering. % Check the installation of cplex global SOLVERS; @@ -85,58 +96,43 @@ error('This version calculateMCS only works with IBM CPLEX. 
Newer versions will include more solvers included in COBRA Toolbox') end -if nargin == 4 % Set Parameters - KO = []; % Optional inputs - gene_set = []; - target_b = 1e-3; - timelimit = 1e75; - separate_transcript = ''; - forceLength = true; - numWorkers = 0; - printLevel = 1; -else - if isfield(options, 'KO') - KO = options.KO; - else - KO = []; - end - if isfield(options, 'gene_set') - gene_set = options.gene_set; - else - gene_set = []; - end - if isfield(options, 'timelimit') - timelimit = options.timelimit; - else - timelimit = 1e75; - end - if isfield(options, 'target_b') - target_b = options.target_b; - else - target_b = 1e-3; - end - if isfield(options, 'separate_transcript') - separate_transcript = options.separate_transcript; - else - separate_transcript = ''; - end - if isfield(options, 'forceLength') - forceLength = options.forceLength; - else - forceLength = true; - end - if isfield(options, 'numWorkers') - numWorkers = options.numWorkers; - else - numWorkers = 0; - end - if isfield(options, 'printLevel') - printLevel = options.printLevel; - else - printLevel = 1; - end -end - +p = inputParser; +% check required arguments +addRequired(p, 'model_name', @(x)ischar(x)); +addRequired(p, 'model_struct'); +addRequired(p, 'n_gmcs', @isnumeric); +addRequired(p, 'max_len_gmcs', @isnumeric); +% Add optional name-value pair argument +addParameter(p, 'KO', [], @(x)ischar(x)||isempty(x)); +addParameter(p, 'gene_set', [], @(x)iscell(x)||isempty(x)); +addParameter(p, 'target_b', 1e-3, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'timelimit', 1e75, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'forceLength', true, @(x)islogical(x)||(isnumeric(x)&&isscalar(x))); +addParameter(p, 'separate_transcript', '', @(x)ischar(x)); +addParameter(p, 'numWorkers', 0, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'printLevel', 1, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'nutrientGMCS', false, @(x)islogical(x)||(isnumeric(x)&&isscalar(x))); +addParameter(p, 
'exchangeRxns', [], @(x)iscell(x)||isempty(x)); +addParameter(p, 'onlyNutrients', false, @(x)islogical(x)||(isnumeric(x)&&isscalar(x))); +% extract variables from parser +parse(p, model_name, model_struct, n_gmcs, max_len_gmcs, varargin{:}); +model_name = p.Results.model_name; +model_struct = p.Results.model_struct; +n_gmcs = p.Results.n_gmcs; +max_len_gmcs = p.Results.max_len_gmcs; +KO = p.Results.KO; +gene_set = p.Results.gene_set; +target_b = p.Results.target_b; +timelimit = p.Results.timelimit; +forceLength = p.Results.forceLength; +separate_transcript = p.Results.separate_transcript; +numWorkers = p.Results.numWorkers; +printLevel = p.Results.printLevel; +nutrientGMCS = p.Results.nutrientGMCS; +exchangeRxns = p.Results.exchangeRxns; +onlyNutrients = p.Results.onlyNutrients; + +% Define parameters for the gMCSs integrality_tolerance = 1e-5; M = 1e3; % Big Value alpha = 1; % used to relate the lower bound of v variables with z variables @@ -144,12 +140,23 @@ b = 1e-3; % used to activate KnockOut constraint phi = 1000; % b/c; + +% Prepare model for ngMCS +if nutrientGMCS + model_struct = prepareModelNutrientGeneMCS(model_struct, exchangeRxns); + if onlyNutrients && ~isempty(KO) + % select only artificial genes for nutrients + gene_set = model_struct.genes(startsWith(model_struct.genes, 'gene_')); + end +end + % Load or Build the G Matrix G_file = [pwd filesep 'G_' model_name '.mat']; if exist(G_file) == 2 load(G_file) else [G, G_ind, related, n_genes_KO, G_time] = buildGmatrix(model_name, model_struct, separate_transcript, numWorkers, printLevel); + assert(size(G,2) == numel(model_struct.rxns)); end gmcs_time{1, 1} = '------ TIMING ------'; gmcs_time{1, 2} = '--- G MATRIX ---'; @@ -381,6 +388,7 @@ return; end end + try disp(['Number of gMCS saved: ' num2str(length(gmcs))]); end try save('tmp.mat', 'gmcs', 'gmcs_time'); end try largest_gmcs = max(cellfun(@length, gmcs)); end end @@ -611,6 +619,7 @@ return; end end + try disp(['Number of gMCS saved: ' 
num2str(length(gmcs))]); end try save('tmp.mat', 'gmcs', 'gmcs_time'); end try largest_gmcs = max(cellfun(@length, gmcs)); end end diff --git a/src/analysis/gMCS/calculateMCS.m b/src/analysis/gMCS/calculateMCS.m index 2b9b2ecba3..45c8fde7de 100644 --- a/src/analysis/gMCS/calculateMCS.m +++ b/src/analysis/gMCS/calculateMCS.m @@ -1,4 +1,4 @@ -function [mcs, mcs_time] = calculateMCS(model_struct, n_mcs, max_len_mcs, options) +function [mcs, mcs_time] = calculateMCS(model_struct, n_mcs, max_len_mcs, varargin) % Calculate Minimal Cut Sets (MCSs) using the warm-start strategy available % in CPLEX, namely cplex.populate(), with or without selecting a given % knockout, among all the reactions included in the model or a given subset @@ -62,6 +62,7 @@ % .. Revisions: % - Inigo Apaolaza, 10/04/2018, University of Navarra, TECNUN School of Engineering. % - Luis V. Valcarcel, 17/04/2018, University of Navarra, TECNUN School of Engineering. +% - Luis V. Valcarcel, 30/06/2021, University of Navarra, TECNUN School of Engineering. 
% Check the installation of cplex global SOLVERS; @@ -76,52 +77,32 @@ time_aa = tic; % Set Parameters -% Optional inputs -if nargin == 3 - KO = []; - rxn_set = []; - target_b = 1e-3; - timelimit = 1e75; - forceLength = true; - numWorkers = 0; - printLevel = 1; -else - if isfield(options, 'KO') - KO = options.KO; - else - KO = []; - end - if isfield(options, 'rxn_set') - rxn_set = options.rxn_set; - else - rxn_set = []; - end - if isfield(options, 'timelimit') - timelimit = options.timelimit; - else - timelimit = 1e75; - end - if isfield(options, 'target_b') - target_b = options.target_b; - else - target_b = 1e-3; - end - if isfield(options, 'forceLength') - forceLength = options.forceLength; - else - forceLength = 1; - end - if isfield(options, 'numWorkers') - numWorkers = options.numWorkers; - else - numWorkers = 0; - end - if isfield(options, 'printLevel') - printLevel = options.printLevel; - else - printLevel = 1; - end -end +p = inputParser; +% check required arguments +addRequired(p, 'model_struct'); +addRequired(p, 'n_mcs', @isnumeric); +addRequired(p, 'max_len_mcs', @isnumeric); +% Add optional name-value pair argument +addParameter(p, 'KO', [], @(x)ischar(x)||isempty(x)); +addParameter(p, 'rxn_set', [], @(x)iscell(x)||isempty(x)); +addParameter(p, 'target_b', 1e-3, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'timelimit', 1e75, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'forceLength', true, @(x)islogical(x)||(isnumeric(x)&&isscalar(x))); +addParameter(p, 'numWorkers', 0, @(x)isnumeric(x)&&isscalar(x)); +addParameter(p, 'printLevel', 1, @(x)isnumeric(x)&&isscalar(x)); +% extract variables from parser +parse(p, model_struct, n_mcs, max_len_mcs, varargin{:}); +model_struct = p.Results.model_struct; +n_mcs = p.Results.n_mcs; +max_len_mcs = p.Results.max_len_gmcs; +KO = p.Results.KO; +rxn_set = p.Results.rxn_set; +target_b = p.Results.target_b; +timelimit = p.Results.timelimit; +forceLength = p.Results.forceLength; +numWorkers = p.Results.numWorkers; 
+printLevel = p.Results.printLevel; + integrality_tolerance = 1e-5; M = 1e3; % Big Value diff --git a/src/analysis/gMCS/prepareModelNutrientGeneMCS.m b/src/analysis/gMCS/prepareModelNutrientGeneMCS.m new file mode 100644 index 0000000000..38722c7277 --- /dev/null +++ b/src/analysis/gMCS/prepareModelNutrientGeneMCS.m @@ -0,0 +1,89 @@ +function model = prepareModelNutrientGeneMCS(model, exchangeRxns) +% Add artificial genes for the exchange reactions and prepare model for the +% gMCS implementation which includes nutrients from the culture medium. +% +% USAGE: +% +% model = prepareModelNutrientGeneMCS(model, exchangeRxns) +% +% INPUTS: +% model: Metabolic model structure (COBRA Toolbox format). +% +% OPTIONAL INPUTS: +% exchangeRxns: Exchange reactions to be included (default = all +% reactions which start by 'EX_', 'DM_' or 'sink_' +% and only have one metabolite involved. +% +% OUTPUTS: +% model: Metabolic model structure with genes for selected +% exchanges (COBRA Toolbox format). +% +% EXAMPLE: +% +% model = prepareModelNutrientGeneMCS(model, exchangeRxns); +% +% .. Authors: +% - Inigo Apaolaza, 19/04/2020, University of Navarra, TECNUN School of Engineering. +% - Luis V. Valcarcel, 19/04/2020, University of Navarra, TECNUN School of Engineering. 
+ +if nargin<2 + exchangeRxns = []; +end + + +% Add genes to exchanges +if isempty(exchangeRxns) + pos_exchanges = startsWith(model.rxns, 'EX_') + startsWith(model.rxns, 'DM_') + startsWith(model.rxns, 'sink_'); + pos_exchanges = find(pos_exchanges); + n_all = length(pos_exchanges); + n_mets = length(model.mets); + % check that there is only one metabolite involved + idx = false(n_all,1); + for i = 1:n_all + idx(i) = sum(model.S(:, pos_exchanges(i))==0) == n_mets-1; + end + pos_exchanges = pos_exchanges(idx); +else + [~, pos_exchanges] = ismember(model.rxns, exchangeRxns); + % check that there is only one metabolite involved + n_all = length(pos_exchanges); + n_mets = length(model.mets); + idx = false(n_all,1); + for i = 1:n_all + idx(i) = sum(model.S(:, pos_exchanges(i))==0) == n_mets-1; + end + if any(~idx) + warning('Some of the reactions inlcuded as nutrient exchange do not have only one metabolite involved') + end +end + +% perform a reaction spliting in the model and prepare it to the gMCS +% algorithm. In order to block nutrients, we need only to block the inputs, +% not the outputs. 
+modelRev = model; +[model, matchRev, rev2irrev, irrev2rev] = convertToIrreversible(modelRev, 'sRxns', modelRev.rxns(pos_exchanges)); +% transform index in reversible model to index in irreversible model +pos_exchanges_2 = [rev2irrev{pos_exchanges}]; +% select only inputs +idx = false(length(pos_exchanges_2),1); +for i = 1:length(pos_exchanges_2) + aux = unique(model.S(:, pos_exchanges_2(i))); + idx(i) = length(aux(aux~=0))==1 && aux(aux~=0)>0; +end +pos_exchanges_2 = pos_exchanges_2(idx); + +% % debug +% TT = table(model.rxns, model.lb, model.ub, printRxnFormula(model, 'printFlag', 0)); +% TT2 = TT(setdiff(1:size(TT,1), pos_exchanges_2),:); +% TT = TT(pos_exchanges_2,:); + +% add the artificial genes for the input reactions +n_all = length(pos_exchanges_2); +showprogress(0,['Adding genes for input reactions (n=' num2str(n_all) ')']); + +for i = 1:n_all + showprogress(i/n_all); + model = changeGeneAssociation(model, model.rxns{pos_exchanges_2(i)}, ['gene_' strtok(model.rxns{pos_exchanges_2(i)}, '[')]); +end + +end diff --git a/test/verifiedTests/analysis/testGeneMCS/testGeneMCS.m b/test/verifiedTests/analysis/testGeneMCS/testGeneMCS.m index ce79157361..893b59ccec 100644 --- a/test/verifiedTests/analysis/testGeneMCS/testGeneMCS.m +++ b/test/verifiedTests/analysis/testGeneMCS/testGeneMCS.m @@ -78,8 +78,8 @@ assert(sum(~logical(gmcsIsTrue))==0); %Now, test with a gene_set options = struct(); - options.gene_set = model.genes([1 2 4 5 6]); - [gmcs, gmcs_time] = calculateGeneMCS('toy_example_gMCS', model, 20, 5, options); +% options.gene_set = model.genes([1 2 4 5 6]); + [gmcs, gmcs_time] = calculateGeneMCS('toy_example_gMCS', model, 20, 5, 'gene_set', model.genes([1 2 4 5 6])); % Check the gMCS [IsCutSet, IsMinimal, geneNotMinimal] = checkGeneMCS(model, gmcs); assert(all(IsMinimal)); From 48bc3a389f33a8f8d25cd8a4f2beac527befad87 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 20 Jul 2021 15:18:42 +0100 Subject: [PATCH 55/82] Adjusted some plots --- 
.../analyseObjectiveShadowPrices.m | 13 +- .../mgPipe/microbiotaModelSimulator.m | 6 + .../demeter/src/properties/producetSNEPlots.m | 358 +++++++++--------- .../suite/tests/plotTestSuiteResults.m | 2 +- 4 files changed, 197 insertions(+), 182 deletions(-) mode change 100644 => 100755 src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m index f02cc85f9b..07a6ceefd5 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m @@ -81,6 +81,15 @@ end end +% reload existing results if applies +if isfile([resultsFolder filesep 'objectives']) + load([resultsFolder filesep 'objectives']); + load([resultsFolder filesep 'shadowPrices']); + startPnt=size(objectives,2)-1; +else + startPnt=1; +end + objectives{1,1}='Objective'; shadowPrices{1,1}='Metabolite'; shadowPrices{1,2}='Objective'; @@ -108,7 +117,7 @@ end end -for i=1:size(modelList,1) +for i=startPnt:size(modelList,1) i objectives{1,2+i}=strrep(modelList{i,1},'.mat',''); shadowPrices{1,3+i}=strrep(modelList{i,1},'.mat',''); @@ -183,8 +192,6 @@ % Regularly save results if floor(i/10) == i/10 save([resultsFolder filesep 'objectives'],'objectives'); - end - if floor(i/50) == i/50 save([resultsFolder filesep 'shadowPrices'],'shadowPrices'); end end diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m old mode 100644 new mode 100755 index d3ffb5ad87..e7b926a95a --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m +++ 
b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m @@ -423,6 +423,12 @@ end else + exchanges = {}; + netProduction = {}; + netUptake = {}; + presol = {}; + inFesMat = {}; + if saveConstrModels %% just export the models with diet constraints inFesMat = {}; diff --git a/src/reconstruction/demeter/src/properties/producetSNEPlots.m b/src/reconstruction/demeter/src/properties/producetSNEPlots.m index 68dcba6c20..da34fbb200 100755 --- a/src/reconstruction/demeter/src/properties/producetSNEPlots.m +++ b/src/reconstruction/demeter/src/properties/producetSNEPlots.m @@ -47,218 +47,220 @@ function producetSNEPlots(propertiesFolder,infoFilePath,reconVersion,customFeatu }; for k=1:size(analyzedFiles,1) - DataToAnalyze = readtable([propertiesFolder filesep analyzedFiles{k,2} '.txt'], 'ReadVariableNames', false); - DataToAnalyze = table2cell(DataToAnalyze); - DataToAnalyze=DataToAnalyze'; - - [C,I]=setdiff(DataToAnalyze(1,:),infoFile(:,1),'stable'); - DataToAnalyze(:,I(2:end))=[]; - - % can only be performed if there are enough strains with taxonomical information - if size(DataToAnalyze,2) >= 10 + if isfile([propertiesFolder filesep analyzedFiles{k,2} '.txt']) + DataToAnalyze = readtable([propertiesFolder filesep analyzedFiles{k,2} '.txt'], 'ReadVariableNames', false); + DataToAnalyze = table2cell(DataToAnalyze); + DataToAnalyze=DataToAnalyze'; - rp=str2double(DataToAnalyze(2:end,2:end)); - orgs=DataToAnalyze(1,2:end)'; + [C,I]=setdiff(DataToAnalyze(1,:),infoFile(:,1),'stable'); + DataToAnalyze(:,I(2:end))=[]; - taxonlevels={ - 'Phylum' - 'Class' - 'Order' - 'Family' - 'Genus' - 'Species' - }; - - Summary=struct; - for i=1:length(taxonlevels) - % plot on different taxon levels - taxa={}; - taxcol=find(strcmp(infoFile(1,:),taxonlevels{i})); - for j=2:size(DataToAnalyze,2) - if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) - taxa{j-1,1}='N/A'; - else - taxa{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),taxcol}; - end - end - - 
data=rp'; - red_orgs=orgs; - - % remove entries that are all zeros - toDel=sum(data,1)= 10 + rp=str2double(DataToAnalyze(2:end,2:end)); + orgs=DataToAnalyze(1,2:end)'; - % remove unclassified organisms - data(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; - red_orgs(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; - taxa(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; + taxonlevels={ + 'Phylum' + 'Class' + 'Order' + 'Family' + 'Genus' + 'Species' + }; - - if i==6 - % remove unclassified species - toDel=[]; - cnt=1; - for j=1:size(data,1) - if strcmp(taxa{j,1}(length(taxa{j,1})-2:length(taxa{j,1})),' sp') - toDel(cnt)=j; - cnt=cnt+1; + Summary=struct; + for i=1:length(taxonlevels) + % plot on different taxon levels + taxa={}; + taxcol=find(strcmp(infoFile(1,:),taxonlevels{i})); + for j=2:size(DataToAnalyze,2) + if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) + taxa{j-1,1}='N/A'; + else + taxa{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),taxcol}; end end - data(toDel,:)=[]; - red_orgs(toDel,:)=[]; - taxa(toDel,:)=[]; - end - - % remove taxa with too few members - [uniqueXX, ~, J]=unique(taxa) ; - occ = histc(J, 1:numel(uniqueXX)); - - if length(uniqueXX) >15 - % sort by number of entries and remove the ones with the least - % entries - [B,I]=sort(occ,'descend'); - uniqueXX=uniqueXX(I); - if sum(B==1) > length(B)-15 - % remove all that are just one entry - uniqueXX(B==1)=[]; - else - % remove all but 20 highest - uniqueXX=uniqueXX(1:20); - end + data=rp'; + red_orgs=orgs; - [C,IA]=setdiff(taxa,uniqueXX); - data(find(ismember(taxa,C)),:)=[]; - red_orgs(ismember(taxa,C),:)=[]; - taxa(find(ismember(taxa,C)),:)=[]; - end - - if size(data,1)>10 + % remove entries that are all zeros + toDel=sum(data,1) 150 - perpl=50; - elseif size(data,1) >= 50 - perpl=30; - elseif size(data,1) >= 20 - perpl=10; - else - perpl=5; - end - - Y = 
tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',2); -% Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',3); - Summary.(taxonlevels{i})(:,1)=red_orgs; - Summary.(taxonlevels{i})(:,2)=taxa; - Summary.(taxonlevels{i})(:,3:size(Y,2)+2)=cellstr(string(Y)); + % remove entries that are NaNs + findnans=any(isnan(data)); + data(:,findnans==1)=[]; + + % remove unclassified organisms + data(find(strcmp(taxa,'N/A')),:)=[]; + red_orgs(strcmp(taxa,'N/A'),:)=[]; + taxa(find(strcmp(taxa,'N/A')),:)=[]; + + + % remove unclassified organisms + data(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; + red_orgs(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; + taxa(find(strncmp(taxa,'unclassified',length('unclassified'))),:)=[]; - if size(data,1) == size(Y,1) && size(Y,2) > 1 - f=figure; - cols=hsv(length(unique(taxa))); - % define markers to better distinguish groups - cmarkers=''; - for j=1:7:length(unique(taxa)) - cmarkers=[cmarkers '+o*xsdp']; + + if i==6 + % remove unclassified species + toDel=[]; + cnt=1; + for j=1:size(data,1) + if strcmp(taxa{j,1}(length(taxa{j,1})-2:length(taxa{j,1})),' sp') + toDel(cnt)=j; + cnt=cnt+1; + end end - cmarkers=cmarkers(1:length(unique(taxa))); -% gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,{},30); - h=gscatter(Y(:,1),Y(:,2),taxa,cols,cmarkers); - set(h,'MarkerSize',6) - hold on - title(analyzedFiles{k,1}) - plottitle=strrep(reconVersion,'_refined',''); - plottitle=strrep(plottitle,'_draft',''); - suptitle(plottitle) + data(toDel,:)=[]; + red_orgs(toDel,:)=[]; + taxa(toDel,:)=[]; + end + + % remove taxa with too few members + [uniqueXX, ~, J]=unique(taxa) ; + occ = histc(J, 1:numel(uniqueXX)); + + if length(uniqueXX) >15 + % sort by number of entries and remove the ones with the least + % entries + [B,I]=sort(occ,'descend'); + uniqueXX=uniqueXX(I); - h=legend('Location','northeastoutside'); - if length(uniqueXX) < 12 - set(h,'FontSize',12) - elseif 
length(uniqueXX) < 20 - set(h,'FontSize',11) + if sum(B==1) > length(B)-15 + % remove all that are just one entry + uniqueXX(B==1)=[]; else - set(h,'FontSize',8) + % remove all but 20 highest + uniqueXX=uniqueXX(1:20); end - grid off - f.Renderer='painters'; - print([taxonlevels{i} '_' strrep(analyzedFiles{k,1},' ','_') '_' reconVersion],'-dpng','-r300') - else - warning('Not enough strains with available organism information. Cannot cluster based on taxonomy.') + + [C,IA]=setdiff(taxa,uniqueXX); + data(find(ismember(taxa,C)),:)=[]; + red_orgs(ismember(taxa,C),:)=[]; + taxa(find(ismember(taxa,C)),:)=[]; end - end - end - save(['Summary_' reconVersion],'Summary'); - - % if the data should be clustered by any custom features from the info file - if nargin > 3 - for i=1:length(customFeatures) - % plot on different taxon levels - feats={}; - cuscol=find(strcmp(infoFile(1,:),customFeatures{i})); - if ~isempty(cuscol) - for j=2:size(DataToAnalyze,2) - if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) - feats{j-1,1}='N/A'; - else - feats{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),cuscol}; - end - end + + if size(data,1)>10 - data=rp'; - red_orgs=orgs; + % adjust perplicity to number of variables + if size(data,1) > 150 + perpl=50; + elseif size(data,1) >= 50 + perpl=30; + elseif size(data,1) >= 20 + perpl=10; + else + perpl=5; + end - % remove organisms with no data - data(find(strcmp(taxa,'N/A')),:)=[]; - red_orgs(strcmp(taxa,'N/A'),:)=[]; - taxa(find(strcmp(taxa,'N/A')),:)=[]; + Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',2); + % Y = tsne(data,'Distance',distance,'Algorithm',alg,'Perplexity',perpl,'NumDimensions',3); + Summary.(taxonlevels{i})(:,1)=red_orgs; + Summary.(taxonlevels{i})(:,2)=taxa; + Summary.(taxonlevels{i})(:,3:size(Y,2)+2)=cellstr(string(Y)); - if size(data,1) >= 10 - - % remove features with too few members - [uniqueXX, ~, J]=unique(feats) ; - occ = histc(J, 1:numel(uniqueXX)); - 
toofew=uniqueXX(occ 1 f=figure; + cols=hsv(length(unique(taxa))); + % define markers to better distinguish groups + cmarkers=''; + for j=1:7:length(unique(taxa)) + cmarkers=[cmarkers '+o*xsdp']; + end + cmarkers=cmarkers(1:length(unique(taxa))); + % gscatter3(Y(:,1),Y(:,2),Y(:,3),taxa,cols,{},30); + h=gscatter(Y(:,1),Y(:,2),taxa,cols,cmarkers); + set(h,'MarkerSize',4) hold on - gscatter3(Y(:,1),Y(:,2),Y(:,3),feats); - set(h,'MarkerSize',6) title(analyzedFiles{k,1}) + plottitle=strrep(reconVersion,'_refined',''); + plottitle=strrep(plottitle,'_draft',''); + suptitle(plottitle) + h=legend('Location','northeastoutside'); if length(uniqueXX) < 12 - set(h,'FontSize',11) + set(h,'FontSize',12) elseif length(uniqueXX) < 20 - set(h,'FontSize',9) + set(h,'FontSize',11) else - set(h,'FontSize',6) + set(h,'FontSize',8) end grid off f.Renderer='painters'; - print([customFeatures{i} '_' strrep(analyzedFiles{k,1},' ','_') '_' reconVersion],'-dpng','-r300') + print([taxonlevels{i} '_' strrep(analyzedFiles{k,1},' ','_') '_' reconVersion],'-dpng','-r300') else - warning('Not enough strains with available organism information. Cannot cluster based on features.') + warning('Not enough strains with available organism information. 
Cannot cluster based on taxonomy.') end end end save(['Summary_' reconVersion],'Summary'); + + % if the data should be clustered by any custom features from the info file + if nargin > 3 + for i=1:length(customFeatures) + % plot on different taxon levels + feats={}; + cuscol=find(strcmp(infoFile(1,:),customFeatures{i})); + if ~isempty(cuscol) + for j=2:size(DataToAnalyze,2) + if ~any(strcmp(infoFile(:,1),DataToAnalyze{1,j})) + feats{j-1,1}='N/A'; + else + feats{j-1,1}=infoFile{find(strcmp(infoFile(:,1),DataToAnalyze{1,j})),cuscol}; + end + end + + data=rp'; + red_orgs=orgs; + + % remove organisms with no data + data(find(strcmp(taxa,'N/A')),:)=[]; + red_orgs(strcmp(taxa,'N/A'),:)=[]; + taxa(find(strcmp(taxa,'N/A')),:)=[]; + + if size(data,1) >= 10 + + % remove features with too few members + [uniqueXX, ~, J]=unique(feats) ; + occ = histc(J, 1:numel(uniqueXX)); + toofew=uniqueXX(occ Date: Wed, 21 Jul 2021 00:13:50 -0700 Subject: [PATCH 56/82] Fixing drawFlux directions --- deprecated/_maps_old/drawFlux.m | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/deprecated/_maps_old/drawFlux.m b/deprecated/_maps_old/drawFlux.m index fd5247df04..8b196996a5 100644 --- a/deprecated/_maps_old/drawFlux.m +++ b/deprecated/_maps_old/drawFlux.m @@ -51,11 +51,12 @@ if ~isfield(options,'rxnDirFlag'), rxnDirFlag = false; else rxnDirFlag = options.rxnDirFlag; end rxnListZero = model.rxns(abs(flux)<=1e-9); absFlag=false; +origFlux = flux; %need this to set the arrow directions correct if abs is used -mfarshada switch lower(options.scaleType) case {1, 'linear'} options.scaleTypeLabel='Linear;'; case {2 ,'linear absolute'} - flux=abs(flux); + flux=abs(flux); absFlag=true; options.scaleTypeLabel='Linear absolute;'; case {3,'log10'} @@ -109,8 +110,8 @@ if rxnDirFlag options.rxnDir = zeros(length(map.connectionAbb),1); for i = 1:length(map.connectionAbb) - options.rxnDir(ismember(map.connectionAbb,model.rxns(flux>0))) = 1; - 
options.rxnDir(ismember(map.connectionAbb,model.rxns(flux<0))) = -1; + options.rxnDir(ismember(map.connectionAbb,model.rxns(origFlux>0))) = -1; %was 1, inconsistent with drawLine -mfarshada + options.rxnDir(ismember(map.connectionAbb,model.rxns(origFlux<0))) = 1; %was -1 end end From 4973ddf85da3c834fd6861dab193280a589556f8 Mon Sep 17 00:00:00 2001 From: Farshad Abdollah Nia Date: Wed, 21 Jul 2021 00:41:30 -0700 Subject: [PATCH 57/82] More readable default font sizes in drawCbMap --- deprecated/_maps_old/drawCbMap.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deprecated/_maps_old/drawCbMap.m b/deprecated/_maps_old/drawCbMap.m index c9fccb58ae..424ec16cf2 100644 --- a/deprecated/_maps_old/drawCbMap.m +++ b/deprecated/_maps_old/drawCbMap.m @@ -105,7 +105,7 @@ if ~isfield(options,'textSize') options.textSize = ones(max(nNodes,nEdges),1)*12; if strcmp(CB_MAP_OUTPUT,'svg') - options.textSize = ones(max(nNodes,nEdges),1)*6; + options.textSize = ones(max(nNodes,nEdges),1)*10; end end %Font Color @@ -286,7 +286,7 @@ if isfield(options, 'rxnTextSize') drawText(map.rxnLabelPosition(1,i),map.rxnLabelPosition(2,i),map.connectionAbb{find(map.rxnIndex(i)==map.connection,1)},options.rxnTextSize(i),'italic'); else - drawText(map.rxnLabelPosition(1,i),map.rxnLabelPosition(2,i),map.connectionAbb{find(map.rxnIndex(i)==map.connection,1)},8,'italic'); + drawText(map.rxnLabelPosition(1,i),map.rxnLabelPosition(2,i),map.connectionAbb{find(map.rxnIndex(i)==map.connection,1)},10,'italic'); end end end From 9c531ceb97282a0793882cabec95ba8750b970e8 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Thu, 22 Jul 2021 00:04:14 +0100 Subject: [PATCH 58/82] Adjusted some plots --- .../microbiomeModelingToolbox/mgPipe/buildModelStorage.m | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m 
index 4203508562..acd1d46ead 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m @@ -42,10 +42,10 @@ for i = 1:size(microbeNames, 1) model = readCbModel([modPath filesep microbeNames{i,1} '.mat']); - activeExMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); + ex_mets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); ex_rxns = {}; - for j=1:length(activeExMets) - ex_rxns{j}=['EX_' activeExMets{j}]; + for j=1:length(ex_mets) + ex_rxns{j}=['EX_' ex_mets{j}]; ex_rxns{j}=strrep(ex_rxns{j},'[e]','(e)'); end % account for depracated nomenclature @@ -53,9 +53,7 @@ % compute which exchanges can carry flux try - tic [minFlux,maxFlux]=fastFVA(model,0,'max','ibm_cplex',ex_rxns); - toc catch [minFlux,maxFlux]=fluxVariability(model,0,'max',ex_rxns); end @@ -144,5 +142,4 @@ end cd(currentDir) - end \ No newline at end of file From 6d0e01f1017002c0730678631ac3bebc2c178153 Mon Sep 17 00:00:00 2001 From: YinTat Date: Mon, 26 Jul 2021 22:21:45 -0700 Subject: [PATCH 59/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index 5c0d9e76d3..30e1c2f88b 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit 5c0d9e76d3f925728671ad11134e7356646755bc +Subproject commit 30e1c2f88b23e625d96d4cf185aff158c05dcbe3 From 6d880784a065f7d0db1eff6ef0037b2e95e2da78 Mon Sep 17 00:00:00 2001 From: YinTat Date: Mon, 26 Jul 2021 23:22:36 -0700 Subject: [PATCH 60/82] Update testSampleCbModel.m --- .../analysis/testSampling/testSampleCbModel.m | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/verifiedTests/analysis/testSampling/testSampleCbModel.m 
b/test/verifiedTests/analysis/testSampling/testSampleCbModel.m index 88a4a9228b..e6434d77f5 100644 --- a/test/verifiedTests/analysis/testSampling/testSampleCbModel.m +++ b/test/verifiedTests/analysis/testSampling/testSampleCbModel.m @@ -16,9 +16,8 @@ fileDir = fileparts(which('testSampleCbModel')); cd(fileDir); % define the samplers -samplers = {'CHRR','CHRR_EXP','ACHR'}; %'MFE' -samplers = {'CHRR'}; - +samplers = {'CHRR','CHRR_EXP','ACHR','RHMC'}; %'MFE' + % create a parallel pool (if possible) try minWorkers = 2; @@ -88,7 +87,14 @@ [modelSampling, samples, volume] = sampleCbModel(model, 'EcoliModelSamples', 'CHRR_EXP', options); assert(norm(samples) > 0) + case 'RHMC' + fprintf('\nTesting the Riemann Hamiltonian Monte Carlo (RHMC) sampler\n.'); + + options.nPointsReturned = 10; + [modelSampling, samples, volume] = sampleCbModel(model, 'EcoliModelSamples', 'RHMC', options); + + assert(norm(samples) > 0) end end end From f6a5e2725abd464e2865a79b76273808776bd00a Mon Sep 17 00:00:00 2001 From: YinTat Date: Mon, 26 Jul 2021 23:28:03 -0700 Subject: [PATCH 61/82] Update testSampleCbModel.m --- test/verifiedTests/analysis/testSampling/testSampleCbModel.m | 1 + 1 file changed, 1 insertion(+) diff --git a/test/verifiedTests/analysis/testSampling/testSampleCbModel.m b/test/verifiedTests/analysis/testSampling/testSampleCbModel.m index e6434d77f5..917da79e5c 100644 --- a/test/verifiedTests/analysis/testSampling/testSampleCbModel.m +++ b/test/verifiedTests/analysis/testSampling/testSampleCbModel.m @@ -17,6 +17,7 @@ cd(fileDir); % define the samplers samplers = {'CHRR','CHRR_EXP','ACHR','RHMC'}; %'MFE' +samplers = {'RHMC'}; % create a parallel pool (if possible) try From 0275b90f02f08595acce4e40cb1af12d1c96a7b9 Mon Sep 17 00:00:00 2001 From: "almut.heinken" Date: Tue, 27 Jul 2021 11:45:33 +0100 Subject: [PATCH 62/82] Adjusted some plots --- .../suite/tests/plotBiomassTestResults.m | 26 ------------------- 1 file changed, 26 deletions(-) diff --git 
a/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m b/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m index 61c96a11ae..c072cbbc5a 100755 --- a/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m +++ b/src/reconstruction/demeter/suite/tests/plotBiomassTestResults.m @@ -225,19 +225,6 @@ fprintf('All models are able to produce biomass on complex medium.\n') end - noGrowth=growth{2}(:,4) < tol; - if sum(noGrowth) > 0 - fprintf([num2str(sum(noGrowth)) ' models are unable to produce biomass on complex medium under anaerobic conditions.\n']) - for i=1:length(noGrowth) - if noGrowth(i) - notGrowing{cnt,1}=modelList{i,1}; - cnt=cnt+1; - end - end - else - fprintf('All models are able to produce biomass on complex medium under anaerobic conditions.\n') - end - else % only refined reconstructions if size(data,1)>5 @@ -315,19 +302,6 @@ else fprintf('All models are able to produce biomass on complex medium.\n') end - - noGrowth=growth{1}(:,4) < tol; - if sum(noGrowth) > 0 - fprintf([num2str(sum(noGrowth)) ' models are unable to produce biomass on complex medium under anaerobic conditions.\n']) - for i=1:length(noGrowth) - if noGrowth(i) - notGrowing{cnt,1}=modelList{i,1}; - cnt=cnt+1; - end - end - else - fprintf('All models are able to produce biomass on complex medium under anaerobic conditions.\n') - end end % export models that cannot grow on at least one condition From 72fc6d8d53c5012c35664c7317f225fa33e54734 Mon Sep 17 00:00:00 2001 From: Gpreciat Date: Thu, 5 Aug 2021 08:19:38 +0200 Subject: [PATCH 63/82] Update generateChemicalDatabase for conserved moieties --- .../generateChemicalDatabase.m | 211 ++++++------------ 1 file changed, 67 insertions(+), 144 deletions(-) diff --git a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m index 8d19d80a9c..1e8eace9d1 100644 --- a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m +++ 
b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m @@ -439,35 +439,59 @@ % heatMap comparison figure - subplot(1, 2, 1) + subplot(1, 3, 1) scoreMatrix = info.sourcesComparison.comparisonMatrix; for i = 1:size(scoreMatrix, 2) for j = 1:size(scoreMatrix, 2) boolToCompare = scoreMatrix(:, i) ~= 0 & scoreMatrix(:, j) ~= 0; group1 = scoreMatrix(boolToCompare, i); group2 = scoreMatrix(boolToCompare, j); - comparisonMatrix(i, j) = sqrt(sum((group1 - group2).^2)); + comparisonMatrix(i, j) = corr(group1,group2,'Type','Spearman'); end end h = heatmap(comparisonMatrix); - h.YDisplayLabels = directories; - h.XDisplayLabels = directories; + title('1. Spearman correlation') + % Labels + directoriesLabels = regexprep(directories, 'inchi','InChI'); + directoriesLabels = regexprep(directoriesLabels, 'smiles','SMILES'); + h.YDisplayLabels = directoriesLabels; + h.XDisplayLabels = directoriesLabels; h.FontSize = 16; - title('Sources disimilarity comparison') % Sources comparison - subplot(1, 2, 2) + subplot(1, 3, 2) [db, ~, idx] = unique(split(strjoin(info.sourcesComparison.comparisonTable.source, ' '), ' ')); [~, ib1] = ismember(db, directories); [timesMatched, ia] = sort(histcounts(idx, size(db, 1)), 'descend'); - bar([timesMatched; info.sourcesCoverage.totalCoverage(ib1)]') - title({'Sources comparison', ... - ['Metabolites collected: ' num2str(size(info.sourcesComparison.comparisonTable, 1))]}, 'FontSize', 20) - legend({'Times with highest score', 'IDs coverage'}) - set(gca, 'XTick', 1:size(db, 1), 'xticklabel', db(ia), 'FontSize', 18) + bar(timesMatched') + title({'2. Sources comparison', ... 
+ ['Metabolites collected: ' num2str(size(info.sourcesComparison.comparisonTable, 1))]}, 'FontSize', 16) + % Labels + directoriesLabels = regexprep(db, 'inchi','InChI'); + directoriesLabels = regexprep(directoriesLabels, 'smiles','SMILES'); + set(gca, 'XTick', 1:size(db, 1), 'xticklabel', directoriesLabels(ia), 'FontSize', 16) ylabel('Metabolites') xtickangle(45) + % Features comparison + subplot(1, 3, 3) + fnames = fieldnames(info.sourcesComparison); + fnames = fnames(contains(fnames, 'met_')); + sterochemicalCounter = zeros(7, 1); + chargeCounter = zeros(7, 1); + formula = zeros(7, 1); + for i = 1:length(fnames) + formula = formula + info.sourcesComparison.(fnames{i}).formulaOkBool; + sterochemicalCounter = sterochemicalCounter + info.sourcesComparison.(fnames{i}).stereochemicalSubLayers; + chargeCounter = chargeCounter + info.sourcesComparison.(fnames{i}).chargeOkBool; + end + plot([formula, sterochemicalCounter, chargeCounter], 'LineWidth', 2) + legend({'Formula', 'Sterochemestry', 'Charge'}, 'Location', 'best') + title('3. 
Features comparison', 'FontSize', 16) + directoriesLabels = regexprep(directories, 'inchi','InChI'); + directoriesLabels = regexprep(directoriesLabels, 'smiles','SMILES'); + set(gca, 'XTick', 1:size(directoriesLabels, 1), 'xticklabel', directoriesLabels, 'FontSize', 16) + if options.printlevel > 1 display(groupedInChIs) end @@ -488,9 +512,6 @@ copyfile([comparisonDir dirToCopy{1} filesep metName '.mol'], tmpDir) end end -if isfile([outputDir 'tmp.mol']) - delete([outputDir 'tmp.mol']) -end if ~options.keepMolComparison rmdir(comparisonDir, 's') end @@ -688,115 +709,14 @@ % Atom map metabolic reactions reactionsReport = obtainAtomMappingsRDT(model, molFileDir, rxnDir, rxnsToAM, hMapping, options.onlyUnmapped); -info.reactionsReport = reactionsReport; rxnsFilesDir = [rxnDir filesep 'unMapped']; -if ~options.onlyUnmapped - - % Atom map transport reactions - mappedRxns = transportRxnAM(rxnsFilesDir, [rxnDir filesep 'atomMapped']); - for i = 1:size(mappedRxns, 2) - delete([rxnDir filesep 'images' filesep mappedRxns{i} '.png']); - end - - % Generate rinchis and reaction SMILES - if oBabelInstalled - - nRows = size(rxnsToAM, 1); - varTypes = {'string', 'string', 'string'}; - varNames = {'rxns', 'rinchi', 'rsmi'}; - info.reactionsReport.rxnxIDsTable = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames); - - model.rinchi = repmat({''}, size(model.rxns)); - model.rsmi = repmat({''}, size(model.rxns)); - for i = 1:size(rxnsToAM, 1) - info.reactionsReport.rxnxIDsTable.rxns(i) = rxnsToAM(i); - if isfile([rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn']) - - % Remove parenthesis for RDT - if contains(rxnsToAM{i}, '(') - rxnFileName = regexprep(rxnsToAM{i}, '\(', '\_40'); - rxnFileName = regexprep(rxnFileName, '\)', '\_41'); - movefile([rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn'], ... 
- [rxnDir filesep 'atomMapped' filesep rxnFileName '.rxn']) - reverseName = 1; - else - rxnFileName = rxnsToAM{i}; - end - - % Get rinchis - command = ['obabel -irxn ' rxnDir filesep 'atomMapped' filesep rxnFileName '.rxn -orinchi']; - [~, result] = system(command); - if ~any(contains(result, '0 molecules converted')) - result = split(result); - info.reactionsReport.rxnxIDsTable.rinchi(i) = result{~cellfun(@isempty, regexp(result, 'RInChI='))}; - model.rinchi{findRxnIDs(model, rxnsToAM{i})} = result{~cellfun(@isempty, regexp(result, 'RInChI='))}; - end - - % Get reaction SMILES - command = ['obabel -irxn ' rxnDir filesep 'atomMapped' filesep rxnFileName '.rxn -osmi']; - [~, result] = system(command); - if ~any(contains(result, '0 molecules converted')) - result = splitlines(result); - result = split(result{end - 2}); - info.reactionsReport.rxnxIDsTable.rsmi(i) = result{1}; - model.rsmi{findRxnIDs(model, rxnsToAM{i}), 1} = result{1}; - end - - if exist('reverseName', 'var') - movefile([rxnDir filesep 'atomMapped' filesep rxnFileName '.rxn'], ... 
- [rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn']) - clear reverseName - end - end - end - end -end - -% Find unbalanced RXN files -% Get list of RXN files to check -rxnList = dir([rxnDir filesep 'unMapped' filesep '*.rxn']); -rxnList = regexprep({rxnList.name}, '.rxn', '')'; -rxnList(~ismember(rxnList, rxnsToAM)) = []; - -[unbalancedBool, v3000] = deal(false(size(rxnList))); -for i = 1:size(rxnList, 1) - - name = [rxnList{i} '.rxn']; - % Read the RXN file - rxnFile = regexp(fileread([rxnsFilesDir filesep name]), '\n', 'split')'; - - % Identify molecules - substrates = str2double(rxnFile{5}(1:3)); - products = str2double(rxnFile{5}(4:6)); - begMol = strmatch('$MOL', rxnFile); - - if ~isnan(products) - % Count atoms in substrates and products - atomsS = 0; - for j = 1:substrates - atomsS = atomsS + str2double(rxnFile{begMol(j) + 4}(1:3)); - end - atomsP = 0; - for j = substrates + 1: substrates +products - atomsP = atomsP + str2double(rxnFile{begMol(j) + 4}(1:3)); - end - - % Check if the file is unbalanced - if atomsS ~= atomsP - unbalancedBool(i) = true; - end - else - v3000(i) = true; - end -end - % Final database table % Reactions in the database -info.reactionsReport.rxnInDatabase = rxnList; +info.reactionsReport.rxnInDatabase = reactionsReport.rxnFilesWritten; % List atom mapped reactions -if isfolder([rxnDir filesep 'atomMapped' filesep '*.rxn']) +if isfolder([rxnDir filesep 'atomMapped']) atomMappedRxns = dir([rxnDir filesep 'unMapped' filesep '*.rxn']); atomMappedRxns = regexprep({atomMappedRxns.name}, '.rxn', '')'; atomMappedRxns(~ismember(atomMappedRxns, rxnsToAM)) = []; @@ -805,17 +725,17 @@ end info.reactionsReport.mappedRxns = atomMappedRxns; % Balanced reactions -info.reactionsReport.balancedReactions = rxnList(~unbalancedBool); +info.reactionsReport.balancedReactions = reactionsReport.balanced; % Unalanced reactions -info.reactionsReport.unbalancedReactions = rxnList(unbalancedBool); +info.reactionsReport.unbalancedReactions = 
reactionsReport.unbalanced; % Missing reactions model = findSExRxnInd(model); -info.reactionsReport.rxnMissing = setdiff(model.rxns(model.SIntRxnBool), info.reactionsReport.rxnFilesWritten); +info.reactionsReport.rxnMissing = setdiff(model.rxns(model.SIntRxnBool), reactionsReport.rxnFilesWritten); % Find metabolites in balanced reactions -metsInBalanced = unique(regexprep(findMetsFromRxns(model, rxnList(~unbalancedBool)), '(\[\w\])', '')); +metsInBalanced = unique(regexprep(findMetsFromRxns(model, reactionsReport.balanced), '(\[\w\])', '')); % Find metabolites in unbalanced reactions -metsInUnbalanced = unique(regexprep(findMetsFromRxns(model, rxnList(unbalancedBool)), '(\[\w\])', '')); +metsInUnbalanced = unique(regexprep(findMetsFromRxns(model, reactionsReport.unbalanced), '(\[\w\])', '')); % Metabolites not used in reactions metsNotUsed = info.sourcesComparison.comparisonTable.mets(~ismember(... info.sourcesComparison.comparisonTable.mets, [metsInBalanced; ... @@ -844,7 +764,7 @@ size(info.reactionsReport.mappedRxns, 1); ... size(info.reactionsReport.balancedReactions, 1); ... size(info.reactionsReport.unbalancedReactions, 1); ... - size(info.reactionsReport.missingMets, 1)],... + size(info.reactionsReport.rxnMissing, 1)],... ... 'VariableNames', ... {'Var'},... @@ -864,32 +784,14 @@ if options.printlevel > 0 if ~options.onlyUnmapped - display(info.reactionsReport.rxnxIDsTable) + display(info.reactionsReport.table) end display(info.reactionsReport.table) - % Reactions - figure - labelsToAdd = {'Balanced', 'Unbalanced', 'Missing'}; - X = [size(info.reactionsReport.balancedReactions, 1);... - size(info.reactionsReport.unbalancedReactions, 1);... - size(info.reactionsReport.rxnMissing, 1)]; - ax = gca(); - pieChart = pie(ax, X(find(X))); - newColors = [... 
- 0.9608, 0.8353, 0.8353; - 0.7961, 0.8824, 0.9608; - 0.9137, 1.0000, 0.8392]; - ax.Colormap = newColors; - title({'Reaction coverage', ['From ' num2str(sum(X)) ' internal rxns in the model']}, 'FontSize', 20) - lh = legend(labelsToAdd(find(X)), 'FontSize', 16); - lh.Position(1) = 0.5 - lh.Position(3)/2; - lh.Position(2) = 0.5 - lh.Position(4)/2; - set(findobj(pieChart,'type','text'),'fontsize',18) - % Metabolites figure + subplot(1, 2, 1) labelsToAdd = {'In balanced rxn', 'Ocassionally in unbalanced rxn', 'In unbalanced rxn', 'Missing'}; X = [size(info.reactionsReport.metsAllwaysInBalancedRxns, 1);... size(info.reactionsReport.metsSometimesInUnbalancedRxns, 1);... @@ -908,7 +810,24 @@ lh.Position(1) = 0.5 - lh.Position(3)/2; lh.Position(2) = 0.5 - lh.Position(4)/2; legend(labelsToAdd(find(X)), 'FontSize', 16) - title({'Metabolite percentage coverage', ['From ' num2str(size(umets, 1)) ' unique mets in the model']}, 'FontSize', 20) + title({'1. Metabolite percentage coverage', [num2str(size(umets, 1)) ' unique metabolites in the model']}, 'FontSize', 20) + set(findobj(pieChart,'type','text'),'fontsize',18) + + % Reactions + subplot(1, 2, 2) + labelsToAdd = {'Balanced', 'Unbalanced', 'Missing'}; + X = [size(info.reactionsReport.balancedReactions, 1);... + size(info.reactionsReport.unbalancedReactions, 1);... + size(info.reactionsReport.rxnMissing, 1)]; + ax = gca(); + pieChart = pie(ax, X(find(X))); + newColors = [... + 0.9608, 0.8353, 0.8353; + 0.7961, 0.8824, 0.9608; + 0.9137, 1.0000, 0.8392]; + ax.Colormap = newColors; + title({'2. 
Reaction coverage', [num2str(sum(X)) ' internal reactions in the model']}, 'FontSize', 20) + lh = legend(labelsToAdd(find(X)), 'FontSize', 16, 'Location', 'best'); set(findobj(pieChart,'type','text'),'fontsize',18) end @@ -962,6 +881,10 @@ end +if isfile([outputDir 'tmp.mol']) + delete([outputDir 'tmp.mol']) +end + newModel = model; if options.debug save([outputDir '7.debug_endOfGenerateChemicalDatabase.mat']) From 4169c0dc2c0a8e939a35d01df3440513dfc0000d Mon Sep 17 00:00:00 2001 From: Gpreciat Date: Thu, 5 Aug 2021 08:22:10 +0200 Subject: [PATCH 64/82] Standardisation of reactions with atom mapping --- .../chemoInformatics/obtainAtomMappingsRDT.m | 145 ++++++++++++++++-- 1 file changed, 131 insertions(+), 14 deletions(-) diff --git a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m index 08576406da..5c108f0e47 100644 --- a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m +++ b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m @@ -75,7 +75,10 @@ maxTime = 1800; -% Check if JAVA is installed +% Check installation +[cxcalcInstalled, ~] = system('cxcalc'); +cxcalcInstalled = ~cxcalcInstalled; +[oBabelInstalled, ~] = system('obabel'); [javaInstalled, ~] = system('java'); % Generating new directories @@ -166,10 +169,23 @@ % Atom map RXN files if javaInstalled == 1 && ~onlyUnmapped + + % Atom map RXN files fprintf('Computing atom mappings for %d reactions.\n\n', length(rxnsToAM)); - for i = 1:length(rxnsToAM) + + % Atom map passive transport reactions; The atoms are mapped for the + % same molecular structures in the substrates as they are in the + % products i.e. A[m] + B[c] -> A[c] + B[m]. 
+ mappedTransportRxns = transportRxnAM([rxnDir 'unMapped'], [rxnDir 'atomMapped']); + mappedBool = false(size(rxnsToAM)); + transportBool = ismember(rxnsToAM, mappedTransportRxns); + mappedBool(transportBool) = true; + nonTransport = setdiff(rxnsToAM, rxnsToAM(mappedBool)); + + % Atom map the rest + for i = 1:length(nonTransport) - name = [rxnDir 'unMapped' filesep rxnsToAM{i} '.rxn']; + name = [rxnDir 'unMapped' filesep nonTransport{i} '.rxn']; command = ['timeout ' num2str(maxTime) 's java -jar ' rxnDir 'rdtAlgorithm.jar -Q RXN -q "' name '" -g -j AAM -f TEXT']; if ismac @@ -189,18 +205,17 @@ cellfun(@movefile, {mNames.name}, name) cellfun(@movefile, name, {[rxnDir 'images'], [rxnDir... 'atomMapped'], [rxnDir 'txtData']}) - mappedBool(i) = true; + mappedBool(ismember(rxnsToAM, nonTransport{i})) = true; elseif ~isempty(mNames) delete(mNames.name) end end + delete([rxnDir 'rdtAlgorithm.jar']) mappedRxns = rxnsToAM(mappedBool); atomMappingReport.mappedRxns = mappedRxns; - - delete([rxnDir 'rdtAlgorithm.jar']) - + [unbalancedBool, inconsistentBool] = deal(false(size(rxnsToAM))); for i = 1:length(mappedRxns) name = [mappedRxns{i} '.rxn']; @@ -215,7 +230,9 @@ formula = strsplit(mappedFile{4}, {'->', '<=>'}); substratesFormula = strtrim(strsplit(formula{1}, '+')); - % Check if a metabolite is repeated in the substrates formula + % Check if a metabolite is modified in the substrate's formula; + % metabolites with an iron atom but no bonds are splited by the RDT + % algorithm, which modifies the stoichiometry. repMetsSubInx = find(~cellfun(@isempty, regexp(substratesFormula, ' '))); if ~isempty(repMetsSubInx) for j = 1:length(repMetsSubInx) @@ -228,7 +245,9 @@ end productsFormula = strtrim(strsplit(formula{2}, '+')); - % Check if a metabolite is repeated in the products formula + % Check if a metabolite is modified in the product's formula; + % metabolites with an iron atom but no bonds are splited by the RDT + % algorithm, which modifies the stoichiometry. 
repMetsProInx = find(~cellfun(@isempty, regexp(productsFormula, ' '))); if ~isempty(repMetsProInx) for j = 1:length(repMetsProInx) @@ -243,6 +262,13 @@ % RXN file data begmol = strmatch('$MOL', mappedFile); noOfMolSubstrates = str2double(mappedFile{5}(1:3)); + if isnan(noOfMolSubstrates) + if ~isfolder([rxnDir 'atomMapped' filesep 'v3000']) + mkdir([rxnDir 'atomMapped' filesep 'v3000']); + end + movefile([rxnDir 'atomMapped' filesep name], [rxnDir 'atomMapped' filesep 'v3000']) + continue + end substratesMol = mappedFile(begmol(1:noOfMolSubstrates) + 1)'; noOfMolProducts = str2double(mappedFile{5}(4:6)); productsMol = mappedFile(begmol(noOfMolSubstrates + 1:noOfMolSubstrates + noOfMolProducts) + 1)'; @@ -255,23 +281,114 @@ if ~isequal(noOfsubstrates, substratesMol) || ~isequal(noOfproducts, productsMol) mappedFile = sortMets(mappedFile, substratesMol, substratesFormula, productsMol, productsFormula, rxnDir); end - if length(mappedFile) > 5 + + % SMILES TO MOL + begmol = strmatch('$MOL', mappedFile); + if cxcalcInstalled && ~inconsistentBool(i) && ~isempty(begmol) + + begmolStd = strmatch('$MOL', standardFile); + newMappedFile = {}; + newMappedFile = mappedFile(1:5); + for j = 1:str2double(mappedFile{5, 1}(1:3)) + str2double(mappedFile{5, 1}(4:6)) + + % Write a new MOL file and save it + c = 0; + molFile = {}; + while ~isequal(mappedFile{begmol(j) + 1 + c}, '$MOL') && begmol(j) + 1 + c < length(mappedFile) + c = c + 1; + molFile{c, 1} = regexprep(mappedFile{begmol(j) + c}, '\*', 'A'); + end + fid2 = fopen('tmp.mol', 'w'); + fprintf(fid2, '%s\n', molFile{:}); + fclose(fid2); + + % Rewrite the MOL file + command = ['molconvert smiles ' pwd filesep 'tmp.mol -o ' pwd filesep 'tmp.smiles']; + [~, ~] = system(command); + command = ['molconvert rxn ' pwd filesep 'tmp.smiles -o ' pwd filesep 'tmp.mol']; + [~, ~] = system(command); + delete([pwd filesep 'tmp.smiles']) + molFile = regexp(fileread([pwd filesep 'tmp.mol']), '\n', 'split')'; + newMappedFile(length(newMappedFile) 
+ 1: length(newMappedFile) + 4) = standardFile(begmolStd(j): begmolStd(j) + 3); + newMappedFile(length(newMappedFile) + 1: length(newMappedFile) + length(molFile) - 4) = molFile(4:end - 1); + + end + mappedFile = newMappedFile; + end + + % Sort the atoms in the substrates in ascending order and then map + % them to the atoms in the products. + if any(contains(mappedFile, '$MOL')) mappedFile = acsendingAtomMaps(mappedFile); + else + inconsistentBool(i) = true; + end + + % Check if the reaction is atomically balanced + if ~inconsistentBool(i) + begmol = strmatch('$MOL', mappedFile); + atomsSubstrates = 0; + for j = 1:noOfsubstrates + atomsSubstrates = atomsSubstrates + str2double(mappedFile{begmol(j) + 4}(1:3)); + end + atomsProducts = 0; + for j = noOfsubstrates + 1:noOfsubstrates + noOfproducts + atomsProducts = atomsProducts + str2double(mappedFile{begmol(j) + 4}(1:3)); + end + if atomsSubstrates ~= atomsProducts + unbalancedBool(i) = true; + end end % Rewrite the file - if any(contains(mappedFile, '$MOL')) + if ~unbalancedBool(i) && ~inconsistentBool(i) fid2 = fopen([rxnDir 'atomMapped' filesep name], 'w'); fprintf(fid2, '%s\n', mappedFile{:}); fclose(fid2); - else + elseif inconsistentBool(i) if ~exist([rxnDir filesep 'atomMapped' filesep 'inconsistent'],'dir') mkdir([rxnDir filesep 'atomMapped' filesep 'inconsistent']) end movefile([rxnDir 'atomMapped' filesep name], ... 
- [rxnDir filesep 'atomMapped' filesep 'inconsistent']) + [rxnDir 'atomMapped' filesep 'inconsistent']) + elseif unbalancedBool(i) + if ~exist([rxnDir filesep 'atomMapped' filesep 'unbalanced'],'dir') + mkdir([rxnDir filesep 'atomMapped' filesep 'unbalanced']) + end + fid2 = fopen([rxnDir 'atomMapped' filesep 'unbalanced' filesep name], 'w'); + fprintf(fid2, '%s\n', mappedFile{:}); + fclose(fid2); + end + + if oBabelInstalled + + % Get rinchis + command = ['obabel -irxn ' [rxnDir 'unMapped' filesep rxnsToAM{i}] '.rxn -orinchi']; + [~, result] = system(command); + if ~any(contains(result, '0 molecules converted')) + result = split(result); + atomMappingReport.rinchi{i, 1} = [result{~cellfun(@isempty, ... + regexp(result, 'RInChI='))} ' - ' rxnsToAM{i}]; + end + + % Get rsmi + command = ['obabel -irxn ' [rxnDir 'unMapped' filesep rxnsToAM{i}] '.rxn -osmi']; + [~, result] = system(command); + if ~any(contains(result, '0 molecules converted')) + result = splitlines(result); + result = split(result{end - 2}); + atomMappingReport.rsmi{i, 1} = result{1}; + end + end end + + atomMappingReport.rxnFilesWritten = rxnsToAM; + atomMappingReport.balanced = rxnsToAM(~unbalancedBool); + atomMappingReport.unbalanced = rxnsToAM(unbalancedBool); + atomMappingReport.inconsistentBool = rxnsToAM(inconsistentBool); + atomMappingReport.notMapped = setdiff(rxnsToAM, mappedRxns); + else atomMappingReport.mappedRxns = []; end @@ -353,4 +470,4 @@ end end end -end \ No newline at end of file +end From 141da850c0745a2742211d29387fb02e567d0e02 Mon Sep 17 00:00:00 2001 From: Bhushan Dhamale Date: Tue, 10 Aug 2021 12:26:38 +0530 Subject: [PATCH 65/82] Update findCarbonRxns.m --- src/analysis/exploration/findCarbonRxns.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analysis/exploration/findCarbonRxns.m b/src/analysis/exploration/findCarbonRxns.m index 9f1042b92e..9edabb24b7 100644 --- a/src/analysis/exploration/findCarbonRxns.m +++ 
b/src/analysis/exploration/findCarbonRxns.m @@ -1,6 +1,6 @@ function [hiCarbonRxns, zeroCarbonRxns, nCarbon] = findCarbonRxns(model, nCarbonThr) % Returns the list of reactions that act of compounds which -% contain cabons greater than the thershhold set. +% contain cabons greater than the threshold set. % % USAGE: % From 40ecaf0a303aa16d25e9f3597f8abface4f6709d Mon Sep 17 00:00:00 2001 From: Almut Heinken Date: Thu, 26 Aug 2021 18:00:05 +0200 Subject: [PATCH 66/82] improved efficacy of fastFVA and shadow price analysis --- .../analyseObjectiveShadowPrices.m | 7 +++-- .../mgPipe/guidedSim.m | 28 +++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m index 07a6ceefd5..7e66185f62 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m @@ -82,9 +82,9 @@ end % reload existing results if applies -if isfile([resultsFolder filesep 'objectives']) - load([resultsFolder filesep 'objectives']); - load([resultsFolder filesep 'shadowPrices']); +if isfile([resultsFolder filesep 'objectives.mat']) + load([resultsFolder filesep 'objectives.mat']); + load([resultsFolder filesep 'shadowPrices.mat']); startPnt=size(objectives,2)-1; else startPnt=1; @@ -211,6 +211,7 @@ environment = getEnvironment(); parfor j = 1:size(objectiveList, 1) + j restoreEnvironment(environment); changeCobraSolver(solver, 'LP', 0, -1); % prevent creation of log files diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/guidedSim.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/guidedSim.m index 13c9d8887e..8dfd4179fe 100755 --- 
a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/guidedSim.m +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/guidedSim.m @@ -24,20 +24,20 @@ % Check for installation of fastFVA try - % cpxControl.PARALLELMODE = 1; -% cpxControl.THREADS = 1; -% cpxControl.AUXROOTTHREADS = 2; - [minFlux,maxFlux,optsol,ret] = fastFVA(model,99.99,'max',{},rl,'A'); - if ret~=0 - % infeasibilities in the solution - minFlux=NaN(length(rl),1); - maxFlux=NaN(length(rl),1); - end - % cpxControl.threads=1; - % cpxControl.parallel=1; - % cpxControl.auxrootthreads=2; - % cpxControl.SCAIND =-1; -% [minFlux,maxFlux] = fastFVA(model,99.99,'max',{},rl,'A',cpxControl) + cpxControl.PARALLELMODE = 1; + cpxControl.THREADS = 1; + cpxControl.AUXROOTTHREADS = 2; + [minFlux,maxFlux,optsol,ret] = fastFVA(model,99.99,'max',{},rl,'A',cpxControl); + if ret~=0 + % infeasibilities in the solution + minFlux=NaN(length(rl),1); + maxFlux=NaN(length(rl),1); + end + % cpxControl.threads=1; + % cpxControl.parallel=1; + % cpxControl.auxrootthreads=2; + % cpxControl.SCAIND =-1; + % [minFlux,maxFlux] = fastFVA(model,99.99,'max',{},rl,'A',cpxControl) catch warning('fastFVA could not run, so fluxVariability is instead used. 
Consider installing fastFVA for shorter computation times.'); From 9a92edba9caec2b69b4703d65d8f6a24c797b782 Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 29 Aug 2021 22:26:47 -0700 Subject: [PATCH 67/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index 30e1c2f88b..97611084e6 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit 30e1c2f88b23e625d96d4cf185aff158c05dcbe3 +Subproject commit 97611084e66b59bba80c1d7786d7aa564ded9a1d From 6d53b2ca2f0e8539c617f0811c7fcce84c557c5f Mon Sep 17 00:00:00 2001 From: YinTat Date: Sun, 29 Aug 2021 22:32:39 -0700 Subject: [PATCH 68/82] Update PolytopeSamplerMatlab --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index 97611084e6..cfa3ab9b6b 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit 97611084e66b59bba80c1d7786d7aa564ded9a1d +Subproject commit cfa3ab9b6b3c0c305ea4d0256d999a00cad834d6 From 194c08987819dff81099e9688594074d6b7a6bf6 Mon Sep 17 00:00:00 2001 From: Almut Heinken Date: Fri, 3 Sep 2021 15:45:47 +0100 Subject: [PATCH 69/82] rearranged DEmeter folders --- .../gapfillRefinedGenomeReactions.m | 4 +- .../deleteSeedGapfilledReactions.m | 0 .../doubleCheckGapfilledReactions.m | 0 .../findTransportersWithoutExchanges.m | 0 .../findUnusedExchangeReactions.m | 0 .../{debugging => refinement}/printBiomass.m | 0 .../refinement/removeUnannotatedBlockedRxns.m | 48 ------------------- .../refinement/removeUnannotatedReactions.m | 10 ++-- 8 files changed, 7 insertions(+), 55 deletions(-) rename src/reconstruction/demeter/src/{debugging => 
refinement}/deleteSeedGapfilledReactions.m (100%) rename src/reconstruction/demeter/src/{debugging => refinement}/doubleCheckGapfilledReactions.m (100%) rename src/reconstruction/demeter/src/{debugging => refinement}/findTransportersWithoutExchanges.m (100%) rename src/reconstruction/demeter/src/{debugging => refinement}/findUnusedExchangeReactions.m (100%) rename src/reconstruction/demeter/src/{debugging => refinement}/printBiomass.m (100%) delete mode 100644 src/reconstruction/demeter/src/refinement/removeUnannotatedBlockedRxns.m diff --git a/src/reconstruction/demeter/src/integration/gapfillRefinedGenomeReactions.m b/src/reconstruction/demeter/src/integration/gapfillRefinedGenomeReactions.m index 9b443029f3..8e4da4175d 100755 --- a/src/reconstruction/demeter/src/integration/gapfillRefinedGenomeReactions.m +++ b/src/reconstruction/demeter/src/integration/gapfillRefinedGenomeReactions.m @@ -167,8 +167,8 @@ genomeAnnotation(delInd,:)=[]; % manually add some pathways based on information from the literature -% Papers: PMIDs 11082195,30310076,30962433,29761785,31196984 
-manuallyAdded={'Olsenella_uli_DSM_7084','IND3ACDC','E1QXZ2';'Olsenella_uli_DSM_7084','IND3ACt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_ind3ac(e)','exchange_reaction';'Olsenella_uli_DSM_7084','SKATOLEt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_skatole(e)','exchange_reaction';'Olsenella_uli_DSM_7084','4HPHACDC','E1QVI8';'Olsenella_uli_DSM_7084','PCRESOLt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_pcresol(e)','exchange_reaction';'Olsenella_uli_DSM_7084','HPACt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_4hphac(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','IND3ACDC','A0A0L7NFY0';'Clostridium_botulinum_BKT015925','IND3ACt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_ind3ac(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','SKATOLEt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_skatole(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','4HPHACDC','A0A0M1LIM4';'Clostridium_botulinum_BKT015925','HPACt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_4hphac(e)','exchange_reaction';'Clostridium_botulinum_C_str_Eklund','IND3ACDC','B1BA70';'Clostridium_botulinum_C_str_Eklund','IND3ACt2r','gap_filled';'Clostridium_botulinum_C_str_Eklund','EX_ind3ac(e)','exchange_reaction';'Clostridium_botulinum_C_str_Eklund','SKATOLEt2r','gap_filled';'Clostridium_botulinum_C_str_Eklund','EX_skatole(e)','exchange_reaction';'Bilophila_wadsworthia_ATCC_49260','ISETACSL','WP_005024906.1';'Bilophila_wadsworthia_3_1_6','ISETACSL','WP_005024906.1';'Desulfovibrio_piger_ATCC_29098','ISETACSL','B6WXM2';'Desulfovibrio_piger_ATCC_29098','ISETACt2r','gap_filled';'Desulfovibrio_piger_ATCC_29098','EX_isetac(e)','exchange_reaction';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','ISETACSL','WP_022659977.1';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','ISETACt2r','gap_filled';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','EX_isetac(e)','exchange_reaction';'Eggerthella_lenta_11C','DIGITOXINt2r','gap_filled';'Eggerthell
a_lenta_11C','DIGOXINt2r','gap_filled';'Eggerthella_lenta_11C','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_11C','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_11C','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_11C','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_11C','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIGOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGITOXINt2r','gap
_filled';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIGOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_le
nta_326I6NA','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_dihydro_digoxin(e)','exchange_reaction';'Bilophila_wadsworthia_3_1_6','EX_h2s(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','H2St','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_h2(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_for(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','H2St','gap_filled';'Bilophila_wadsworthia_3_1_6','H2td','gap_filled';'Bilophila_wadsworthia_3_1_6','FORt','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_pyr(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','PYRt2r','gap_filled';'Bilophila_wadsworthia_3_1_6','SULR','gap_filled';'Bilophila_wadsworthia_3_1_6','TAURPYRAT','gap_filled';'Bilophila_wadsworthia_3_1_6','SACALDACT','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_h2s(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2St','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_h2(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_for(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2St','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2td','gap_filled';'Bilophila_wadsworthia_ATCC_49260','FORt','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_pyr(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','PYRt2r','gap_filled';'Bilophila_wadsworthia_ATCC_49260','SULR','gap_filled';'Bilophila_wadsworthia_ATCC_49260','TAURPYRAT','gap_filled';'Bilophila_wadsworthia_ATCC_49260','SACALDACT','gap_filled';'Eggerthella_lenta_1160AFAA','DOPADH','Dadh';'Eggerthella_lenta_1160AFAA','EX_dopa(e)','gap_filled';'Eggerthella_lenta_1160AFAA','DOPAENT4tc','gap_filled';'Eggerthella_lenta_1160AFAA','EX_mtym(e)','gap_filled';'Eggerthella_lenta_1160AFAA','MTYMt2r','gap_filled';'Eggerthella_lenta_11C','DOPADH','Dadh';'Eggerthella_lenta_11C','EX_dopa(e)','gap_filled';'Eggerthella_lenta_11C','DOPAENT4tc','gap_filled';'Eggerthella_le
nta_11C','EX_mtym(e)','gap_filled';'Eggerthella_lenta_11C','MTYMt2r','gap_filled';'Eggerthella_lenta_14A','DOPADH','Dadh';'Eggerthella_lenta_14A','EX_dopa(e)','gap_filled';'Eggerthella_lenta_14A','DOPAENT4tc','gap_filled';'Eggerthella_lenta_14A','EX_mtym(e)','gap_filled';'Eggerthella_lenta_14A','MTYMt2r','gap_filled';'Eggerthella_lenta_28B','DOPADH','Dadh';'Eggerthella_lenta_28B','EX_dopa(e)','gap_filled';'Eggerthella_lenta_28B','DOPAENT4tc','gap_filled';'Eggerthella_lenta_28B','EX_mtym(e)','gap_filled';'Eggerthella_lenta_28B','MTYMt2r','gap_filled';'Eggerthella_lenta_326I6NA','DOPADH','Dadh';'Eggerthella_lenta_326I6NA','EX_dopa(e)','gap_filled';'Eggerthella_lenta_326I6NA','DOPAENT4tc','gap_filled';'Eggerthella_lenta_326I6NA','EX_mtym(e)','gap_filled';'Eggerthella_lenta_326I6NA','MTYMt2r','gap_filled';'Eggerthella_lenta_A2','DOPADH','Dadh';'Eggerthella_lenta_A2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_A2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_A2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_A2','MTYMt2r','gap_filled';'Eggerthella_lenta_AB12n2','DOPADH','Dadh';'Eggerthella_lenta_AB12n2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_AB12n2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_AB12n2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_AB12n2','MTYMt2r','gap_filled';'Eggerthella_lenta_AB8n2','DOPADH','Dadh';'Eggerthella_lenta_AB8n2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_AB8n2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_AB8n2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_AB8n2','MTYMt2r','gap_filled';'Eggerthella_lenta_CC75D52','DOPADH','Dadh';'Eggerthella_lenta_CC75D52','EX_dopa(e)','gap_filled';'Eggerthella_lenta_CC75D52','DOPAENT4tc','gap_filled';'Eggerthella_lenta_CC75D52','EX_mtym(e)','gap_filled';'Eggerthella_lenta_CC75D52','MTYMt2r','gap_filled';'Eggerthella_lenta_CC82BHI2','DOPADH','Dadh';'Eggerthella_lenta_CC82BHI2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_CC82BHI2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_CC82BHI2','EX_mtym(e)','g
ap_filled';'Eggerthella_lenta_CC82BHI2','MTYMt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DOPADH','Dadh';'Eggerthella_lenta_DSM_11767','EX_dopa(e)','gap_filled';'Eggerthella_lenta_DSM_11767','DOPAENT4tc','gap_filled';'Eggerthella_lenta_DSM_11767','EX_mtym(e)','gap_filled';'Eggerthella_lenta_DSM_11767','MTYMt2r','gap_filled';'Eggerthella_lenta_DSM_15644','DOPADH','Dadh';'Eggerthella_lenta_DSM_15644','EX_dopa(e)','gap_filled';'Eggerthella_lenta_DSM_15644','DOPAENT4tc','gap_filled';'Eggerthella_lenta_DSM_15644','EX_mtym(e)','gap_filled';'Eggerthella_lenta_DSM_15644','MTYMt2r','gap_filled';'Eggerthella_lenta_Valencia','DOPADH','Dadh';'Eggerthella_lenta_Valencia','EX_dopa(e)','gap_filled';'Eggerthella_lenta_Valencia','DOPAENT4tc','gap_filled';'Eggerthella_lenta_Valencia','EX_mtym(e)','gap_filled';'Eggerthella_lenta_Valencia','MTYMt2r','gap_filled';'Eggerthella_sinensis_DSM_16107','DOPADH','Dadh';'Eggerthella_sinensis_DSM_16107','EX_dopa(e)','gap_filled';'Eggerthella_sinensis_DSM_16107','DOPAENT4tc','gap_filled';'Eggerthella_sinensis_DSM_16107','EX_mtym(e)','gap_filled';'Eggerthella_sinensis_DSM_16107','MTYMt2r','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','DOPADH','Dadh';'Paraeggerthella_hongkongensis_RC2_2','EX_dopa(e)','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','DOPAENT4tc','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','EX_mtym(e)','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','MTYMt2r','gap_filled'}; +% Papers: PMIDs 11082195,30310076,30962433,29761785,31196984,32571913 
+manuallyAdded={'Olsenella_uli_DSM_7084','IND3ACDC','E1QXZ2';'Olsenella_uli_DSM_7084','IND3ACt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_ind3ac(e)','exchange_reaction';'Olsenella_uli_DSM_7084','SKATOLEt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_skatole(e)','exchange_reaction';'Olsenella_uli_DSM_7084','4HPHACDC','E1QVI8';'Olsenella_uli_DSM_7084','PCRESOLt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_pcresol(e)','exchange_reaction';'Olsenella_uli_DSM_7084','HPACt2r','gap_filled';'Olsenella_uli_DSM_7084','EX_4hphac(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','IND3ACDC','A0A0L7NFY0';'Clostridium_botulinum_BKT015925','IND3ACt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_ind3ac(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','SKATOLEt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_skatole(e)','exchange_reaction';'Clostridium_botulinum_BKT015925','4HPHACDC','A0A0M1LIM4';'Clostridium_botulinum_BKT015925','HPACt2r','gap_filled';'Clostridium_botulinum_BKT015925','EX_4hphac(e)','exchange_reaction';'Clostridium_botulinum_C_str_Eklund','IND3ACDC','B1BA70';'Clostridium_botulinum_C_str_Eklund','IND3ACt2r','gap_filled';'Clostridium_botulinum_C_str_Eklund','EX_ind3ac(e)','exchange_reaction';'Clostridium_botulinum_C_str_Eklund','SKATOLEt2r','gap_filled';'Clostridium_botulinum_C_str_Eklund','EX_skatole(e)','exchange_reaction';'Bilophila_wadsworthia_ATCC_49260','ISETACSL','WP_005024906.1';'Bilophila_wadsworthia_3_1_6','ISETACSL','WP_005024906.1';'Desulfovibrio_piger_ATCC_29098','ISETACSL','B6WXM2';'Desulfovibrio_piger_ATCC_29098','ISETACt2r','gap_filled';'Desulfovibrio_piger_ATCC_29098','EX_isetac(e)','exchange_reaction';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','ISETACSL','WP_022659977.1';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','ISETACt2r','gap_filled';'Desulfovibrio_desulfuricans_subsp_desulfuricans_DSM_642','EX_isetac(e)','exchange_reaction';'Eggerthella_lenta_11C','DIGITOXINt2r','gap_filled';'Eggerthell
a_lenta_11C','DIGOXINt2r','gap_filled';'Eggerthella_lenta_11C','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_11C','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_11C','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_11C','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_11C','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_11C','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_DSM_11767','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11767','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11767','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIGOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_CC86D54','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_CC86D54','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_CC86D54','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGITOXINt2r','gap
_filled';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_AB12n2','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB12n2','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB12n2','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_AB8n2','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_AB8n2','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_AB8n2','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_DSM_11863','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_lenta_DSM_11863','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_DSM_11863','EX_dihydro_digoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','DIGITOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIGOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGITOXINc','cgr2';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGITOXINt2r','gap_filled';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGOXINc','cgr2';'Eggerthella_lenta_326I6NA','DIHYDRO_DIGOXINt2r','gap_filled';'Eggerthella_le
nta_326I6NA','EX_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_digoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_dihydro_digitoxin(e)','exchange_reaction';'Eggerthella_lenta_326I6NA','EX_dihydro_digoxin(e)','exchange_reaction';'Bilophila_wadsworthia_3_1_6','EX_h2s(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','H2St','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_h2(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_for(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','H2St','gap_filled';'Bilophila_wadsworthia_3_1_6','H2td','gap_filled';'Bilophila_wadsworthia_3_1_6','FORt','gap_filled';'Bilophila_wadsworthia_3_1_6','EX_pyr(e)','gap_filled';'Bilophila_wadsworthia_3_1_6','PYRt2r','gap_filled';'Bilophila_wadsworthia_3_1_6','SULR','gap_filled';'Bilophila_wadsworthia_3_1_6','TAURPYRAT','gap_filled';'Bilophila_wadsworthia_3_1_6','SACALDACT','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_h2s(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2St','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_h2(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_for(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2St','gap_filled';'Bilophila_wadsworthia_ATCC_49260','H2td','gap_filled';'Bilophila_wadsworthia_ATCC_49260','FORt','gap_filled';'Bilophila_wadsworthia_ATCC_49260','EX_pyr(e)','gap_filled';'Bilophila_wadsworthia_ATCC_49260','PYRt2r','gap_filled';'Bilophila_wadsworthia_ATCC_49260','SULR','gap_filled';'Bilophila_wadsworthia_ATCC_49260','TAURPYRAT','gap_filled';'Bilophila_wadsworthia_ATCC_49260','SACALDACT','gap_filled';'Eggerthella_lenta_1160AFAA','DOPADH','Dadh';'Eggerthella_lenta_1160AFAA','EX_dopa(e)','gap_filled';'Eggerthella_lenta_1160AFAA','DOPAENT4tc','gap_filled';'Eggerthella_lenta_1160AFAA','EX_mtym(e)','gap_filled';'Eggerthella_lenta_1160AFAA','MTYMt2r','gap_filled';'Eggerthella_lenta_11C','DOPADH','Dadh';'Eggerthella_lenta_11C','EX_dopa(e)','gap_filled';'Eggerthella_lenta_11C','DOPAENT4tc','gap_filled';'Eggerthella_le
nta_11C','EX_mtym(e)','gap_filled';'Eggerthella_lenta_11C','MTYMt2r','gap_filled';'Eggerthella_lenta_14A','DOPADH','Dadh';'Eggerthella_lenta_14A','EX_dopa(e)','gap_filled';'Eggerthella_lenta_14A','DOPAENT4tc','gap_filled';'Eggerthella_lenta_14A','EX_mtym(e)','gap_filled';'Eggerthella_lenta_14A','MTYMt2r','gap_filled';'Eggerthella_lenta_28B','DOPADH','Dadh';'Eggerthella_lenta_28B','EX_dopa(e)','gap_filled';'Eggerthella_lenta_28B','DOPAENT4tc','gap_filled';'Eggerthella_lenta_28B','EX_mtym(e)','gap_filled';'Eggerthella_lenta_28B','MTYMt2r','gap_filled';'Eggerthella_lenta_326I6NA','DOPADH','Dadh';'Eggerthella_lenta_326I6NA','EX_dopa(e)','gap_filled';'Eggerthella_lenta_326I6NA','DOPAENT4tc','gap_filled';'Eggerthella_lenta_326I6NA','EX_mtym(e)','gap_filled';'Eggerthella_lenta_326I6NA','MTYMt2r','gap_filled';'Eggerthella_lenta_A2','DOPADH','Dadh';'Eggerthella_lenta_A2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_A2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_A2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_A2','MTYMt2r','gap_filled';'Eggerthella_lenta_AB12n2','DOPADH','Dadh';'Eggerthella_lenta_AB12n2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_AB12n2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_AB12n2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_AB12n2','MTYMt2r','gap_filled';'Eggerthella_lenta_AB8n2','DOPADH','Dadh';'Eggerthella_lenta_AB8n2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_AB8n2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_AB8n2','EX_mtym(e)','gap_filled';'Eggerthella_lenta_AB8n2','MTYMt2r','gap_filled';'Eggerthella_lenta_CC75D52','DOPADH','Dadh';'Eggerthella_lenta_CC75D52','EX_dopa(e)','gap_filled';'Eggerthella_lenta_CC75D52','DOPAENT4tc','gap_filled';'Eggerthella_lenta_CC75D52','EX_mtym(e)','gap_filled';'Eggerthella_lenta_CC75D52','MTYMt2r','gap_filled';'Eggerthella_lenta_CC82BHI2','DOPADH','Dadh';'Eggerthella_lenta_CC82BHI2','EX_dopa(e)','gap_filled';'Eggerthella_lenta_CC82BHI2','DOPAENT4tc','gap_filled';'Eggerthella_lenta_CC82BHI2','EX_mtym(e)','g
ap_filled';'Eggerthella_lenta_CC82BHI2','MTYMt2r','gap_filled';'Eggerthella_lenta_DSM_11767','DOPADH','Dadh';'Eggerthella_lenta_DSM_11767','EX_dopa(e)','gap_filled';'Eggerthella_lenta_DSM_11767','DOPAENT4tc','gap_filled';'Eggerthella_lenta_DSM_11767','EX_mtym(e)','gap_filled';'Eggerthella_lenta_DSM_11767','MTYMt2r','gap_filled';'Eggerthella_lenta_DSM_15644','DOPADH','Dadh';'Eggerthella_lenta_DSM_15644','EX_dopa(e)','gap_filled';'Eggerthella_lenta_DSM_15644','DOPAENT4tc','gap_filled';'Eggerthella_lenta_DSM_15644','EX_mtym(e)','gap_filled';'Eggerthella_lenta_DSM_15644','MTYMt2r','gap_filled';'Eggerthella_lenta_Valencia','DOPADH','Dadh';'Eggerthella_lenta_Valencia','EX_dopa(e)','gap_filled';'Eggerthella_lenta_Valencia','DOPAENT4tc','gap_filled';'Eggerthella_lenta_Valencia','EX_mtym(e)','gap_filled';'Eggerthella_lenta_Valencia','MTYMt2r','gap_filled';'Eggerthella_sinensis_DSM_16107','DOPADH','Dadh';'Eggerthella_sinensis_DSM_16107','EX_dopa(e)','gap_filled';'Eggerthella_sinensis_DSM_16107','DOPAENT4tc','gap_filled';'Eggerthella_sinensis_DSM_16107','EX_mtym(e)','gap_filled';'Eggerthella_sinensis_DSM_16107','MTYMt2r','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','DOPADH','Dadh';'Paraeggerthella_hongkongensis_RC2_2','EX_dopa(e)','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','DOPAENT4tc','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','EX_mtym(e)','gap_filled';'Paraeggerthella_hongkongensis_RC2_2','MTYMt2r','gap_filled';'Bacteroides_fragilis_BOB25','BZD_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_BOB25','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_BOB25','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_BOB25','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_BOB25','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_BOB25','BZD_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_BOB25','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_BOB25','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_BOB25','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_BOB25','SSZ_AR_NADP','AzoC_Cp';'Bacteroide
s_fragilis_BOB25','5ASAt2r','gap_filled';'Bacteroides_fragilis_BOB25','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_fragilis_BOB25','BZDt2r','gap_filled';'Bacteroides_fragilis_BOB25','NEOPRONTt2r','gap_filled';'Bacteroides_fragilis_BOB25','OLSAt2r','gap_filled';'Bacteroides_fragilis_BOB25','PRONTt2r','gap_filled';'Bacteroides_fragilis_BOB25','SANILAMIDEt2r','gap_filled';'Bacteroides_fragilis_BOB25','SSZt2r','gap_filled';'Bacteroides_fragilis_BOB25','SULFPt2r','gap_filled';'Bacteroides_fragilis_BOB25','TABt2r','gap_filled';'Bacteroides_fragilis_BOB25','EX_5asa(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_abz_ala_b(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_bzd(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_olsa(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_neopront(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_pront(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_sanilamide(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_ssz(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_sulfp(e)','gap_filled';'Bacteroides_fragilis_BOB25','EX_tab(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','BZD_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','BZD_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','SSZ_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_NCTC_9343','5ASAt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','BZDt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','NEOPRONTt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','OLSAt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','PRONTt2r',
'gap_filled';'Bacteroides_fragilis_NCTC_9343','SANILAMIDEt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','SSZt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','SULFPt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','TABt2r','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_5asa(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_abz_ala_b(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_bzd(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_olsa(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_neopront(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_pront(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_sanilamide(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_ssz(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_sulfp(e)','gap_filled';'Bacteroides_fragilis_NCTC_9343','EX_tab(e)','gap_filled';'Bacteroides_fragilis_YCH46','BZD_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_YCH46','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_YCH46','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_YCH46','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_YCH46','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_fragilis_YCH46','BZD_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_YCH46','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_YCH46','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_YCH46','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_YCH46','SSZ_AR_NADP','AzoC_Cp';'Bacteroides_fragilis_YCH46','5ASAt2r','gap_filled';'Bacteroides_fragilis_YCH46','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_fragilis_YCH46','BZDt2r','gap_filled';'Bacteroides_fragilis_YCH46','NEOPRONTt2r','gap_filled';'Bacteroides_fragilis_YCH46','OLSAt2r','gap_filled';'Bacteroides_fragilis_YCH46','PRONTt2r','gap_filled';'Bacteroides_fragilis_YCH46','SANILAMIDEt2r','gap_filled';'Bacteroides_fragilis_YCH46','SSZt2r','gap_filled';'Bacteroides_fragilis_YCH46','SULFPt2r','gap_filled';'Bacteroides_fragilis_YCH46','TABt2r','gap_filled';'Bacteroides_fragilis_YCH46','EX_5asa(e)','gap_filled';'Bacteroides_fragilis_YC
H46','EX_abz_ala_b(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_bzd(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_olsa(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_neopront(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_pront(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_sanilamide(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_ssz(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_sulfp(e)','gap_filled';'Bacteroides_fragilis_YCH46','EX_tab(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','BZD_AR_NAD','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','BZD_AR_NADP','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','SSZ_AR_NADP','AzoC_Cp';'Bacteroides_ovatus_ATCC_8483','5ASAt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','BZDt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','NEOPRONTt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','OLSAt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','PRONTt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','SANILAMIDEt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','SSZt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','SULFPt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','TABt2r','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_5asa(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_abz_ala_b(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_bzd(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_olsa(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_neopront(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_pront(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_sanilamide(e)'
,'gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_ssz(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_sulfp(e)','gap_filled';'Bacteroides_ovatus_ATCC_8483','EX_tab(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','BZD_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','BZD_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','SSZ_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_ATCC_8482','5ASAt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','BZDt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','NEOPRONTt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','OLSAt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','PRONTt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','SANILAMIDEt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','SSZt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','SULFPt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','TABt2r','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_5asa(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_abz_ala_b(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_bzd(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_olsa(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_neopront(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_pront(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_sanilamide(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_ssz(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_sulfp(e)','gap_filled';'Bacteroides_vulgatus_ATCC_8482','EX_tab(e)','gap_filled';'Bacteroides_vulgatus_mpk','BZD_AR_NAD','AzoC_Cp';'Bacter
oides_vulgatus_mpk','NEOPRONT_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_mpk','OLSA_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_mpk','PRONT_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_mpk','SSZ_AR_NAD','AzoC_Cp';'Bacteroides_vulgatus_mpk','BZD_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_mpk','NEOPRONT_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_mpk','OLSA_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_mpk','PRONT_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_mpk','SSZ_AR_NADP','AzoC_Cp';'Bacteroides_vulgatus_mpk','5ASAt2r','gap_filled';'Bacteroides_vulgatus_mpk','ABZ_ALA_Bt2r','gap_filled';'Bacteroides_vulgatus_mpk','BZDt2r','gap_filled';'Bacteroides_vulgatus_mpk','NEOPRONTt2r','gap_filled';'Bacteroides_vulgatus_mpk','OLSAt2r','gap_filled';'Bacteroides_vulgatus_mpk','PRONTt2r','gap_filled';'Bacteroides_vulgatus_mpk','SANILAMIDEt2r','gap_filled';'Bacteroides_vulgatus_mpk','SSZt2r','gap_filled';'Bacteroides_vulgatus_mpk','SULFPt2r','gap_filled';'Bacteroides_vulgatus_mpk','TABt2r','gap_filled';'Bacteroides_vulgatus_mpk','EX_5asa(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_abz_ala_b(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_bzd(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_olsa(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_neopront(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_pront(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_sanilamide(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_ssz(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_sulfp(e)','gap_filled';'Bacteroides_vulgatus_mpk','EX_tab(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','BZD_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','NEOPRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','OLSA_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','PRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','SSZ_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','BZD_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','NEOPRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','OLSA_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_BGN4
','PRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','SSZ_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_BGN4','5ASAt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','ABZ_ALA_Bt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','BZDt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','NEOPRONTt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','OLSAt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','PRONTt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','SANILAMIDEt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','SSZt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','SULFPt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','TABt2r','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_5asa(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_abz_ala_b(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_bzd(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_olsa(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_neopront(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_pront(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_sanilamide(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_ssz(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_sulfp(e)','gap_filled';'Bifidobacterium_bifidum_BGN4','EX_tab(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','BZD_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','NEOPRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','OLSA_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','PRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','SSZ_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','BZD_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','NEOPRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','OLSA_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','PRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','SSZ_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_LMG_13195','5ASAt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','ABZ_ALA_Bt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','BZDt2r',
'gap_filled';'Bifidobacterium_bifidum_LMG_13195','NEOPRONTt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','OLSAt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','PRONTt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','SANILAMIDEt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','SSZt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','SULFPt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','TABt2r','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_5asa(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_abz_ala_b(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_bzd(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_olsa(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_neopront(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_pront(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_sanilamide(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_ssz(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_sulfp(e)','gap_filled';'Bifidobacterium_bifidum_LMG_13195','EX_tab(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','BZD_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','NEOPRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','OLSA_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','PRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','SSZ_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','BZD_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','NEOPRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','OLSA_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','PRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','SSZ_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_PRL2010','5ASAt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','ABZ_ALA_Bt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','BZDt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','NEOPRONTt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','OLSAt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','PRONTt2r','
gap_filled';'Bifidobacterium_bifidum_PRL2010','SANILAMIDEt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','SSZt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','SULFPt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','TABt2r','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_5asa(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_abz_ala_b(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_bzd(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_olsa(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_neopront(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_pront(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_sanilamide(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_ssz(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_sulfp(e)','gap_filled';'Bifidobacterium_bifidum_PRL2010','EX_tab(e)','gap_filled';'Bifidobacterium_bifidum_S17','BZD_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_S17','NEOPRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_S17','OLSA_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_S17','PRONT_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_S17','SSZ_AR_NAD','AzoR_Ec';'Bifidobacterium_bifidum_S17','BZD_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_S17','NEOPRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_S17','OLSA_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_S17','PRONT_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_S17','SSZ_AR_NADP','AzoR_Ec';'Bifidobacterium_bifidum_S17','5ASAt2r','gap_filled';'Bifidobacterium_bifidum_S17','ABZ_ALA_Bt2r','gap_filled';'Bifidobacterium_bifidum_S17','BZDt2r','gap_filled';'Bifidobacterium_bifidum_S17','NEOPRONTt2r','gap_filled';'Bifidobacterium_bifidum_S17','OLSAt2r','gap_filled';'Bifidobacterium_bifidum_S17','PRONTt2r','gap_filled';'Bifidobacterium_bifidum_S17','SANILAMIDEt2r','gap_filled';'Bifidobacterium_bifidum_S17','SSZt2r','gap_filled';'Bifidobacterium_bifidum_S17','SULFPt2r','gap_filled';'Bifidobacterium_bifidum_S17','TABt2r','gap_filled';'Bifidobacterium_bifidum_S17','EX_5asa(e)','ga
p_filled';'Bifidobacterium_bifidum_S17','EX_abz_ala_b(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_bzd(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_olsa(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_neopront(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_pront(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_sanilamide(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_ssz(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_sulfp(e)','gap_filled';'Bifidobacterium_bifidum_S17','EX_tab(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','BZD_AR_NAD','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','NEOPRONT_AR_NAD','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','OLSA_AR_NAD','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','PRONT_AR_NAD','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','SSZ_AR_NAD','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','BZD_AR_NADP','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','NEOPRONT_AR_NADP','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','OLSA_AR_NADP','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','PRONT_AR_NADP','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','SSZ_AR_NADP','AGH15624.1';'Clostridium_perfringens_B_str_ATCC_3626','5ASAt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','ABZ_ALA_Bt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','BZDt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','NEOPRONTt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','OLSAt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','PRONTt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','SANILAMIDEt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','SSZt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','SULFPt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','TABt2r','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_5asa(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_362
6','EX_abz_ala_b(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_bzd(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_olsa(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_neopront(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_pront(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_sanilamide(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_ssz(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_sulfp(e)','gap_filled';'Clostridium_perfringens_B_str_ATCC_3626','EX_tab(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','BZD_AR_NAD','AY422207.1';'Enterococcus_faecalis_ATCC_19433','NEOPRONT_AR_NAD','AY422207.1';'Enterococcus_faecalis_ATCC_19433','OLSA_AR_NAD','AY422207.1';'Enterococcus_faecalis_ATCC_19433','PRONT_AR_NAD','AY422207.1';'Enterococcus_faecalis_ATCC_19433','SSZ_AR_NAD','AY422207.1';'Enterococcus_faecalis_ATCC_19433','BZD_AR_NADP','AY422207.1';'Enterococcus_faecalis_ATCC_19433','NEOPRONT_AR_NADP','AY422207.1';'Enterococcus_faecalis_ATCC_19433','OLSA_AR_NADP','AY422207.1';'Enterococcus_faecalis_ATCC_19433','PRONT_AR_NADP','AY422207.1';'Enterococcus_faecalis_ATCC_19433','SSZ_AR_NADP','AY422207.1';'Enterococcus_faecalis_ATCC_19433','5ASAt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','ABZ_ALA_Bt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','BZDt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','NEOPRONTt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','OLSAt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','PRONTt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','SANILAMIDEt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','SSZt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','SULFPt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','TABt2r','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_5asa(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_abz_ala_b(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_bzd(e)','gap_filled';'
Enterococcus_faecalis_ATCC_19433','EX_olsa(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_neopront(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_pront(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_sanilamide(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_ssz(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_sulfp(e)','gap_filled';'Enterococcus_faecalis_ATCC_19433','EX_tab(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','BZD_AR_NAD','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','NEOPRONT_AR_NAD','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','OLSA_AR_NAD','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','PRONT_AR_NAD','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','SSZ_AR_NAD','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','BZD_AR_NADP','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','NEOPRONT_AR_NADP','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','OLSA_AR_NADP','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','PRONT_AR_NADP','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','SSZ_AR_NADP','NP_415930.1';'Escherichia_coli_str_K_12_substr_MG1655','5ASAt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','ABZ_ALA_Bt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','BZDt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','NEOPRONTt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','OLSAt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','PRONTt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','SANILAMIDEt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','SSZt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','SULFPt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','TABt2r','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_5asa(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_abz_ala_b(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_bz
d(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_olsa(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_neopront(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_pront(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_sanilamide(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_ssz(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_sulfp(e)','gap_filled';'Escherichia_coli_str_K_12_substr_MG1655','EX_tab(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','BZD_AR_NAD','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','NEOPRONT_AR_NAD','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','OLSA_AR_NAD','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','PRONT_AR_NAD','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','SSZ_AR_NAD','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','BZD_AR_NADP','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','NEOPRONT_AR_NADP','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','OLSA_AR_NADP','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','PRONT_AR_NADP','AzoC_Cp or AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','SSZ_AR_NADP','AzoC_Cp or 
AzoEf1_Ef';'Odoribacter_splanchnicus_1651_6_DSM_20712','5ASAt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','ABZ_ALA_Bt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','BZDt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','NEOPRONTt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','OLSAt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','PRONTt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','SANILAMIDEt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','SSZt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','SULFPt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','TABt2r','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_5asa(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_abz_ala_b(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_bzd(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_olsa(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_neopront(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_pront(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_sanilamide(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_ssz(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_sulfp(e)','gap_filled';'Odoribacter_splanchnicus_1651_6_DSM_20712','EX_tab(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','BZD_AR_NAD','AAG04174';'Pseudomonas_aeruginosa_PAO1','NEOPRONT_AR_NAD','AAG04174';'Pseudomonas_aeruginosa_PAO1','OLSA_AR_NAD','AAG04174';'Pseudomonas_aeruginosa_PAO1','PRONT_AR_NAD','AAG04174';'Pseudomonas_aeruginosa_PAO1','SSZ_AR_NAD','AAG04174';'Pseudomonas_aeruginosa_PAO1','BZD_AR_NADP','AAG04174';'Pseudomonas_aeruginosa_PAO1','NEOPRONT_AR_NADP','AAG04174';'Pseudomonas_aeruginosa_PAO1','OLSA_AR_NADP','AAG04174';'Pseudomonas_aeruginosa_PAO1','PRONT_AR_NADP','AAG04174';'Pseudomonas_aeruginosa_PAO1','SSZ_AR_NADP','AAG04174';'Pseudomonas_aeruginosa_PAO1',
'5ASAt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','ABZ_ALA_Bt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','BZDt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','NEOPRONTt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','OLSAt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','PRONTt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','SANILAMIDEt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','SSZt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','SULFPt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','TABt2r','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_5asa(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_abz_ala_b(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_bzd(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_olsa(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_neopront(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_pront(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_sanilamide(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_ssz(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_sulfp(e)','gap_filled';'Pseudomonas_aeruginosa_PAO1','EX_tab(e)','gap_filled'}; genomeAnnotation=vertcat(genomeAnnotation,manuallyAdded); diff --git a/src/reconstruction/demeter/src/debugging/deleteSeedGapfilledReactions.m b/src/reconstruction/demeter/src/refinement/deleteSeedGapfilledReactions.m similarity index 100% rename from src/reconstruction/demeter/src/debugging/deleteSeedGapfilledReactions.m rename to src/reconstruction/demeter/src/refinement/deleteSeedGapfilledReactions.m diff --git a/src/reconstruction/demeter/src/debugging/doubleCheckGapfilledReactions.m b/src/reconstruction/demeter/src/refinement/doubleCheckGapfilledReactions.m similarity index 100% rename from src/reconstruction/demeter/src/debugging/doubleCheckGapfilledReactions.m rename to src/reconstruction/demeter/src/refinement/doubleCheckGapfilledReactions.m diff --git a/src/reconstruction/demeter/src/debugging/findTransportersWithoutExchanges.m 
b/src/reconstruction/demeter/src/refinement/findTransportersWithoutExchanges.m similarity index 100% rename from src/reconstruction/demeter/src/debugging/findTransportersWithoutExchanges.m rename to src/reconstruction/demeter/src/refinement/findTransportersWithoutExchanges.m diff --git a/src/reconstruction/demeter/src/debugging/findUnusedExchangeReactions.m b/src/reconstruction/demeter/src/refinement/findUnusedExchangeReactions.m similarity index 100% rename from src/reconstruction/demeter/src/debugging/findUnusedExchangeReactions.m rename to src/reconstruction/demeter/src/refinement/findUnusedExchangeReactions.m diff --git a/src/reconstruction/demeter/src/debugging/printBiomass.m b/src/reconstruction/demeter/src/refinement/printBiomass.m similarity index 100% rename from src/reconstruction/demeter/src/debugging/printBiomass.m rename to src/reconstruction/demeter/src/refinement/printBiomass.m diff --git a/src/reconstruction/demeter/src/refinement/removeUnannotatedBlockedRxns.m b/src/reconstruction/demeter/src/refinement/removeUnannotatedBlockedRxns.m deleted file mode 100644 index 3795ccb284..0000000000 --- a/src/reconstruction/demeter/src/refinement/removeUnannotatedBlockedRxns.m +++ /dev/null @@ -1,48 +0,0 @@ -function model = removeUnannotatedBlockedRxns(model, biomassRxn) -% Performs a flux variability analysis on an unlimited reconstruction and -% removes any reactions that do not have gene-protein-reaction rules and -% cannot carry flux. 
-% -% INPUT -% model COBRA model structure -% biomassRxn Biomass reaction abbreviation -% -% OUTPUT -% model COBRA model structure -% -% Stefania Magnusdottir, Nov 2017 - -if ~any(ismember(model.rxns, biomassRxn)) - error(['Reaction ', biomassRxn, ' not found in model.']) -end - -% set unlimited constraints -model.lb(model.lb > 0) = 0; -model.ub(model.ub < 0) = 0; -model.lb(model.lb < 0) = -1000; -model.ub(model.ub > 0) = 1000; - -% flux variability analysis -if ~isempty(ver('distcomp')) - [minFlux, maxFlux, ~, ~] = fastFVA(model, 0, 'max', 'ibm_cplex', ... - model.rxns, 'S'); -else - [minFlux, maxFlux] = fluxVariability(model, 0, 'max', model.rxns); -end -FBA=optimizeCbModel(model,'max'); -if FBA.f > 1e-6 - % find blocked reactions without GPRs - unannBlocked = model.rxns(abs(minFlux) < 1e-6 & abs(maxFlux) < 1e-6 & ... - cellfun(@isempty, model.rules)); - - % remove blocked unannotated reactions - for i = 1:length(unannBlocked) - model = removeRxns(model, unannBlocked{i}); - fprintf('Reaction %s removed from reconstruction.', unannBlocked{i}); - end -else - warning(['Model cannot carry flux through the reaction ', biomassRxn, ... - '. 
Blocked reactions not removed.']) -end - -end diff --git a/src/reconstruction/demeter/src/refinement/removeUnannotatedReactions.m b/src/reconstruction/demeter/src/refinement/removeUnannotatedReactions.m index a4919b9407..d9af3fd97e 100644 --- a/src/reconstruction/demeter/src/refinement/removeUnannotatedReactions.m +++ b/src/reconstruction/demeter/src/refinement/removeUnannotatedReactions.m @@ -50,11 +50,11 @@ modelDM=modelTest; end end - % ensure that anaerobic growth on Western diet is not abolished - WesternDiet = readtable('WesternDietAGORA2.txt', 'Delimiter', 'tab'); - WesternDiet=table2cell(WesternDiet); - WesternDiet=cellstr(string(WesternDiet)); - model=useDiet(model,WesternDiet); + % ensure that anaerobic growth on complex medium is not abolished + diet = readtable('ComplexMedium.txt', 'Delimiter', 'tab'); + diet=table2cell(diet); + diet=cellstr(string(diet)); + model=useDiet(model,diet); cnt=1; for i=1:size(unannRxns,1) From 4aca9fe4e206c03e7722be223edac712e1dfb2e5 Mon Sep 17 00:00:00 2001 From: Almut Heinken Date: Thu, 9 Sep 2021 10:52:49 +0100 Subject: [PATCH 70/82] enabled reading of partially translated models, moved functions to more appropriate location --- .../analyseObjectiveShadowPrices.m | 15 +++++++-------- .../demeter/src/debugging/debugModel.m | 12 +++++++++--- .../demeter/src/refinement}/rebuildModel.m | 1 + .../refinement/translateKBaseModel2VMHModel.m | 18 +++++++++++------- 4 files changed, 28 insertions(+), 18 deletions(-) rename src/{analysis/multiSpecies/microbiomeModelingToolbox/mgPipe => reconstruction/demeter/src/refinement}/rebuildModel.m (96%) diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m index 7e66185f62..b79111412d 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m +++ 
b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m @@ -142,13 +142,13 @@ % compute the flux balance analysis solution [model, FBAsolution] = computeSolForObj(model, objectiveList, solver); % % store computed objective values -% for j=1:length(objectiveList) -% if ~isempty(FBAsolution{j,1}) -% objectives{j+1,3+i} = FBAsolution{j,1}.obj; -% else -% objectives{j+1,3+i} = 0; -% end -% end + for j=1:length(objectiveList) + if ~isempty(FBAsolution{j,1}) + objectives{j+1,3+i} = FBAsolution{j,1}.obj; + else + objectives{j+1,3+i} = 0; + end + end % save one model by one-file would be enourmous otherwise save([resultsFolder filesep strrep(modelList{i,1},'.mat','') '_solution'],'FBAsolution'); @@ -211,7 +211,6 @@ environment = getEnvironment(); parfor j = 1:size(objectiveList, 1) - j restoreEnvironment(environment); changeCobraSolver(solver, 'LP', 0, -1); % prevent creation of log files diff --git a/src/reconstruction/demeter/src/debugging/debugModel.m b/src/reconstruction/demeter/src/debugging/debugModel.m index 88ecb33ab2..51a391a35e 100644 --- a/src/reconstruction/demeter/src/debugging/debugModel.m +++ b/src/reconstruction/demeter/src/debugging/debugModel.m @@ -190,7 +190,7 @@ % define if objective should be maximized or minimized if any(contains(fields{i},{'Carbon_sources','Metabolite_uptake','Drug_metabolism'})) osenseStr = 'min'; - elseif any(contains(fields{i},{'Fermentation_products','Secretion_products','Bile_acid_biosynthesis'})) + elseif any(contains(fields{i},{'Fermentation_products','Secretion_products','Bile_acid_biosynthesis','PutrefactionPathways'})) osenseStr = 'max'; end FNlist = testResults.(fields{i}); @@ -200,12 +200,18 @@ if ~isempty(FNs) for j=1:length(FNs) metExch=['EX_' database.metabolites{find(strcmp(database.metabolites(:,2),FNs{j})),1} '(e)']; - if isempty(find(ismember(model.rxns,metExch))) + if contains(fields{i},'PutrefactionPathways') % reaction ID itself provided metExch = FNs{j}; end % 
find reactions that could be gap-filled to enable flux - [model,condGF,targetGF,relaxGF] = runGapfillingFunctions(model,metExch,biomassReaction,osenseStr,database); + try + [model,condGF,targetGF,relaxGF] = runGapfillingFunctions(model,metExch,biomassReaction,osenseStr,database); + catch + condGF = {}; + targetGF = {}; + relaxGF = {}; + end % export the gapfilled reactions if ~isempty(condGF) summary.condGF=union(summary.condGF,condGF); diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/rebuildModel.m b/src/reconstruction/demeter/src/refinement/rebuildModel.m similarity index 96% rename from src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/rebuildModel.m rename to src/reconstruction/demeter/src/refinement/rebuildModel.m index 6848a9ac36..a898ba0f7d 100755 --- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/rebuildModel.m +++ b/src/reconstruction/demeter/src/refinement/rebuildModel.m @@ -21,6 +21,7 @@ % to account for older versions of AGORA toReplace={'EX_4hpro(e)','EX_4hpro_LT(e)';'EX_indprp(e)','EX_ind3ppa(e)';'INDPRPt2r','IND3PPAt2r';'EX_adpcbl(e','EX_adocbl(e';'H202D','H2O2D'}; + for i=1:size(toReplace,1) model.rxns=strrep(model.rxns,toReplace{i,1},toReplace{i,2}); end diff --git a/src/reconstruction/demeter/src/refinement/translateKBaseModel2VMHModel.m b/src/reconstruction/demeter/src/refinement/translateKBaseModel2VMHModel.m index 74aabd813c..e227753d77 100755 --- a/src/reconstruction/demeter/src/refinement/translateKBaseModel2VMHModel.m +++ b/src/reconstruction/demeter/src/refinement/translateKBaseModel2VMHModel.m @@ -47,6 +47,7 @@ model.mets = strrep(model.mets, '[e0]', '_e0'); model.rxns = strrep(model.rxns, '_c0', ''); model.rxns = strrep(model.rxns, '_e0', ''); +model.rxns = strrep(model.rxns, 'R_', ''); % proceed if the model contains any reactions in KBase nomenclature if ~isempty(intersect(model.rxns,translateRxns(:,1))) @@ -60,9 +61,13 @@ biomassMets = model.mets(model.S(:, ismember(model.rxns, 
biomassReaction)) ~= 0); biomassMets = strrep(biomassMets, '_c0', ''); biomassMets = strrep(biomassMets, '_e0', ''); + biomassMets = strrep(biomassMets, '[c]', ''); + biomassMets = strrep(biomassMets, '[e]', ''); - % check if all biomass metabolites are in translation table + % check if all biomass metabolites are in translation table and also + % not already translated notInTableBiomassMets = setdiff(biomassMets, translateMets(:, 1)); + notInTableBiomassMets = setdiff(notInTableBiomassMets, translateMets(:, 2)); if ~isempty(notInTableBiomassMets) error('Model contains biomass metabolites that are not present in translation table') end @@ -84,17 +89,14 @@ % remove biomass reaction from model (add translated reaction at the end) model = removeRxns(model, biomassReaction); - % adust metabolite and reaction IDs + % adust metabolite IDs % model.mets = strrep(model.mets, '[c0]', '_c0'); % model.mets = strrep(model.mets, '[e0]', '_e0'); model.mets = strrep(model.mets, '[c0]', ''); model.mets = strrep(model.mets, '[e0]', ''); model.mets = strrep(model.mets, '_c0', ''); model.mets = strrep(model.mets, '_e0', ''); - model.rxns = strrep(model.rxns, '_c0', ''); - model.rxns = strrep(model.rxns, '_e0', ''); - model.rxns = strrep(model.rxns, 'R_', ''); - + % check if there are any reactions in model that are not in translation % table notInTableRxns = setdiff(model.rxns, translateRxns(:, 1)); @@ -127,7 +129,9 @@ newModel.ub = zeros(size(newModel.rxns)); newModel.rules = cell(0, 1); newModel.genes = model.genes; - newModel.geneNames = model.geneNames; + if isfield(model,'geneNames') + newModel.geneNames = model.geneNames; + end newModel.comments = cell(0, 1); newModel.citations = cell(0, 1); newModel.rxnECNumbers = cell(0, 1); From 182c84c47c0ad047ecb9347261b65646fe4bba41 Mon Sep 17 00:00:00 2001 From: Almut Heinken Date: Thu, 9 Sep 2021 11:01:01 +0100 Subject: [PATCH 71/82] enabled reading of partially translated models, moved functions to more appropriate location --- 
.../calculateReactionAbundance.m | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m new file mode 100644 index 0000000000..e4a885e71a --- /dev/null +++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m @@ -0,0 +1,357 @@ +function [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers) +% Part of the Microbiome Modeling Toolbox. This function calculates and +% plots the total abundance of reactions of interest in a given microbiome +% sample based on the strain-level composition. +% Reaction presence or absence in each strain is derived from the reaction content +% of the respective AGORA model. Two results are given: the total abundance, +% and the abundance on different taxonomical levels. +% +% USAGE +% +% [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers) +% +% INPUTS: +% abundancePath: Path to the .csv file with the abundance data. +% Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv' +% modelPath: Folder containing the strain-specific AGORA models +% OPTIONAL INPUTS: +% infoFilePath: Path to the spreadsheet with the taxonomy +% information on organisms (default: +% AGORA_infoFile.xlsx) +% rxnsList: List of reactions for which the abundance +% should be calculated (if left empty: all +% reactions in all models) +% numWorkers: Number of workers used for parallel pool. If +% left empty, the parallel pool will not be +% started. Parallellization is recommended if +% all reactions are computed. 
+% +% OUTPUT: +% ReactionAbundance Structure with abundance for each microbiome +% and reaction in total and on taxon levels +% TaxonomyInfo: Taxonomical information on each taxon level +% +% .. Author: - Almut Heinken, 03/2018 +% 10/2018: changed input to location of the csv file with the +% abundance data +% 01/2020: adapted to be suitable for pan-models + +% read the csv file with the abundance data +abundance = readtable(abundancePath, 'ReadVariableNames', false); +abundance = table2cell(abundance); +if isnumeric(abundance{2, 1}) + abundance(:, 1) = []; +end + +% + +% load the models +for i = 2:size(abundance, 1) + model = readCbModel([modelPath filesep abundance{i, 1} '.mat']); + modelsList{i, 1} = model; +end + +if ~exist('rxnsList', 'var') || isempty(rxnsList) % define reaction list if not entered + fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n') + % get model list from abundance input file + for i = 2:size(abundance, 1) + model = modelsList{i, 1}; + rxnsList = vertcat(model.rxns, rxnsList); + end + rxnsList = unique(rxnsList); +end + +% Get the taxonomy information +if exist('infoFilePath','var') && ~isempty(infoFilePath) + taxonomy = readtable(infoFilePath, 'ReadVariableNames', false); + taxonomy = table2cell(taxonomy); +else + taxonomy = readtable('AGORA_infoFile.xlsx', 'ReadVariableNames', false); + taxonomy = table2cell(taxonomy); +end + +% load the models found in the individuals and extract which reactions are +% in which model +for i = 2:size(abundance, 1) + model = modelsList{i, 1}; + ReactionPresence{i, 1} = abundance{i, 1}; + for j = 1:length(rxnsList) + ReactionPresence{1, j + 1} = rxnsList{j}; + if ~isempty(find(ismember(model.rxns, rxnsList{j}))) + ReactionPresence{i, j + 1} = 1; + else + ReactionPresence{i, j + 1} = 0; + end + end +end + +% put together a Matlab structure of the results +ReactionAbundance = struct; + +% prepare table for the total abundance +for j = 1:length(rxnsList) + 
ReactionAbundance.('Total'){1, j + 1} = rxnsList{j}; +end + +TaxonomyLevels = { + 'Phylum' + 'Class' + 'Order' + 'Family' + 'Genus' + 'Species' + }; +% extract the list of entries on each taxonomical level +for t = 1:size(TaxonomyLevels, 1) + % find the columns corresponding to each taxonomy level and the list of + % unique taxa + taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t})); + % find and save all entries + taxa = unique(taxonomy(2:end, taxonCol)); + % exclude unclassified entries + taxa(strncmp('unclassified', taxa, taxonCol)) = []; + TaxonomyLevels{t, 2} = taxa; + % define the correct columns in taxonomy table + TaxonomyLevels{t, 3} = taxonCol; + % prepare table for the abundance on taxon levels + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t, 1}){1, cnt} = strcat(TaxonomyLevels{t, 2}{l}, '_', rxnsList{j}); + cnt = cnt + 1; + end + end +end + +% Find the right column for the input data (strains, species,..) +taxa=taxonomy(2:end,1); +if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1 + inputTaxa=taxa; + inputCol=1; +else + abundance(:,1)=regexprep(abundance(:,1),'pan','','once'); + inputTaxa={}; + for i=2:size(taxonomy,2) + taxa=strrep(taxonomy(:,i),' ','_'); + taxa=strrep(taxa,'.','_'); + taxa=strrep(taxa,'/','_'); + taxa=strrep(taxa,'-','_'); + taxa=strrep(taxa,'__','_'); + if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1 + inputTaxa=taxa; + inputCol=i; + end + end +end +if isempty(inputTaxa) + error('Some taxa in the abundance file are not found in the taxonomy file!') +end + +for i = 2:size(abundance, 2) + %% calculate reaction abundance for the samples one by one + fprintf(['Calculating reaction abundance for sample ', num2str(i - 1), ' of ' num2str(size(abundance, 2) - 1) '.. 
\n']) + ReactionAbundance.('Total'){i, 1} = abundance{1, i}; + for t = 1:size(TaxonomyLevels, 1) + ReactionAbundance.(TaxonomyLevels{t, 1}){i, 1} = abundance{1, i}; + end + % use parallel pool if workers specified as input + if exist('numWorkers', 'var') && numWorkers > 0 + poolobj = gcp('nocreate'); + if isempty(poolobj) + parpool(numWorkers) + end + end + % create tables in which abundances for each individual for + % all reactions/taxa are stored + totalAbun = zeros(length(rxnsList), 1); + phylumAbun = zeros(length(rxnsList), length(TaxonomyLevels{1, 2})); + classAbun = zeros(length(rxnsList), length(TaxonomyLevels{2, 2})); + orderAbun = zeros(length(rxnsList), length(TaxonomyLevels{3, 2})); + familyAbun = zeros(length(rxnsList), length(TaxonomyLevels{4, 2})); + genusAbun = zeros(length(rxnsList), length(TaxonomyLevels{5, 2})); + speciesAbun = zeros(length(rxnsList), length(TaxonomyLevels{6, 2})); + + parfor j = 1:length(rxnsList) + + % store the abundance for each reaction and taxon separately in a + % temporary file to enable parallellization + tmpPhyl = zeros(length(rxnsList), length(TaxonomyLevels{1, 2})); + tmpClass = zeros(length(rxnsList), length(TaxonomyLevels{2, 2})); + tmpOrder = zeros(length(rxnsList), length(TaxonomyLevels{3, 2})); + tmpFamily = zeros(length(rxnsList), length(TaxonomyLevels{4, 2})); + tmpGenus = zeros(length(rxnsList), length(TaxonomyLevels{5, 2})); + tmpSpecies = zeros(length(rxnsList), length(TaxonomyLevels{6, 2})); + + for k = 2:size(abundance, 1) + % check if the reaction is present in the strain + if ReactionPresence{k, j + 1} == 1 + % calculate total abundance + totalAbun(j) = totalAbun(j) + str2double(abundance{k, i}); + % calculate phylum abundance + t = 1; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpPhyl(1, taxonCol) = tmpPhyl(1, taxonCol) + str2double(abundance{k, 
i}); + end + % calculate class abundance + t = 2; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpClass(1, taxonCol) = tmpClass(1, taxonCol) + str2double(abundance{k, i}); + end + % calculate order abundance + t = 3; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpOrder(1, taxonCol) = tmpOrder(1, taxonCol) + str2double(abundance{k, i}); + end + % calculate family abundance + t = 4; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpFamily(1, taxonCol) = tmpFamily(1, taxonCol) + str2double(abundance{k, i}); + end + % calculate genus abundance + t = 5; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpGenus(1, taxonCol) = tmpGenus(1, taxonCol) + str2double(abundance{k, i}); + end + % calculate species abundance + t = 6; + findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3}); + if any(strcmp(findTax{1}, TaxonomyLevels{t, 2})) + taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2})); + tmpSpecies(1, taxonCol) = tmpSpecies(1, taxonCol) + str2double(abundance{k, i}); + end + end + end + phylumAbun(j, :) = tmpPhyl(1, :); + classAbun(j, :) = tmpClass(1, :); + orderAbun(j, :) = tmpOrder(1, :); + familyAbun(j, :) = tmpFamily(1, :); + genusAbun(j, :) = tmpGenus(1, :); + speciesAbun(j, :) = tmpSpecies(1, :); + end + %% store the abundances total and on taxonomic levels calculated for the individual in the output structure + for j = 
1:length(rxnsList) + ReactionAbundance.('Total'){i, j + 1} = totalAbun(j); + % abundance on taxon levels + end + % phylum abundance + t = 1; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = phylumAbun(j, l); + cnt = cnt + 1; + end + end + % class abundance + t = 2; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = classAbun(j, l); + cnt = cnt + 1; + end + end + % order abundance + t = 3; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = orderAbun(j, l); + cnt = cnt + 1; + end + end + % family abundance + t = 4; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = familyAbun(j, l); + cnt = cnt + 1; + end + end + % genus abundance + t = 5; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = genusAbun(j, l); + cnt = cnt + 1; + end + end + % species abundance + t = 6; + cnt = 2; + for j = 1:length(rxnsList) + for l = 1:length(TaxonomyLevels{t, 2}) + ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = speciesAbun(j, l); + cnt = cnt + 1; + end + end +end + +% finally, delete empty columns to avoid unneccessarily big file sizes +fprintf('Finalizing the output file... 
\n') + +fNames = fieldnames(ReactionAbundance); +for i = 1:length(fNames) + cValues = string(ReactionAbundance.(fNames{i})(2:end, 2:end)); + rownames=ReactionAbundance.(fNames{i})(:,1); + ReactionAbundance.(fNames{i})(:,1)=[]; + cTotal = sum(str2double(cValues),1); + ReactionAbundance.(fNames{i})(:,find(cTotal<0.000000001))=[]; + ReactionAbundance.(fNames{i})=[rownames,ReactionAbundance.(fNames{i})]; + ReactionAbundance.(fNames{i}){1,1}='Samples'; +end + +% export taxonomical information +taxonCol = 'Phylum'; +% remove unnecessary columns +taxonomy(:,taxonCol+1:end)=[]; + +for t = 2:size(TaxonomyLevels, 1) + taxa=ReactionAbundance.(TaxonomyLevels{t})(2:end,1); + TaxonomyReduced=taxonomy; + taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t})); + TaxonomyReduced(:,1:taxonCol-1)=[]; + % remove duplicate entries + [C,IA] = unique(TaxonomyReduced(:,1),'stable'); + % remove unclassified taxa + findUncl=find(contains(C,'unclassified')); + IA(findUncl,:)=[]; + TaxonomyInfo.(TaxonomyLevels{t})=TaxonomyReduced(IA,:); +end + +% Plot the calculated reaction abundances. 
+for i = 1:length(fNames) + xlabels = ReactionAbundance.(fNames{i})(2:end,1); + ylabels = ReactionAbundance.(fNames{i})(1,2:end); + data = string(ReactionAbundance.(fNames{i})(2:end, 2:end)); + data = str2double(data); + figure; + imagesc(data') + colormap('hot') + colorbar + if length(xlabels) < 50 + set(gca, 'xtick', 1:length(xlabels)); + xticklabels(xlabels); + xtickangle(90) + end + if length(ylabels) < 50 + set(gca, 'ytick', 1:length(ylabels)); + yticklabels(ylabels); + end + set(gca, 'TickLabelInterpreter', 'none'); + title(fNames{i}) +end + +end \ No newline at end of file From e76858222546eb402c4e6fa2c576895eb72ef6ad Mon Sep 17 00:00:00 2001 From: German Preciat Date: Mon, 13 Sep 2021 16:38:44 +0200 Subject: [PATCH 72/82] update chemoinformatics --- .../generateChemicalDatabase.m | 36 ++++++++++++------- .../chemoInformatics/obtainAtomMappingsRDT.m | 9 +++-- .../chemoInformatics/standardiseMolDatabase.m | 15 ++++++-- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m index 1e8eace9d1..07731eb0b8 100644 --- a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m +++ b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m @@ -656,7 +656,7 @@ % Standardise MOL files the most consitent MOL files standardisationReport = standardiseMolDatabase(tmpDir, metList, metDir, standardisationApproach); info.standardisationReport = standardisationReport; - +save if oBabelInstalled % Create table nRows = size(standardisationReport.SMILES, 1); @@ -827,7 +827,7 @@ 0.9137, 1.0000, 0.8392]; ax.Colormap = newColors; title({'2. 
Reaction coverage', [num2str(sum(X)) ' internal reactions in the model']}, 'FontSize', 20) - lh = legend(labelsToAdd(find(X)), 'FontSize', 16, 'Location', 'best'); + lh = legend(labelsToAdd(find(X)), 'FontSize', 16); set(findobj(pieChart,'type','text'),'fontsize',18) end @@ -861,16 +861,26 @@ % Get bond enthalpies and bonds broken and formed if options.printlevel > 0 display('Obtaining RInChIes and reaction SMILES ...') - [bondsBF, bondsE, meanBBF, meanBE] = findBEandBBF(model, [rxnDir filesep 'atomMapped'], 1); - info.bondsData.table = table(model.rxns, model.rxnNames, bondsBF, bondsE, ... - 'VariableNames', {'rxns', 'rxnNames', 'bondsBF', 'bondsE'}); - info.bondsData.table = sortrows(info.bondsData.table, {'bondsBF'}, {'descend'}); + end + + [bondsBF, bondsE, meanBBF, meanBE, substrateMass] = findBEandBBF(model, [rxnDir ... + filesep 'atomMapped'], options.printlevel); + + % Replace NaN values to 'Missing' + missingRxns = isnan(bondsBF); + bondsBF = cellstr(num2str(bondsBF)); + bondsBF(missingRxns) = {'Missing'}; + bondsE = cellstr(num2str(bondsE)); + bondsE(missingRxns) = {'Missing'}; + + % Create table & sort values + info.bondsData.table = table(model.rxns, model.rxnNames, bondsBF, bondsE, substrateMass, ... + 'VariableNames', {'rxns', 'rxnNames', 'bondsBF', 'bondsE', 'substrateMass'}); + info.bondsData.table = [sortrows(info.bondsData.table(~missingRxns, :), ... + {'bondsBF'}, {'descend'}); info.bondsData.table(missingRxns, :)]; + + if options.printlevel > 0 display(info.bondsData.table) - else - [bondsBF, bondsE, meanBBF, meanBE] = findBEandBBF(model, [rxnDir filesep 'atomMapped']); - info.bondsData.table = table(model.rxns, model.rxnNames, bondsBF, bondsE, ... 
- 'VariableNames', {'rxns','rxnNames','bondsBF','bondsE'}); - info.bondsData.table = sortrows(info.bondsData.table, {'bondsBF'}, {'descend'}); end % Add data in the model @@ -891,9 +901,9 @@ end diary off -if options.printlevel > 0 > 0 +if options.printlevel > 0 fprintf('%s\n', ['Diary written to: ' options.outputDir]) fprintf('%s\n', 'generateChemicalDatabase run is complete.') end -end +end \ No newline at end of file diff --git a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m index 5c108f0e47..7e535ba6dc 100644 --- a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m +++ b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m @@ -37,7 +37,12 @@ % files (default: FALSE). % % OUTPUTS: -% balancedRxns: List of standadised atom mapped reactions. +% atomMappingReport: A report with the atom mapping data +% *. rxnFilesWritten the MDL RXN written written +% *. balanced the balanced reactions +% *. unbalancedBool the unbalanced reactions +% *. inconsistentBool the inconsistent reactions +% *. notMapped the that couldn't be mapped % A directory with standardised RXN files. % A directory with atom mapped RXN files. % A directory images for atom mapped reactions. 
@@ -470,4 +475,4 @@ end end end -end +end \ No newline at end of file diff --git a/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m b/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m index 15695cdbca..5a8fd6ed91 100644 --- a/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m +++ b/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m @@ -79,6 +79,9 @@ end end +% do not standardize +moleculesNotS = {'nad.mol'; 'nadh.mol'; 'nadp.mol'; 'nadph.mol'}; + % The new MOL files are readed % Get list of MOL files d = dir(molDir); @@ -133,7 +136,11 @@ fprintf(fid2, '%s\n', cmdout{contains(cmdout,'InChI=1S')}); fclose(fid2); % Create an InChI based-MOL file - command = ['obabel -iinchi tmp -O ' standardisedMolFiles name ' --gen2D']; + if ~ismember(aMets{i}, moleculesNotS) + command = ['obabel -iinchi tmp -O ' standardisedMolFiles name ' --gen2D']; + else + copyfile([molDir name], standardisedMolFiles) + end [~, ~] = system(command); delete('tmp') else @@ -141,7 +148,11 @@ fid2 = fopen('tmp', 'w'); fprintf(fid2, '%s\n', smiles); fclose(fid2); - command = ['obabel -ismiles tmp -O ' standardisedMolFiles name ' --gen2D']; + if ~ismember(aMets{i}, moleculesNotS) + command = ['obabel -ismiles tmp -O ' standardisedMolFiles name ' --gen2D']; + else + copyfile([molDir name], standardisedMolFiles) + end [~, ~] = system(command); delete('tmp') end From be6e275dee20abf008c35ec54e27e92de0000b03 Mon Sep 17 00:00:00 2001 From: German Preciat Date: Wed, 15 Sep 2021 11:24:48 +0200 Subject: [PATCH 73/82] Update chemoInformatics functions --- .../chemoInformatics/findBEandBBF.m | 353 ++++++++++++ .../generateChemicalDatabase.m | 52 +- .../chemoInformatics/obtainAtomMappingsRDT.m | 13 +- .../chemoInformatics/obtainMetStructures.m | 504 ++++++++---------- .../chemoInformatics/openBabelConverter.m | 103 ++++ 5 files changed, 703 insertions(+), 322 deletions(-) create mode 100644 src/dataIntegration/chemoInformatics/findBEandBBF.m create mode 100644 
src/dataIntegration/chemoInformatics/openBabelConverter.m diff --git a/src/dataIntegration/chemoInformatics/findBEandBBF.m b/src/dataIntegration/chemoInformatics/findBEandBBF.m new file mode 100644 index 0000000000..303ebb65df --- /dev/null +++ b/src/dataIntegration/chemoInformatics/findBEandBBF.m @@ -0,0 +1,353 @@ +function [bondsBF, bondsE, meanBBF, meanBE, substrateMass] = findBEandBBF(model, rxnDir, printLevel) +% Calculate the bond enthalpies and the bonds broken and formed for the +% mass balanced reactions in a metabolic network based on atom mapping +% data. +% +% The bond enthalphies are based on: +% Huheey, pps. A-21 to A-34; T.L. Cottrell, "The Strengths of Chemical +% Bonds," 2nd ed., Butterworths, London, 1958; +% B. deB. Darwent, "National Standard Reference Data Series," National +% Bureau of Standards, No. 31,Washington, DC, 1970; S.W. Benson, J. Chem. +% Educ., 42, 502 (1965) +% +% USAGE: +% +% [bondsBF, bondsE, meanBBF, meanBE, substrateMass] = findBEandBBF(model, rxnDir, printLevel) +% +% INPUTS: +% model: COBRA model with following fields: +% +% * .rxns - An n x 1 array of reaction identifiers. +% Should match metabolite identifiers in +% rxnDir Directory of with the RXN files. +% printLevel Print figure with the relation of mass vs bondsBF bondsE +% +% OUTPUTS: +% bondsBF An n x 1 vector with the number of bonds broken and +% formed. External or mass balanced reactions are equal to 0. +% Missing atom mapping data is considered as mean(bondsBF) +% bondsE An n x 1 vector with the bond enthalpies in kJ/mol. +% External or mass balanced reactions are equal to 0. Missing +% atom mapping data is considered as mean(bondsE). +% meanBBF Mean of bonds broken and formed in the model +% meanBE Mean of bond enthalpies in the model +% substrateMass Total mass of the substrates +% +% .. Author: - German A. 
Preciat Gonzalez 12/06/2017 + +rxnDir = [regexprep(rxnDir,'(/|\\)$',''), filesep]; +if nargin < 3 || isempty(printLevel) + printLevel = 0; +end + +% Load chemical data +bondsArray = {'1 H H'; '1 B H'; '1 C H'; '1 H Si'; '1 Ge H'; '1 H Sn'; ... + '1 H N'; '1 H P'; '1 As H'; '1 H O'; '1 H S'; '1 H Se'; '1 H Te'; ... + '1 F H'; '1 Cl H'; '1 Br H'; '1 H I'; '1 B B'; '1 B O'; '1 B F'; ... + '1 B Cl'; '1 B Br'; '1 C C'; '2 C C'; '3 C C'; '1 C Si'; '1 C Ge'; ... + '1 C Sn'; '1 C Pb'; '1 C N'; '2 C N'; '3 C N'; '1 C P'; '1 C O'; ... + '2 C O'; '3 C O'; '1 C B'; '1 C S'; '2 C S'; '1 C F'; '1 C Cl'; ... + '1 C Br'; '1 C I'; '1 Si Si'; '1 N Si'; '1 O Si'; '1 S Si'; ... + '1 F Si'; '1 Cl Si'; '1 Br Si'; '1 I Si'; '1 Ge N'; '1 F Ge'; ... + '1 Cl Ge'; '1 Br Ge'; '1 Ge I'; '1 F Sn'; '1 Cl Sn'; '1 Br Sn'; ... + '1 I Sn'; '1 N P'; '1 F Pb'; '1 Cl Pb'; '1 Br Pb'; '1 I Pb'; ... + '1 N N'; '2 N N'; '3 N N'; '1 N O'; '2 N O'; '1 F N'; '1 Cl N'; ... + '1 P P'; '1 O S'; '1 O P'; '2 O P'; '2 P S'; '1 F P'; '1 Cl P'; ... + '1 Br P'; '1 I P'; '1 As As'; '1 As O'; '1 As F'; '1 As Cl'; ... + '1 As Br'; '1 As I'; '1 Sb Sb'; '1 F Sb'; '1 Cl Sb'; '1 O O'; ... + '2 O O'; '1 F O'; '2 O S'; '1 S S'; '2 S S'; '1 F S'; '1 Cl S'; ... + '1 Se Se'; '2 Se Se'; '1 F F'; '1 Cl Cl'; '1 Br Br'; '1 I I'; ... + '1 At At'; '1 I O'; '1 F I'; '1 Cl I'; '1 Br I'; '1 F Kr'; '1 O Xe';... + '1 F Xe'}; +bondsEnergy = [432; 389; 411; 318; 288; 251; 386; 322; 247; 459; 363; ... + 276; 238; 565; 428; 362; 295; 293; 536; 613; 456; 377; 346; 602; ... + 835; 318; 238; 192; 130; 305; 615; 887; 264; 358; 799; 1072; 356; ... + 272; 573; 485; 327; 285; 213; 222; 355; 452; 293; 565; 381; 310; ... + 234; 188; 257; 470; 349; 276; 414; 323; 237; 205; 210; 331; 243; ... + 201; 142; 167; 418; 942; 201; 607; 283; 313; 201; 1; 335; 544; ... + 335; 490; 326; 264; 184; 146; 301; 484; 322; 458; 200; 121; 440; ... + 248; 142; 494; 190; 522; 226; 425; 284; 255; 172; 272; 155; 240; ... 
+ 190; 148; 116; 201; 273; 208; 175; 50; 84; 130]; % kJ/mol +if printLevel + atomicElements = {'H'; 'He'; 'Li'; 'Be'; 'B'; 'C'; 'N'; 'O'; 'F'; ... + 'Ne'; 'Na'; 'Mg'; 'Al'; 'Si'; 'P'; 'S'; 'Cl'; 'K'; 'Ar'; 'Ca'; ... + 'Sc'; 'Ti'; 'V'; 'Cr'; 'Mn'; 'Fe'; 'Ni'; 'Co'; 'Cu'; 'Zn'; 'Ga';... + 'Ge'; 'As'; 'Se'; 'Br'; 'Kr'; 'Rb'; 'Sr'; 'Y'; 'Zr'; 'Nb'; 'Mo';... + 'Tc'; 'Ru'; 'Rh'; 'Pd'; 'Ag'; 'Cd'; 'In'; 'Sn'; 'Sb'; 'I'; 'Te';... + 'Xe'; 'Cs'; 'Ba'; 'La'; 'Ce'; 'Pr'; 'Nd'; 'Pm'; 'Sm'; 'Eu'; ... + 'Gd'; 'Tb'; 'Dy'; 'Ho'; 'Er'; 'Tm'; 'Yb'; 'Lu'; 'Hf'; 'Ta'; 'W';... + 'Re'; 'Os'; 'Ir'; 'Pt'; 'Au'; 'Hg'; 'Tl'; 'Pb'; 'Bi'; 'Po'; ... + 'At'; 'Rn'; 'Fr'; 'Ra'; 'Ac'; 'Pa'; 'Th'; 'Np'; 'U'; 'Pu'; 'Am';... + 'Bk'; 'Cm'; 'No'; 'Cf'; 'Es'; 'Hs'; 'Mt'; 'Fm'; 'Md'; 'Lr'; ... + 'Rf'; 'Bh'; 'Db'; 'Sg'; 'Uun'; 'Uuu'; 'Uub'; 'A'; 'R'; '*'}; + atomicWeight = [1.00797; 4.0026; 6.941; 9.01218; 10.81; 12.011; ... + 14.0067; 15.9994; 18.998403; 20.179; 22.98977; 24.305; 26.98154; ... + 28.0855; 30.97376; 32.06; 35.453; 39.0983; 39.948; 40.08; ... + 44.9559; 47.9; 50.9415; 51.996; 54.938; 55.847; 58.7; 58.9332; ... + 63.546; 65.38; 69.72; 72.59; 74.9216; 78.96; 79.904; 83.8; ... + 85.4678; 87.62; 88.9059; 91.22; 92.9064; 95.94; 98; 101.07; ... + 102.9055; 106.4; 107.868; 112.41; 114.82; 118.69; 121.75; ... + 126.9045; 127.6; 131.3; 132.9054; 137.33; 138.9055; 140.12; ... + 140.9077; 144.24; 145; 150.4; 151.96; 157.25; 158.9254; 162.5; ... + 164.9304; 167.26; 168.9342; 173.04; 174.967; 178.49; 180.9479; ... + 183.85; 186.207; 190.2; 192.22; 195.09; 196.9665; 200.59; ... + 204.37; 207.2; 208.9804; 209; 210; 222; 223; 226.0254; 227.0278; ... + 231.0359; 232.0381; 237.0482; 238.029; 242; 243; 247; 247; 250; ... + 251; 252; 255; 256; 257; 258; 260; 261; 262; 262; 263; 269; 272; ... 
+ 277; NaN; NaN; NaN]; % amu +end + +% Get list of RXN files +d = dir(rxnDir); +d = d(~[d.isdir]); +aRxns = {d.name}'; +aRxns = aRxns(~cellfun('isempty',regexp(aRxns,'(\.rxn)$'))); +% Identifiers for atom mapped reactions +aRxns = regexprep(aRxns, '(\.rxn)$',''); +assert(~isempty(aRxns), 'RXN files directory is empty or nonexistent.') + +% Identify mass inbalanced reactions +modeltmp = findSExRxnInd(model,[], printLevel); +exIdx = find(modeltmp.ExchRxnBool); + +[bondsE, bondsBF] = deal(zeros(size(model.rxns))); +unbalancedBool = false(size(model.rxns)); + +for i = 1:size(model.rxns, 1) + + clearvars -except unbalanced bondsBF bondsE exIdx atomicWeight ... + atomicElements printLevel bondsEnergy bondsArray rxnDir model i ... + unbalancedBool allSubstrateMass unbalancedBool + + if printLevel + substrateMass = 0; + end + + rxnFile = [rxnDir model.rxns{i} '.rxn']; + if isfile(rxnFile) % Check if the file exists + + % Read the MDL RXN file + rxnFileData = regexp( fileread(rxnFile), '\n', 'split')'; + + % Count number of products and substrates + substrates = str2double(rxnFileData{5}(1:3)); + products = str2double(rxnFileData{5}(4:6)); + + % Identify where a molecule starts + begmol = strmatch('$MOL',rxnFileData); + + % Initialaze the number of atoms/bonds in substrates/products + [bondsS, bondsP, atomsS, atomsP] = deal(0); + + % Obtain atoms and bond information + for j = 1:substrates + products + + noOfAtoms = str2double(rxnFileData{begmol(j) + 4}(1:3)); + noOfBonds = str2double(rxnFileData{begmol(j) + 4}(4:6)); + if j <= substrates + moleculeType = 'S'; + else + moleculeType = 'P'; + end + + % Look for the # atoms, mapping numbers and their elements + % corresponding the mapping number + for k = 1:noOfAtoms + if printLevel && j <= substrates + substrateMass = substrateMass + atomicWeight(ismember(atomicElements, strtrim(rxnFileData{begmol(j) + 4 + k}(32:33)))); + end + switch moleculeType + case 'S' + atomsS = atomsS + 1; + mappingsS(atomsS) = 
str2double(rxnFileData{begmol(j) + 4 + k}(61:63)); + if mappingsS(atomsS) == 0 + unbalancedBool(i) = true; + continue + end + elementsS{mappingsS(atomsS)} = strtrim(rxnFileData{begmol(j) + 4 + k}(32:33)); + case 'P' + atomsP = atomsP + 1; + mappingsP(atomsP) = str2double(rxnFileData{begmol(j) + 4 + k}(61:63)); + if mappingsP(atomsP) == 0 + unbalancedBool(i) = true; + continue + end + elementsP{mappingsP(atomsP)} = strtrim(rxnFileData{begmol(j) + 4 + k}(32:33)); + end + end + + % Check if the corresponding molecule es just an atom or not + if noOfBonds > 0 + % Look for the # of bonds, the row atom (the mapping number of + % the first atom in the bond), the column atom (the mapping + % number of second atom in the bond) and the bond type + for k = 1:noOfBonds + rowAtom = str2double(rxnFileData{begmol(j) + 4 + k + noOfAtoms}(1:3)); + colAtom = str2double(rxnFileData{begmol(j) + 4 + k + noOfAtoms}(4:6)); + switch moleculeType + case 'S' + bondsS = bondsS +1; + rowS(bondsS, 1) = str2double(rxnFileData{begmol(j)+ 4 + rowAtom}(61:63)); + colS(bondsS, 1) = str2double(rxnFileData{begmol(j)+ 4 + colAtom}(61:63)); + bondTypeS{bondsS, 1} = rxnFileData{begmol(j) + 4 + k + noOfAtoms}(9); + case 'P' + bondsP = bondsP +1; + rowP(bondsP, 1) = str2double(rxnFileData{begmol(j)+ 4 + rowAtom}(61:63)); + colP(bondsP, 1) = str2double(rxnFileData{begmol(j)+ 4 + colAtom}(61:63)); + bondTypeP{bondsP, 1} = rxnFileData{begmol(j) + 4 + k + noOfAtoms}(9); + end + end + end + end + + % Check if the reaction mapps for the same elements + % assert(isequal(elementsP, elementsS), 'The reaction mapps for different elements') + + if ~isequal(elementsP, elementsS) || unbalancedBool(i) + unbalancedBool(i) = true; + clear elementsP elementsS + continue + end + + % Create the matrices + switch exist('bondTypeS', 'var') + exist('bondTypeP', 'var') * 2 + % No bonds in the reaction + case 0 + + bondsE(i, 1) = 0; + bondsBF(i, 1) = 0; + + % No bonds in the product(s) + case 1 + for j = 1 : length(bondTypeS) + 
+ % Looks for the avarage energy of a chemical bond based on the literature + matrixS_enthalpy(rowS(j), colS(j)) = findBondEnergy2([bondTypeS(j) elementsS(rowS(j)) elementsS(colS(j))], bondsArray, bondsEnergy); + matrixS_enthalpy(colS(j), rowS(j)) = findBondEnergy2([bondTypeS(j) elementsS(rowS(j)) elementsS(colS(j))], bondsArray, bondsEnergy); + matrixS_BBF(rowS(j), colS(j)) = str2double(bondTypeS{j}); + matrixS_BBF(colS(j), rowS(j)) = str2double(bondTypeS{j}); + end + matrixP_enthalpy = zeros(length(matrixS_enthalpy)); + matrixP_BBF = zeros(length(matrixS_BBF)); + + % No bonds in the substrate(s) + case 2 + for j = 1 : length(bondTypeP) + matrixP_enthalpy(rowP(j), colP(j)) = findBondEnergy2([bondTypeP(j) elementsP(rowP(j)) elementsP(colP(j))], bondsArray, bondsEnergy); + matrixP_enthalpy(colP(j), rowP(j)) = findBondEnergy2([bondTypeP(j) elementsP(rowP(j)) elementsP(colP(j))], bondsArray, bondsEnergy); + matrixP_BBF(rowP(j), colP(j)) = str2double(bondTypeP{j}); + matrixP_BBF(colP(j), rowP(j)) = str2double(bondTypeP{j}); + end + matrixS_enthalpy = zeros(length(matrixP_enthalpy)); + matrixS_BBF = zeros(length(matrixP_BBF)); + + % Bonds for substrate(s) and product(s) + case 3 + for j = 1 : length(bondTypeS) + matrixS_enthalpy(rowS(j), colS(j)) = findBondEnergy2([bondTypeS(j) elementsS(rowS(j)) elementsS(colS(j))], bondsArray, bondsEnergy); + matrixS_enthalpy(colS(j), rowS(j)) = findBondEnergy2([bondTypeS(j) elementsS(rowS(j)) elementsS(colS(j))], bondsArray, bondsEnergy); + matrixS_BBF(rowS(j), colS(j)) = str2double(bondTypeS{j}); + matrixS_BBF(colS(j), rowS(j)) = str2double(bondTypeS{j}); + end + for j = 1 : length(bondTypeP) + matrixP_enthalpy(rowP(j), colP(j)) = findBondEnergy2([bondTypeP(j) elementsP(rowP(j)) elementsP(colP(j))], bondsArray, bondsEnergy); + matrixP_enthalpy(colP(j), rowP(j)) = findBondEnergy2([bondTypeP(j) elementsP(rowP(j)) elementsP(colP(j))], bondsArray, bondsEnergy); + matrixP_BBF(rowP(j), colP(j)) = str2double(bondTypeP{j}); + 
matrixP_BBF(colP(j), rowP(j)) = str2double(bondTypeP{j}); + end + if length(matrixS_enthalpy) ~= length(matrixP_enthalpy) + if length(matrixS_enthalpy) > length(matrixP_enthalpy) + matrixP_enthalpy(length(matrixS_enthalpy),length(matrixS_enthalpy)) = 0; + matrixP_BBF(length(matrixS_BBF),length(matrixS_BBF)) = 0; + else + matrixS_enthalpy(length(matrixP_enthalpy),length(matrixP_enthalpy)) = 0; + matrixS_BBF(length(matrixP_BBF),length(matrixP_BBF)) = 0; + end + end + + end + if exist('matrixS_enthalpy', 'var') && exist('matrixP_enthalpy', 'var') + + bondsBF(i) = sum(sum(abs(matrixS_BBF - matrixP_BBF))') / 2; + totalMatrix = matrixS_enthalpy - matrixP_enthalpy; + bondsE(i) = sum(totalMatrix(find(totalMatrix))) / 2; + + end + + elseif ismember(i, exIdx) + + % Mass inbalanced reactions equal to zero + bondsBF(i, 1) = 0; + bondsE(i, 1) = 0; + + else + + % Missing RXN file equal to NaN + bondsE(i, 1) = NaN; + bondsBF(i, 1) = NaN; + if printLevel > 0 + allSubstrateMass(i) = NaN; + end + + end + + % Assing substrateMass + if printLevel > 0 + allSubstrateMass(i, 1) = substrateMass; + end +end + +% Consider unbalanced as missing +bondsE(unbalancedBool) = NaN; +bondsBF(unbalancedBool) = NaN; + +meanBE = mean(bondsE, 'omitnan'); +meanBBF = mean(bondsBF, 'omitnan'); + +if printLevel > 0 + [mass, idx] = sort(allSubstrateMass); + figure + subplot(1, 2, 1) + scatter(mass, bondsBF(idx), 'filled') + bonds = bondsBF(idx); + mass1 = mass; + mass1(isnan(bonds) | isnan(mass)) = []; + bonds(isnan(bonds) | isnan(mass)) = []; + title({'Total mass of substrates vs bonds', ... + 'broken and formed', ... 
+ ['Correlation = ' num2str(round(corr(mass1, bonds,'Type','Spearman'), 2))]}, 'FontSize', 20) + xlabel('Mass of substrates (amu)', 'FontSize', 18) + ylabel('Number of bonds broken and formed', 'FontSize', 18) + + subplot(1, 2, 2) + scatter(mass, bondsE(idx), 'filled') + be = bondsE(idx); + mass1 = mass; + mass1(isnan(be) | isnan(mass)) = []; + be(isnan(be) | isnan(mass)) = []; + title({'Total mass of substrates vs bond', 'enthalpies', ... + ['Correlation = ' num2str(round(corr(mass1, abs(be),'Type','Spearman'), 2))]}, 'FontSize', 20) + ylabel('Bond enthalpies (kJ/mol)', 'FontSize', 18) + yline(0) + xlabel('Mass of substrates (amu)', 'FontSize', 18) + +end +substrateMass = allSubstrateMass; +end + +function bondsEnergy = findBondEnergy2(bondArray, bondsArray, bondsEnergy) +% Looks for the avarage energy of a chemical bond + +% Missing: bonds with R groups, '1 C Se', '1 N S', '1 O Se', '2 O Se', '1 +% Br C', '1 Cl O', '2 P Se' + +bondArray = strjoin(sort(bondArray)); +bondIdx = find(ismember(bondsArray, bondArray)); + +if ~isempty(bondIdx) + bondsEnergy = bondsEnergy(bondIdx); +else + bondsEnergy = NaN; +end + +end diff --git a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m index 07731eb0b8..1154284402 100644 --- a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m +++ b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m @@ -155,7 +155,7 @@ disp('--------------------------------------------------------------') end -directories = {'inchi'; 'smiles'; 'KEGG'; 'HMDB'; 'PubChem'; 'CHEBI'}; +directories = {'inchi'; 'smiles'; 'kegg'; 'hmdb'; 'pubchem'; 'chebi'}; if dirsToCompare directories = [directories; options.dirNames]; end @@ -177,26 +177,21 @@ fprintf('%s\n\n', 'Obtaining MOL files from chemical databases ...') end -comparisonDir = [metDir filesep 'sourcesComparison' filesep]; -source = [0 0 0 0 0 0 0]; -for i = 1:6 +for i = 1:length(directories) dirBool(i) = 
false; if any(~cellfun(@isempty, regexpi(modelFields, directories{i}))) dirBool(i) = true; - sourceData = source; - sourceData(i + 1) = source(i + 1) + i + 1; - molCollectionReport = obtainMetStructures(model, comparisonDir, false, [], sourceData); - movefile([comparisonDir filesep 'newMol'], ... - [comparisonDir filesep directories{i}]) + molCollectionReport = obtainMetStructures(model, model.mets, outputDir, directories{i}); info.sourcesCoverage.(directories{i}) = molCollectionReport; - info.sourcesCoverage.totalCoverage(i) = molCollectionReport.noOfMets; - info.sourcesCoverage.source{i} = directories{i}; if options.printlevel > 0 disp([directories{i} ':']) display(molCollectionReport) end end end +comparisonDir = [outputDir 'molComparison']; +movefile([outputDir 'metabolites'], comparisonDir) + if ~isempty(dirsToCompare) for i = 1:length(options.dirsToCompare) % Get list of MOL files @@ -206,21 +201,24 @@ metList = metList(~cellfun('isempty', regexp(metList,'(\.mol)$'))); metList = regexprep(metList, '.mol', ''); metList(~ismember(metList, umets)) = []; - info.sourcesCoverage.totalCoverage(i + 6) = length(metList); - info.sourcesCoverage.source{i + 6} = options.dirNames{i}; + info.sourcesCoverage.(options.dirNames{i}).mets = umets; + info.sourcesCoverage.(options.dirNames{i}).metsWithMol = metList; + info.sourcesCoverage.(options.dirNames{i}).metsWithoutMol = setdiff(umets, metList); + info.sourcesCoverage.(options.dirNames{i}).coverage = ... + (numel(info.sourcesCoverage.(options.dirNames{i}).metsWithMol) * 100) /... 
+ numel(molCollectionReport.mets); + if options.printlevel > 0 + disp([options.dirNames{i} ':']) + display(info.sourcesCoverage.(options.dirNames{i})) + end end end + % Remove sources without a single metabolite present the model -if dirsToCompare - emptySourceBool = info.sourcesCoverage.totalCoverage == 0; - info.sourcesCoverage.totalCoverage(emptySourceBool) = []; - directories(emptySourceBool) = []; - dirsToDeleteBool = ismember(options.dirNames, info.sourcesCoverage.source(emptySourceBool)); - options.dirsToCompare(dirsToDeleteBool) = []; - options.dirNames(dirsToDeleteBool) = []; - info.sourcesCoverage.source(emptySourceBool) = []; -else - directories(~dirBool) = []; +for i = length(directories):-1:1 + if info.sourcesCoverage.(directories{i}).coverage == 0 + directories{i} = []; + end end if options.debug @@ -240,7 +238,7 @@ if i > 6 && dirsToCompare sourceDir = options.dirsToCompare{i - 6}; else - sourceDir = [comparisonDir directories{i} filesep]; + sourceDir = [comparisonDir filesep directories{i} filesep]; end % Get list of MOL files @@ -509,7 +507,7 @@ if isfield(options, 'dirNames') && ismember(dirToCopy{1}, options.dirNames) copyfile([options.dirsToCompare{ismember(options.dirNames, dirToCopy{1})} metName '.mol'], tmpDir) else - copyfile([comparisonDir dirToCopy{1} filesep metName '.mol'], tmpDir) + copyfile([comparisonDir filesep dirToCopy{1} filesep metName '.mol'], tmpDir) end end if ~options.keepMolComparison @@ -656,7 +654,7 @@ % Standardise MOL files the most consitent MOL files standardisationReport = standardiseMolDatabase(tmpDir, metList, metDir, standardisationApproach); info.standardisationReport = standardisationReport; -save + if oBabelInstalled % Create table nRows = size(standardisationReport.SMILES, 1); @@ -860,7 +858,7 @@ % Get bond enthalpies and bonds broken and formed if options.printlevel > 0 - display('Obtaining RInChIes and reaction SMILES ...') + display('Calculating bonds broken and formed, and enthalpy change...') end [bondsBF, 
bondsE, meanBBF, meanBE, substrateMass] = findBEandBBF(model, [rxnDir ... diff --git a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m index 7e535ba6dc..1c27cac952 100644 --- a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m +++ b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m @@ -182,10 +182,14 @@ % same molecular structures in the substrates as they are in the % products i.e. A[m] + B[c] -> A[c] + B[m]. mappedTransportRxns = transportRxnAM([rxnDir 'unMapped'], [rxnDir 'atomMapped']); - mappedBool = false(size(rxnsToAM)); - transportBool = ismember(rxnsToAM, mappedTransportRxns); - mappedBool(transportBool) = true; - nonTransport = setdiff(rxnsToAM, rxnsToAM(mappedBool)); + if ~isempty(mappedTransportRxns) + mappedBool = false(size(rxnsToAM)); + transportBool = ismember(rxnsToAM, mappedTransportRxns); + mappedBool(transportBool) = true; + nonTransport = setdiff(rxnsToAM, rxnsToAM(mappedBool)); + else + nonTransport = rxnsToAM; + end % Atom map the rest for i = 1:length(nonTransport) @@ -388,6 +392,7 @@ end end + delete([pwd filesep 'tmp.mol']) atomMappingReport.rxnFilesWritten = rxnsToAM; atomMappingReport.balanced = rxnsToAM(~unbalancedBool); atomMappingReport.unbalanced = rxnsToAM(unbalancedBool); diff --git a/src/dataIntegration/chemoInformatics/obtainMetStructures.m b/src/dataIntegration/chemoInformatics/obtainMetStructures.m index 285317ba26..6760aa20cf 100644 --- a/src/dataIntegration/chemoInformatics/obtainMetStructures.m +++ b/src/dataIntegration/chemoInformatics/obtainMetStructures.m @@ -1,88 +1,64 @@ -function molCollectionReport = obtainMetStructures(model, outputDir, updateDB, standardisationApproach, orderOfPreference) -% Obtain MDL MOL files from various databases, including KEGG, HMDB, ChEBI, -% and PubChem. 
Alternatively, openBabel can be used to convert InChI +function molCollectionReport = obtainMetStructures(model, metList, outputDir, sources) +% Obtain MDL MOL files from various databases, including KEGG, HMDB, ChEBI, +% and PubChem. Alternatively, openBabel can be used to convert InChI % strings or SMILES in MDL MOL files. % % USAGE: % -% missingMolFiles = obtainMetStructures(model, outputDir, updateDB, standardisationApproach, orderOfPreference) +% molCollectionReport = obtainMetStructures(model, mets, sources, standardisationApproach) % % INPUTS: -% model: COBRA model with following fields: +% model: COBRA model with following fields: % -% * .S - The m x n stoichiometric matrix for the -% metabolic network. -% * .mets - An m x 1 array of metabolite identifiers. -% * .metInChIString - An m x 1 array of metabolite identifiers. -% * .metSmiles - An m x 1 array of metabolite identifiers. -% * .metVMHID - An m x 1 array of metabolite identifiers. -% * .metCHEBIID - An m x 1 array of metabolite identifiers. -% * .metKEGGID - An m x 1 array of metabolite identifiers. -% * .metPubChemID - An m x 1 array of metabolite identifiers. -% * .metHMDBID - An m x 1 array of metabolite identifiers. +% * .S - The m x n stoichiometric matrix for the metabolic network. +% * .mets - An m x 1 array of metabolite identifiers. +% * .metInChIString - An m x 1 array of metabolite identifiers. +% * .metSmiles - An m x 1 array of metabolite identifiers. +% * .metVMHID - An m x 1 array of metabolite identifiers. +% * .metCHEBIID - An m x 1 array of metabolite identifiers. +% * .metKEGGID - An m x 1 array of metabolite identifiers. +% * .metPubChemID - An m x 1 array of metabolite identifiers. +% * .metHMDBID - An m x 1 array of metabolite identifiers. % % OPTIONAL INPUTS: -% outputDir: Path to directory that will contain the MOL files -% (default: current directory). -% updateDB: Logical value idicating if the database will be -% updated or not. 
If it's true, "outputDir" should -% contain an existing database (default: false). -% standardisationApproach: String contianing the type of standarization for -% the moldecules (default: empty) -% * explicitH - Normal chemical graphs. -% * implicitH - Hydrogen suppressed chemical -% graphs. -% * Neutral - Chemical graphs with protonated -% molecules. -% * basic - Adding the header. -% orderOfPreference: Vector indicating the source of preference -% (default: 1:7) -% 1.- VMH (http://vmh.life/) -% 2.- InChI (requires openBabel) -% 3.- Smiles (requires openBabel) -% 4.- KEGG (https://www.genome.jp/) -% 5.- HMDB (https://hmdb.ca/) -% 6.- PubChem (https://pubchem.ncbi.nlm.nih.gov/) -% 7.- CHEBI (https://www.ebi.ac.uk/) +% mets: List of metabolites to be download (Default: All) +% outputDir: Directory that will contain the obtained metabolite structures. +% sources: Sources where the MOL files will be obtained (Default: all). +% The sources supported are: +% +% 1.- 'inchi' (requires openBabel) +% 2.- 'smiles' (requires openBabel) +% 3.- 'kegg' (https://www.genome.jp/) +% 4.- 'hmdb' (https://hmdb.ca/) +% 5.- 'pubchem' (https://pubchem.ncbi.nlm.nih.gov/) +% 6.- 'chebi' (https://www.ebi.ac.uk/) % % OUTPUTS: -% missingMolFiles: List of missing MOL files -% nonStandardised: List of non-standardised MDL MOL file. 
+% molCollectionReport: Report of the obtained MDL MOL files +% -if nargin < 2 || isempty(outputDir) +if nargin < 2 || isempty(metList) + metList = unique(regexprep(model.mets, '(\[\w\])', '')); +else + metList = unique(regexprep(metList, '(\[\w\])', '')); +end +if nargin < 3 || isempty(outputDir) outputDir = [pwd filesep]; else % Make sure input path ends with directory separator outputDir = [regexprep(outputDir,'(/|\\)$',''), filesep]; end -if nargin < 3 - updateDB = false; +if nargin < 4 || isempty(sources) + sources = {'VMH'; 'inchi'; 'smiles'; 'kegg'; 'hmdb'; 'pubchem'; 'chebi'}; end -if nargin < 4 - standardisationApproach = []; -end -if nargin < 5 - orderOfPreference = 1:7; -end - [oBabelInstalled, ~] = system('obabel'); -webTimeout = weboptions('Timeout', 30); -% Set directories -if exist([outputDir 'newMol'], 'dir') == 0 - mkdir([outputDir 'newMol']) -end -newMolFilesDir = [outputDir 'newMol' filesep]; -if updateDB - if exist([outputDir 'met' filesep standardisationApproach filesep], 'dir') ~= 0 - modelMets = regexprep(model.mets,'(\[\w\])',''); - fnames = dir([newMolFilesDir '*.mol']); - model = removeMetabolites(model, model.mets(~ismember(modelMets, setdiff(modelMets, split([fnames(:).name], '.mol'))))); - else - display('Directory with MOL files was not found to be updated in:') - display([outputDir 'met' filesep standardisationApproach filesep]) - display('A new database will be created') - end +webTimeout = weboptions('Timeout', 60); + +% Set directory +newMolFilesDir = [outputDir 'metabolites' filesep]; +if exist(newMolFilesDir, 'dir') == 0 + mkdir(newMolFilesDir) end %% Obtain met data @@ -127,253 +103,199 @@ %% Obtain met structures % Unique metabolites idexes -[umets, ia] = unique(regexprep(model.mets, '(\[\w\])', '')); +mets = regexprep(model.mets, '(\[\w\])', ''); % umets = model.mets; % ia = 1:numel(model.mets); -missingMetBool = true(length(umets), 1); +missingMetBool = true(length(metList), 1); % Obtain MDL MOL files idsToCheck = {}; -for i 
= 1:length(umets) - for j = 1:7 - switch orderOfPreference(j) - - case 1 - % VMH - % if prod(~isnan(VMH{metIdxs(i)})) && ~isempty(VMH{metIdxs(i)}) && exist('VMH', 'var') && missing - % - % end - - case 2 % inchi - if prod(~isnan(inchis{ia(i)})) && ~isempty(inchis{ia(i)}) && oBabelInstalled && missingMetBool(i) - try - fid2 = fopen([outputDir 'tmp'], 'w'); - fprintf(fid2, '%s\n', inchis{ia(i)}); - fclose(fid2); - command = ['obabel -iinchi ' outputDir 'tmp -O ' newMolFilesDir umets{i} '.mol mol']; - [status, cmdout] = system(command); - if contains(cmdout, '1 molecule converted') - missingMetBool(i) = false; - end - delete([outputDir 'tmp']) - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = inchis(ia(i)); - end - end - - case 3 % Smiles - if prod(~isnan(smiles{ia(i)})) && ~isempty(smiles{ia(i)}) && oBabelInstalled && missingMetBool(i) - try - fid2 = fopen([outputDir 'tmp'], 'w'); - fprintf(fid2, '%s\n', smiles{ia(i)}); - fclose(fid2); - command = ['obabel -ismi ' outputDir 'tmp -O ' newMolFilesDir umets{i} '.mol mol']; - [status,cmdout] = system(command); - if status == 0 - missingMetBool(i) = false; - end - delete([outputDir 'tmp']) - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = smiles(ia(i)); - end - end - - case 4 % KEGG - if prod(~isnan(keggIDs{ia(i)})) && ~isempty(keggIDs{ia(i)}) && missingMetBool(i) - try - switch keggIDs{ia(i)}(1) - case 'C' - molFile = webread(['https://www.genome.jp/dbget-bin/www_bget?-f+m+compound+' keggIDs{ia(i)}], webTimeout); - case 'D' - molFile = webread(['https://www.kegg.jp/dbget-bin/www_bget?-f+m+drug+' keggIDs{ia(i)}], webTimeout); - end - if ~isempty(regexp(molFile, 'M END')) - fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w'); - fprintf(fid2, '%s\n', molFile); - fclose(fid2); - missingMetBool(i) = false; - end - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = keggIDs(ia(i)); - end - end - - case 5 % HMDB - if prod(~isnan(hmdbIDs{ia(i)})) && ~isempty(hmdbIDs{ia(i)}) && missingMetBool(i) - try - numbersID 
= hmdbIDs{ia(i)}(5:end); - if size(numbersID, 2) < 7 - numbersID = [repelem('0', 7 - size(numbersID, 2)) numbersID]; - end - molFile = webread(['https://hmdb.ca/structures/metabolites/HMDB' numbersID '.mol'], webTimeout); - if ~isempty(regexp(molFile, 'M END')) - fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w'); - fprintf(fid2, '%s\n', molFile); - fclose(fid2); - missingMetBool(i) = false; - end - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = hmdbIDs(ia(i)); - end - end - - case 6 % PubChem - if prod(~isnan(PubChemIDs{ia(i)})) && ~isempty(PubChemIDs{ia(i)}) && missingMetBool(i) - try - molFile = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/'... - num2str(PubChemIDs{ia(i)}) ... - '/record/SDF/?record_type=2d&response_type=display'], webTimeout); - % Delete all after 'M END' from the SDF filte to - % make it MOL file - if ~isempty(regexp(molFile, 'M END')) - molFile(regexp(molFile, 'M END') + 6:end) = []; - fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w'); - fprintf(fid2, '%s\n', molFile); - fclose(fid2); - missingMetBool(i) = false; - end - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = PubChemIDs(ia(i)); - end - end - - case 7 % ChEBI - if prod(~isnan(chebiIDs{ia(i)})) && ~isempty(chebiIDs{ia(i)}) && missingMetBool(i) - try - molFile = webread(['https://www.ebi.ac.uk/chebi/saveStructure.do?defaultImage=true&chebiId=' num2str(chebiIDs{ia(i)}) '&imageId=0'], webTimeout); - if ~isempty(regexp(molFile, 'M END')) - fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w'); - fprintf(fid2, '%s\n', molFile); - fclose(fid2); - missingMetBool(i) = false; - end - catch ME - disp(ME.message) - idsToCheck(end + 1, 1) = chebiIDs(ia(i)); - end - end +for i = 1:length(metList) + + % identify met in model + idx = find(ismember(mets, metList{i})); + + % InChI + if ~isempty(inchis{idx(1)}) && oBabelInstalled && ismember({'inchi'}, sources) + try + saveFileDir = [newMolFilesDir 'inchi' filesep]; + if exist(saveFileDir, 'dir') == 0 + 
mkdir(saveFileDir) + end + newFormat = openBabelConverter(inchis{idx(1)}, 'mol', [saveFileDir ... + metList{i} '.mol']); + missingMetBool(i) = false; + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['inchi - ' keggIDs{idx(1)}]; end end -end - -%% Standardise Mol Files - -if ~isempty(standardisationApproach) - % Set up directories - switch standardisationApproach - case 'explicitH' - standardisedDir = [outputDir 'explicitH' filesep]; - case 'implicitH' - standardisedDir = [outputDir 'implicitH' filesep]; - case 'protonated' - standardisedDir = [outputDir 'protonated' filesep]; - otherwise - standardisationApproach = 'basic'; - standardisedDir = molDir; + % SMILES + if ~isempty(inchis{idx(1)}) && oBabelInstalled && ismember({'smiles'}, sources) + try + saveFileDir = [newMolFilesDir 'smiles' filesep]; + if exist(saveFileDir, 'dir') == 0 + mkdir(saveFileDir) + end + newFormat = openBabelConverter(smiles{idx(1)}, 'mol', [saveFileDir ... + metList{i} '.mol']); + missingMetBool(i) = false; + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['inchi - ' keggIDs{idx(1)}]; + end end - % Standardise files - umets(missingMetBool) = []; - standardisationReport = standardiseMolDatabase(tmpDir, umets, standardisedDir, standardisationApproach); + % KEGG + if ~isempty(keggIDs{idx(1)}) && ismember({'kegg'}, sources) + saveFileDir = [newMolFilesDir 'kegg' filesep]; + if exist(saveFileDir, 'dir') == 0 + mkdir(saveFileDir) + end + try + switch keggIDs{idx(1)}(1) + case 'C' + molFile = webread(['https://www.genome.jp/dbget-bin/www_bget?-f+m+compound+' keggIDs{idx}], webTimeout); + case 'D' + molFile = webread(['https://www.kegg.jp/dbget-bin/www_bget?-f+m+drug+' keggIDs{idx}], webTimeout); + end + if ~isempty(regexp(molFile, 'M END')) + fid2 = fopen([newMolFilesDir 'kegg' filesep metList{i} '.mol'], 'w'); + fprintf(fid2, '%s\n', molFile); + fclose(fid2); + missingMetBool(i) = false; + end + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['kegg - ' keggIDs{idx(1)}]; 
+ end + end - % Get SMILES and InChIs - if isfield(standardisationReport, 'SMILES') - SMILES = standardisationReport.SMILES; - else - SMILES = ''; + % HMDB + if ~isempty(hmdbIDs{idx(1)}) && ismember({'hmdb'}, sources) + saveFileDir = [newMolFilesDir 'hmdb' filesep]; + if exist(saveFileDir, 'dir') == 0 + mkdir(saveFileDir) + end + try + numbersID = hmdbIDs{idx(1)}(5:end); + if size(numbersID, 2) < 7 + numbersID = [repelem('0', 7 - size(numbersID, 2)) numbersID]; + end + molFile = webread(['https://hmdb.ca/structures/metabolites/HMDB' numbersID '.mol'], webTimeout); + if ~isempty(regexp(molFile, 'M END')) + fid2 = fopen([newMolFilesDir 'hmdb' filesep metList{i} '.mol'], 'w'); + fprintf(fid2, '%s\n', molFile); + fclose(fid2); + missingMetBool(i) = false; + end + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['hmdb - ' hmdbIDs{idx(1)}]; + end end - if isfield(standardisationReport, 'InChIs') - InChIs = standardisationReport.InChIs; - else - InChIs = ''; + % + % + % % hmdb + % if ~isempty(hmdbIDs{ia(i)}) && ismember('hmdb', sources) + % saveFileDir = [newMolFilesDir 'hmdb' filesep]; + % if exist(saveFileDir, 'dir') == 0 + % mkdir(saveFileDir) + % end + % try + % numbersID = hmdbIDs{idx}(5:end); + % if size(numbersID, 2) < 7 + % numbersID = [repelem('0', 7 - size(numbersID, 2)) numbersID]; + % end + % molFile = webread(['https://hmdb.ca/structures/metabolites/HMDB' numbersID '.mol'], webTimeout); + % if ~isempty(regexp(molFile, 'M END')) + % fid2 = fopen([newMolFilesDir idx '.mol'], 'w'); + % fprintf(fid2, '%s\n', molFile); + % fclose(fid2); + % missingMetBool(i) = false; + % end + % catch ME + % disp(ME.message) + % idsToCheck(end + 1, 1) = hmdbIDs(ia(i)); + % end + % end + + + % PubChem + if ~isempty(PubChemIDs{idx(1)}) && ismember({'pubchem'}, sources) + saveFileDir = [newMolFilesDir 'pubchem' filesep]; + if exist(saveFileDir, 'dir') == 0 + mkdir(saveFileDir) + end + try + molFile = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/'... 
+ num2str(PubChemIDs{idx(1)}) ... + '/record/SDF/?record_type=2d&response_type=display'], webTimeout); + if ~isempty(regexp(molFile, 'M END')) + molFile(regexp(molFile, 'M END') + 6:end) = []; + fid2 = fopen([newMolFilesDir 'pubchem' filesep metList{i} '.mol'], 'w'); + fprintf(fid2, '%s\n', molFile); + fclose(fid2); + missingMetBool(i) = false; + end + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['pubchem - ' PubChemIDs{idx(1)}]; + end end - % Delete empty cells - InChIs(cellfun(@isempty, InChIs)) = []; - SMILES(cellfun(@isempty, SMILES)) = []; + % + % + % case 6 + % if prod(~isnan(PubChemIDs{ia(i)})) && ~isempty(PubChemIDs{ia(i)}) && missingMetBool(i) + % try + % molFile = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/'... + % num2str(PubChemIDs{ia(i)}) ... + % '/record/SDF/?record_type=2d&response_type=display'], webTimeout); + % % Delete all after 'M END' from the SDF filte to + % % make it MOL file + % if ~isempty(regexp(molFile, 'M END')) + % molFile(regexp(molFile, 'M END') + 6:end) = []; + % fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w'); + % fprintf(fid2, '%s\n', molFile); + % fclose(fid2); + % missingMetBool(i) = false; + % end + % catch ME + % disp(ME.message) + % idsToCheck(end + 1, 1) = PubChemIDs(ia(i)); + % end + % end + % - if updateDB && ~isempty(InChIs) && ~isempty(SMILES) - - % For InChIs - if isfile([standardisedDir 'InChIs']) - % Merge old and new InChIs - InChIsFile = regexp( fileread([standardisedDir 'InChIs']), '\n', 'split')'; - InChIsFile(cellfun(@isempty, InChIsFile)) = []; - InChIsFileSp = split(InChIsFile, ' - '); - smilesSp = split(InChIs, ' - '); - mergedSmiles(:, 2) = unique([InChIsFileSp(:, 2); smilesSp(:, 2)]); - mergedSmiles(ismember(mergedSmiles(:, 2), smilesSp(:, 2)), 1) = smilesSp(:, 2); - mergedSmiles(ismember(mergedSmiles(:, 2), InChIsFileSp(:, 2)), 1) = InChIsFileSp(:, 2); - mergedSmiles = strcat(mergedSmiles(:, 1), {' - '}, mergedSmiles(:, 2)); - % Write InChIs - fid2 = 
fopen([standardisedDir 'InChIs'], 'w'); - fprintf(fid2, '%s\n', mergedSmiles{:}); - fclose(fid2); - else - % Write InChIs - fid2 = fopen([standardisedDir 'InChIs'], 'w'); - fprintf(fid2, '%s\n', InChIs{:}); - fclose(fid2); + % ChEBI + if ~isempty(chebiIDs{idx(1)}) && ismember({'chebi'}, sources) + saveFileDir = [newMolFilesDir 'chebi' filesep]; + if exist(saveFileDir, 'dir') == 0 + mkdir(saveFileDir) end - - % For SMILES - if isfile([standardisedDir 'SMILES']) - % Merge old and new InChIs - smilesFile = regexp( fileread([standardisedDir 'SMILES']), '\n', 'split')'; - smilesFile(cellfun(@isempty, smilesFile)) = []; - smilesFileSp = split(smilesFile, ' - '); - smilesSp = split(SMILES, ' - '); - mergedSmiles(:, 2) = unique([smilesFileSp(:, 2); smilesSp(:, 2)]); - mergedSmiles(ismember(mergedSmiles(:, 2), smilesSp(:, 2)), 1) = smilesSp(:, 2); - mergedSmiles(ismember(mergedSmiles(:, 2), smilesFileSp(:, 2)), 1) = smilesFileSp(:, 2); - mergedSmiles = strcat(mergedSmiles(:, 1), {' - '}, mergedSmiles(:, 2)); - % Write InChIs - fid2 = fopen([standardisedDir 'SMILES'], 'w'); - fprintf(fid2, '%s\n', mergedSmiles{:}); - fclose(fid2); - else - % Write InChIs - fid2 = fopen([standardisedDir 'SMILES'], 'w'); - fprintf(fid2, '%s\n', SMILES{:}); - fclose(fid2); + try + molFile = webread(['https://www.ebi.ac.uk/chebi/saveStructure.do?defaultImage=true&chebiId=' num2str(chebiIDs{idx}) '&imageId=0'], webTimeout); + if ~isempty(regexp(molFile, 'M END')) + fid2 = fopen([newMolFilesDir 'chebi' filesep metList{i} '.mol'], 'w'); + fprintf(fid2, '%s\n', molFile); + fclose(fid2); + missingMetBool(i) = false; + end + catch ME + disp(ME.message) + idsToCheck{end + 1, 1} = ['chebi - ' chebiIDs{idx(1)}]; end - - else - % Write InChIs - fid2 = fopen([standardisedDir 'InChIs'], 'w'); - fprintf(fid2, '%s\n', InChIs{:}); - fclose(fid2); - % Write SMILES - fid2 = fopen([standardisedDir 'SMILES'], 'w'); - fprintf(fid2, '%s\n', SMILES{:}); - fclose(fid2); end end %% Report % Make report 
-molCollectionReport.noOfMets = size(umets, 1); -molCollectionReport.noOfMetsWithMol = sum(~missingMetBool); -molCollectionReport.noOfMetsWithoutMol = sum(missingMetBool); -molCollectionReport.coverage = (molCollectionReport.noOfMetsWithMol * 100) / molCollectionReport.noOfMets; - -% Check standardised data -if ~isempty(standardisationApproach) - nRows = size(standardisationReport.SMILES, 1); - varTypes = {'string', 'string', 'string', 'string'}; - varNames = {'mets', 'InChIKeys', 'InChIs', 'SMILES'}; - molCollectionReport.standardisationReport = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames); - molCollectionReport.standardisationApproach = standardisationApproach; - molCollectionReport.standardisationReport(1:end) = standardisationReport.standardised; - molCollectionReport.standardisationReport.InChIKeys(1:size(standardisationReport.InChIKeys, 1)) = standardisationReport.InChIKeys; - molCollectionReport.standardisationReport.InChIs(1:size(standardisationReport.InChIs, 1)) = standardisationReport.InChIs; - molCollectionReport.standardisationReport.SMILES(1:size(standardisationReport.SMILES, 1)) = standardisationReport.SMILES; -end +molCollectionReport.mets = metList; +molCollectionReport.metsWithMol = metList(~missingMetBool); +molCollectionReport.metsWithoutMol = metList(missingMetBool); +molCollectionReport.coverage = (numel(molCollectionReport.metsWithMol) * 100) / numel(molCollectionReport.mets); +molCollectionReport.idsToCheck = idsToCheck; end \ No newline at end of file diff --git a/src/dataIntegration/chemoInformatics/openBabelConverter.m b/src/dataIntegration/chemoInformatics/openBabelConverter.m new file mode 100644 index 0000000000..2b00ed783f --- /dev/null +++ b/src/dataIntegration/chemoInformatics/openBabelConverter.m @@ -0,0 +1,103 @@ +function newFormat = openBabelConverter(origFormat, outputFormat, saveFileDir) +% This function converts chemoformatic formats using OpenBabel. 
It requires +% to have openbabel installed. The formats that can be converted are used +% MDL MOL, SMILES, InChI, InChIKey, MDL RXN, reaction SMILES and rInChI. +% +% USAGE: +% +% newStructure = openBabelConverter(origFormat, outputFormat, saveFile) +% +% INPUT: +% origFormat: Original chemoinformatic format. Chemical tables such +% as MDL MOL or MDL RXN must be provided as files +% outputFormat: The format to be converted. Formats supported: smiles, +% mol, inchi, inchikey, rxn and rinchi. +% +% OPTIONAL INPUTS: +% saveFileDir: String with the directory where the new format will be +% saved. If is empty, the format is not saved. +% +% EXAMPLE: +% +% Example 1 (MDL MOL to InChI): +% origFormat = [pwd filesep 'alanine.mol']; +% outputFormat = 'inchi'; +% newFormat = openBabelConverter(origFormat, outputFormat); +% +% Example 2 (InChI to SMILES): +% origFormat = 'InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1'; +% outputFormat = 'smiles'; +% newFormat = openBabelConverter(origFormat, outputFormat); +% +% Example 3 (SMILES to mol): +% origFormat = 'C[C@@H](C(=O)O)N'; +% outputFormat = 'mol'; +% newFormat = openBabelConverter(origFormat, outputFormat); + +if nargin < 3 || isempty(saveFileDir) + toSave = false; +else + toSave = true; +end + +% Identify the chemoinformatic format of the input +if isfile(origFormat) + ctfFile = regexp(fileread(origFormat), '\n', 'split')'; + if contains(ctfFile{1}, '$RXN') + inputType = 'rxn'; + else + inputType = 'mol'; + end +elseif contains(origFormat, 'InChI=') + inputType = 'inchi'; +else + inputType = 'smiles'; +end + +% Convert +if ismember(inputType, {'inchi'; 'smiles'}) + + fid2 = fopen('tmp', 'w'); + fprintf(fid2, '%s\n', origFormat); + fclose(fid2); + [~, cmdout] = system(['obabel -i' inputType ' tmp -o' outputFormat]); + delete('tmp') + +else + switch inputType + case 'mol' + [~, cmdout] = system(['obabel -imol ' origFormat ' -o' outputFormat]); + case 'rxn' + [~, cmdout] = system(['obabel -irxn ' origFormat ' -o' 
outputFormat]); + end +end + +% Prepare the output +cmdout = splitlines(cmdout); +switch outputFormat + case 'mol' + startIdx = find(cellfun(@isempty, cmdout)); + endIdx = find(ismember(cmdout, 'M END')); + newFormat = cmdout(startIdx(1):endIdx); + + case 'inchikey' + cmdout = split(cmdout{end - 2}); + newFormat = cmdout{1}; + + case 'inchi' + newFormat = cmdout{contains(cmdout,'InChI=1S')}; + + case 'smiles' + cmdout = split(cmdout{end - 2}); + newFormat = cmdout{1}; + + case 'rinchi' + newFormat = cmdout{contains(cmdout,'RInChI=')}; +end + +% Save the file +if toSave + fid2 = fopen(saveFileDir, 'w'); + fprintf(fid2, '%s\n', newFormat{:}); + fclose(fid2); +end From 0828112838baac8c3e46a7e8164fb631a3bdb5f4 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:05:58 +0100 Subject: [PATCH 74/82] summer changes --- src/analysis/thermo/inchi/old/sdf2inchi.m | 5 +++ .../thermo/protons/old/estimate_pKa.m | 2 ++ .../trainingModel/cache/trainingModel.mat | Bin 276829 -> 276829 bytes .../identifyConservedMoieties.m | 29 ++++++++++++------ src/base/install/updateCobraToolbox.m | 1 + .../buildAtomTransitionMultigraph.m | 27 +++++++++++----- .../AuxillaryFiles/createXMatrix2.m | 12 ++++---- .../AuxillaryFiles/generateSUXComp.m | 2 +- .../findFluxConsistentSubset.m | 3 ++ 9 files changed, 57 insertions(+), 24 deletions(-) diff --git a/src/analysis/thermo/inchi/old/sdf2inchi.m b/src/analysis/thermo/inchi/old/sdf2inchi.m index f149e8ec1a..6f8be2921a 100644 --- a/src/analysis/thermo/inchi/old/sdf2inchi.m +++ b/src/analysis/thermo/inchi/old/sdf2inchi.m @@ -44,6 +44,11 @@ [inchi,metList] = strtok(result); inchi = strtrim(inchi); metList = strtrim(metList); + if isempty(inchi) + [success,result] = system(['babel ' sdfFileName ' -oinchi' options]) + fprintf('%s\n','If you get a ''not found'' message from the call to Babel, make sure that Matlab''s LD_LIBRARY_PATH is edited to include correct system libraries. 
See initVonBertylanffy') + error('Conversion to InChI not successful. Make sure OpenBabel is installed correctly.\n') + end else [success,result] = system(['babel ' sdfFileName ' -oinchi' options]) fprintf('%s\n','If you get a ''not found'' message from the call to Babel, make sure that Matlab''s LD_LIBRARY_PATH is edited to include correct system libraries. See initVonBertylanffy') diff --git a/src/analysis/thermo/protons/old/estimate_pKa.m b/src/analysis/thermo/protons/old/estimate_pKa.m index adb774a2b2..092a33109e 100644 --- a/src/analysis/thermo/protons/old/estimate_pKa.m +++ b/src/analysis/thermo/protons/old/estimate_pKa.m @@ -193,6 +193,8 @@ pkas(2:end,1:end-1) = diag(pkalist); pkas = pkas + pkas'; + %here is where the ph = 7 is set + %TODO: make the pH an argument to this function mmsbool = false(size(pkas,1),1); if any(pkalist <= 7) mmsbool(find(pkalist <= 7,1)) = true; diff --git a/src/analysis/thermo/trainingModel/cache/trainingModel.mat b/src/analysis/thermo/trainingModel/cache/trainingModel.mat index 9e787dfc62983ff706c72a42fd7be03a70a94493..9e60c260433274034b00150fe9854f3c1e76d2ef 100644 GIT binary patch delta 55 zcmccnN#O1$fe9uO!HFdbUZr^oMrI0zCRQfKR;C6DMg~TP69bheCa^Y^w3aZomN2!J LFmEkkd1(Ov837V; delta 55 zcmccnN#O1$fe9uOAsM9#zKN9zMg|H77FNa The SHA1 of the last commit could not be retrieved.'); + lastCommit = []; else lastCommit = result_gitLastCommit(1:6); end diff --git a/src/dataIntegration/fluxomics/atomTransition/buildAtomTransitionMultigraph.m b/src/dataIntegration/fluxomics/atomTransition/buildAtomTransitionMultigraph.m index f022daf257..a261131db7 100644 --- a/src/dataIntegration/fluxomics/atomTransition/buildAtomTransitionMultigraph.m +++ b/src/dataIntegration/fluxomics/atomTransition/buildAtomTransitionMultigraph.m @@ -17,18 +17,27 @@ % inherits the orientation of its corresponding reaction. 
% % A stoichimetric matrix may be decomposed into a set of atom transitions -% That is: -% N = inv(M2Ai*M2Ai')*M2Ai*Ti*Ti2R; +% with the following atomic decomposition: % -% Note that M2Ai*M2Ai' is a diagonal matrix, where each diagonal entry is -% the number of atoms in each metabolite. +% N=\left(VV^{T}\right)^{-1}VAE +% +% VV^{T} is a diagonal matrix, where each diagonal entry is the number of +% atoms in each metabolite, so V*V^{T}*N = V*A*E +% +% With respect to the input, N is the subset of model.S corresponding to atom mapped reactions +% +% With respect to the output V := M2Ai +% E := Ti2R +% A := incidence(dATM); +% so we have the atomic decomposition M2Ai*M2Ai'*N = M2Ai*A*Ti2R % % USAGE: % -% ATN = buildAtomTransitionNetwork(model, rxnfileDir, options) +% [dATM, metAtomMappedBool, rxnAtomMappedBool, M2Ai, Ti2R] = buildAtomTransitionNetwork(model, rxnfileDir, options) % % INPUTS: -% model: Structure with following fields: +% model: Directed stoichiometric hypergraph +% Represented by a matlab structure with following fields: % % * .S - The `m` x `n` stoichiometric matrix for the metabolic network % * .mets - An `m` x 1 array of metabolite identifiers. Should match @@ -37,9 +46,13 @@ % rxnfile names in `rxnFileDir`. % * .lb - An `n` x 1 vector of lower bounds on fluxes. % * .ub - An `n` x 1 vector of upper bounds on fluxes. +% % rxnfileDir: Path to directory containing `rxnfiles` with atom mappings % for internal reactions in `S`. File names should % correspond to reaction identifiers in input `rxns`. +% e.g. git clone https://github.com/opencobra/ctf ~/fork-ctf +% then rxnfileDir = ~/fork-ctf/rxns/atomMapped +% % options: % *.directed - transition split into two oppositely % directed edges for reversible reactions @@ -70,7 +83,7 @@ % .. Authors: - Hulda S. Haraldsdóttir and Ronan M. T. Fleming, June 2015 -% Ronan M. T. Fleming, 2020 revision. +% Ronan M. T. Fleming, 2020, 2021 revision. 
if ~exist('options','var') options=[]; diff --git a/src/reconstruction/fastGapFill/AuxillaryFiles/createXMatrix2.m b/src/reconstruction/fastGapFill/AuxillaryFiles/createXMatrix2.m index 80fc07423e..fd64b62725 100644 --- a/src/reconstruction/fastGapFill/AuxillaryFiles/createXMatrix2.m +++ b/src/reconstruction/fastGapFill/AuxillaryFiles/createXMatrix2.m @@ -69,7 +69,7 @@ if transport == 0 R = ['sink_' compounds(i) '[c]']; sub = cellstr([compounds{i} '[c]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000,'upperBound',10000); elseif transport == 1 %currently only this branch is taken. @@ -77,24 +77,24 @@ R = ['EX_' compounds(i) '[e]']; sub = cellstr([compounds{i} '[e]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000,'upperBound',10000); % creates transport reaction from [c] to [e] R = [compounds(i) 'tr']; sub = cellstr([compounds{i} '[e]']); prod = cellstr([compounds{i} '[c]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000,'upperBound',10000); elseif (strcmp(compartment,'[p]')==1) % keep this branch the same for now. 
R = ['EX_' compounds{i} '[e]']; sub = cellstr([compounds{i} '[e]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',sub(1),'stoichCoeffList',[-1],'lowerBound',-10000,'upperBound',10000); % creates transport reaction from [c] to [p] R = [compounds{i} 'tpr']; sub = cellstr([compounds{i} '[p]']); prod = cellstr([compounds{i} '[c]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000,'upperBound',10000); % creates transport reaction from [p] to [e] R = [compounds{i} 'tr']; @@ -103,7 +103,7 @@ sub = cellstr([compounds{i} '[e]']); prod = cellstr([compounds{i} '[p]']); - [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000','upperBound',10000); + [ExchangeRxnMatrix] = addReaction(ExchangeRxnMatrix,R,'metaboliteList',[sub(1) prod(1)],'stoichCoeffList',[-1 1],'lowerBound',-10000,'upperBound',10000); elseif (strcmp(compartment,'all'))==1 % [m],[n],[g],[l],[x],[r] % if compound(i) exists in a compartment than add a diff --git a/src/reconstruction/fastGapFill/AuxillaryFiles/generateSUXComp.m b/src/reconstruction/fastGapFill/AuxillaryFiles/generateSUXComp.m index 7a3c889c69..a0f3da7beb 100644 --- a/src/reconstruction/fastGapFill/AuxillaryFiles/generateSUXComp.m +++ b/src/reconstruction/fastGapFill/AuxillaryFiles/generateSUXComp.m @@ -1,4 +1,4 @@ -function MatricesSUX =generateSUXComp(model, dictionary, KEGGFilename, KEGGBlackList, listCompartments,KEGGMatrixLoad) +function MatricesSUX =generateSUXComp(model, dictionary, KEGGFilename, KEGGBlackList, listCompartments, KEGGMatrixLoad) % Creates 
the matrices for gap filling for compartmentalized metabolic models (`S`) such % that the universal database (`U`, e.g., KEGG) is placed in each compartment % specified and reversible transport reactions (`X`) are added for each compound present in diff --git a/src/reconstruction/modelGeneration/fluxConsistency/findFluxConsistentSubset.m b/src/reconstruction/modelGeneration/fluxConsistency/findFluxConsistentSubset.m index 365fa4e4c3..c0b99d9a08 100644 --- a/src/reconstruction/modelGeneration/fluxConsistency/findFluxConsistentSubset.m +++ b/src/reconstruction/modelGeneration/fluxConsistency/findFluxConsistentSubset.m @@ -74,6 +74,9 @@ end end +if ~isfield(model,'c') + model.c = zeros(size(model.S,2),1); +end sol = optimizeCbModel(model); if (sol.stat == 1) From f03fa4f3aa7b2f928a747ac69e65a6e1eb8139a0 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:12:45 +0100 Subject: [PATCH 75/82] updated submodules --- binary | 2 +- external/analysis/PolytopeSamplerMatlab | 2 +- papers | 2 +- tutorials | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/binary b/binary index e2c2603aff..e2c9bc538a 160000 --- a/binary +++ b/binary @@ -1 +1 @@ -Subproject commit e2c2603aff994e2ccda28e86dcf4f70be6fd70d7 +Subproject commit e2c9bc538a1855704e29adb18502c06ce697bc2e diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index cfa3ab9b6b..83e69fb652 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit cfa3ab9b6b3c0c305ea4d0256d999a00cad834d6 +Subproject commit 83e69fb65213a3481d16d2949fe54636af7e0a53 diff --git a/papers b/papers index 521289b25c..92e4835344 160000 --- a/papers +++ b/papers @@ -1 +1 @@ -Subproject commit 521289b25ca80841c213289aae7c2bcb432ce240 +Subproject commit 92e4835344c7f2ab6d55c52dbb1634e7ec599651 diff --git a/tutorials b/tutorials index f1e2483dac..0236f4e42a 160000 --- a/tutorials +++ b/tutorials @@ -1 +1 @@ 
-Subproject commit f1e2483dac9290fab549d794c35963f0be38f8f2 +Subproject commit 0236f4e42a01dea4a190c6205b1ab356380d6948 From cdf7859156a9e4018097e374cced514436d482ce Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:13:45 +0100 Subject: [PATCH 76/82] updated submodules --- binary | 2 +- external/analysis/PolytopeSamplerMatlab | 2 +- papers | 2 +- tutorials | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/binary b/binary index e2c9bc538a..e2c2603aff 160000 --- a/binary +++ b/binary @@ -1 +1 @@ -Subproject commit e2c9bc538a1855704e29adb18502c06ce697bc2e +Subproject commit e2c2603aff994e2ccda28e86dcf4f70be6fd70d7 diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index 83e69fb652..cfa3ab9b6b 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit 83e69fb65213a3481d16d2949fe54636af7e0a53 +Subproject commit cfa3ab9b6b3c0c305ea4d0256d999a00cad834d6 diff --git a/papers b/papers index 92e4835344..521289b25c 160000 --- a/papers +++ b/papers @@ -1 +1 @@ -Subproject commit 92e4835344c7f2ab6d55c52dbb1634e7ec599651 +Subproject commit 521289b25ca80841c213289aae7c2bcb432ce240 diff --git a/tutorials b/tutorials index 0236f4e42a..f1e2483dac 160000 --- a/tutorials +++ b/tutorials @@ -1 +1 @@ -Subproject commit 0236f4e42a01dea4a190c6205b1ab356380d6948 +Subproject commit f1e2483dac9290fab549d794c35963f0be38f8f2 From 7ebeca90567cee6854509caabade17e1a85e82e7 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:16:28 +0100 Subject: [PATCH 77/82] updated tutorials --- tutorials | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials b/tutorials index f1e2483dac..0236f4e42a 160000 --- a/tutorials +++ b/tutorials @@ -1 +1 @@ -Subproject commit f1e2483dac9290fab549d794c35963f0be38f8f2 +Subproject commit 0236f4e42a01dea4a190c6205b1ab356380d6948 From b8859d8fea50601877a2a11480ceae024c47ed9c Mon Sep 
17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:17:49 +0100 Subject: [PATCH 78/82] updated tutorials --- tutorials | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials b/tutorials index 0236f4e42a..f1e2483dac 160000 --- a/tutorials +++ b/tutorials @@ -1 +1 @@ -Subproject commit 0236f4e42a01dea4a190c6205b1ab356380d6948 +Subproject commit f1e2483dac9290fab549d794c35963f0be38f8f2 From da43c6a31ab0f6cd418c974cab4b1cd1f293c827 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:25:20 +0100 Subject: [PATCH 79/82] switched to master --- papers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/papers b/papers index 521289b25c..92e4835344 160000 --- a/papers +++ b/papers @@ -1 +1 @@ -Subproject commit 521289b25ca80841c213289aae7c2bcb432ce240 +Subproject commit 92e4835344c7f2ab6d55c52dbb1634e7ec599651 From a9f0804c26dc441c962615a8d1af759df19acc3e Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:26:06 +0100 Subject: [PATCH 80/82] switched to master2 --- external/analysis/PolytopeSamplerMatlab | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/analysis/PolytopeSamplerMatlab b/external/analysis/PolytopeSamplerMatlab index cfa3ab9b6b..83e69fb652 160000 --- a/external/analysis/PolytopeSamplerMatlab +++ b/external/analysis/PolytopeSamplerMatlab @@ -1 +1 @@ -Subproject commit cfa3ab9b6b3c0c305ea4d0256d999a00cad834d6 +Subproject commit 83e69fb65213a3481d16d2949fe54636af7e0a53 From 021beda916fbb8d176a9e19947b1ba05a0ff0bb5 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:26:45 +0100 Subject: [PATCH 81/82] switched to master3 --- tutorials | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials b/tutorials index f1e2483dac..0236f4e42a 160000 --- a/tutorials +++ b/tutorials @@ -1 +1 @@ -Subproject commit f1e2483dac9290fab549d794c35963f0be38f8f2 +Subproject commit 0236f4e42a01dea4a190c6205b1ab356380d6948 From 
21c91cef3a3c8298fcb117febf7c7ff003e9b6d9 Mon Sep 17 00:00:00 2001 From: Ronan Fleming Date: Thu, 16 Sep 2021 13:28:06 +0100 Subject: [PATCH 82/82] submodules point to master --- binary | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binary b/binary index e2c2603aff..e2c9bc538a 160000 --- a/binary +++ b/binary @@ -1 +1 @@ -Subproject commit e2c2603aff994e2ccda28e86dcf4f70be6fd70d7 +Subproject commit e2c9bc538a1855704e29adb18502c06ce697bc2e