diff --git a/README.md b/README.md index 694ed59..1cb0fdf 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,10 @@ wgd syn families gffs (option) -ac, --ancestor, the assumed ancestor species, default None, a option that is still in development -mg, --minseglen, the minimum length of segments to include in ratio if <= 1, default 100000 -kr, --keepredun, flag option, whether to keep redundant multiplicons, if the flag was set, the redundant multiplicons will be kept +-mgn, --mingenenum, the minimum number of genes for a segment to be considered, default 30 +-ds, --dotsize, the dot size in dot plot, default 1 +-aa, --apalpha, the opacity of anchor dots, default 1 +-ha, --hoalpha, the opacity of homolog dots, default 0.1 ``` The program `wgd viz` can realize the visualization of *K*S age distribution and synteny. @@ -272,6 +276,11 @@ wgd viz (option) -pag, --plotapgmm, flag option, whether to plot mixture modeling of anchor Ks in the mixed Ks distribution, if the flag was set, the mixture modeling of anchor Ks will be plotted -pem, --plotelmm, flag option, whether to plot elmm mixture modeling of paranome Ks in the mixed Ks distribution, if the flag was set, the elmm mixture modeling of paranome Ks will be plotted -c, --components, the range of the number of components to fit in anchor Ks mixture modeling, default (1,4) +-mgn, --mingenenum, the minimum number of genes for a segment to be considered, default 30 +-psy, --plotsyn, flag option, whether to initiate the synteny plot, if the flag was set, the synteny plot will be produced +-ds, --dotsize, the dot size in dot plot, default 1 +-aa, --apalpha, the opacity of anchor dots, default 1 +-ha, --hoalpha, the opacity of homolog dots, default 0.1 ``` ## Usage @@ -436,6 +445,8 @@ The dot plots without *K*S annotation will also be automately produce ![](data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png) +Note that the opacity of anchor dots and all homolog dots can be set by the option `apalpha` and `hoalpha` separately. If one just wants to see the anchor dots, setting the `hoalpha` as 0 (or other minuscule values) will do. If one wants to see the distribution of whole dots better, setting the `hoalpha` higher (and `apalpha` lower) will do. The `dotsize` option can be called to adjust the size of dots. + A further associated Syndepth plot shows that there are more than 80 duplicated segments, which dominates the whole collinear ratio category. ![](data/Syndepth.svg) diff --git a/cli.py b/cli.py index 85a2cfd..013e46a 100644 --- a/cli.py +++ b/cli.py @@ -521,15 +521,18 @@ def _ksd(families, sequences, outdir, tmpdir, nthreads, to_stop, cds, pairwise, @click.option('--plotapgmm', '-pag', is_flag=True, help='plot mixture modeling of anchor pairs') @click.option('--plotelmm', '-pem', is_flag=True, help='plot elmm mixture modeling') @click.option('--components', '-n', nargs=2, default=(1, 4), show_default=True, help="range of number of components to fit") -@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="min number of genes on segments to be considered") +@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="minimum number of genes on segments to be considered") @click.option('--plotsyn', '-psy', is_flag=True, help='plot synteny') +@click.option('--dotsize', '-ds', type=float, default=1, show_default=True, help='size of dots') +@click.option('--apalpha', '-aa', type=float, default=1, show_default=True, help='opacity of anchor dots') +@click.option('--hoalpha', '-ha', type=float, default=0.1, show_default=True, help='opacity of homolog dots') def viz(**kwargs): """ Visualization of Ks distribution or synteny """ _viz(**kwargs) -def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializations,prominence_cutoff,segments,minlen,maxsize,anchorpoints,multiplicon,genetable,rel_height,speciestree,onlyrootout,minseglen,keepredun,extraparanomeks,plotapgmm,plotelmm,components,mingenenum,plotsyn): +def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializations,prominence_cutoff,segments,minlen,maxsize,anchorpoints,multiplicon,genetable,rel_height,speciestree,onlyrootout,minseglen,keepredun,extraparanomeks,plotapgmm,plotelmm,components,mingenenum,plotsyn,dotsize,apalpha,hoalpha): from wgd.viz import elmm_plot, apply_filters, multi_sp_plot, default_plot,all_dotplots,filter_by_minlength,dotplotunitgene,dotplotingene,filter_mingenumber from wgd.core import _mkdir from wgd.syn import get_anchors,get_multi,get_segments_profile,get_chrom_gene,get_mp_geneorder,transformunit @@ -552,13 +555,13 @@ def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializ segs,table,df_multi,removed_scfa = filter_by_minlength(table,segs,minlen,df_multi,keepredun,outdir,minseglen) segs_gene_unit, gene_order_dict_allsp = transformunit(segs,ordered_genes_perchrom_allsp,outdir) segs = filter_mingenumber(segs_gene_unit,mingenenum) - dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=df_anchor,ksdf=df,maxsize=maxsize) + dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=df_anchor,ksdf=df,maxsize=maxsize,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha) #dotplotunitgene(ordered_genes_perchrom_allsp,segs_gene_unit,removed_scfa,outdir,mingenenum,table_orig,ordered_mp,ksdf=df) - figs = all_dotplots(table, segs, df_multi, minseglen, anchors=df_anchor, maxsize=maxsize, minlen=minlen, outdir=outdir, Ks = df) + figs = all_dotplots(table, segs, df_multi, minseglen, anchors=df_anchor, maxsize=maxsize, minlen=minlen, outdir=outdir, Ks = df, dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha) for k, v in figs.items(): v.savefig(os.path.join(outdir, "{}.dot.svg".format(k))) v.savefig(os.path.join(outdir, "{}.dot.pdf".format(k))) - v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=1200) + v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=500) logging.info('Done') exit() ksdb_df = pd.read_csv(datafile,header=0,index_col=0,sep='\t') @@ -599,7 +602,10 @@ def _viz(datafile,spair,outdir,gsmap,plotkde,reweight,em_iterations,em_initializ @click.option('--ancestor', '-ac', default=None,show_default=True,help='assumed ancestor species') @click.option('--minseglen', '-mg', default=10000, show_default=True, help="min length of segments in ratio if <= 1") @click.option('--keepredun', '-kr', is_flag=True, help='keep redundant multiplicons') -@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="min number of genes on segments to be considered") +@click.option('--mingenenum', '-mgn', default=30, type=int, show_default=True, help="minimum number of genes on segments to be considered") +@click.option('--dotsize', '-ds', type=float, default=1, show_default=True, help='size of dots') +@click.option('--apalpha', '-aa', type=float, default=1, show_default=True, help='opacity of anchor dots') +@click.option('--hoalpha', '-ha', type=float, default=0.1, show_default=True, help='opacity of homolog dots') def syn(**kwargs): """ Co-linearity and anchor inference using I-ADHoRe. @@ -607,7 +613,7 @@ def syn(**kwargs): _syn(**kwargs) def _syn(families, gff_files, ks_distribution, outdir, feature, attribute, - minlen, maxsize, ks_range, iadhore_options, ancestor, minseglen, keepredun, mingenenum): + minlen, maxsize, ks_range, iadhore_options, ancestor, minseglen, keepredun, mingenenum, dotsize, apalpha, hoalpha): """ Co-linearity and anchor inference using I-ADHoRe. """ @@ -664,15 +670,15 @@ def _syn(families, gff_files, ks_distribution, outdir, feature, attribute, ksdb_df = pd.read_csv(ks_distribution,header=0,index_col=0,sep='\t') ksdb_df = formatv2(ksdb_df) df_ks = apply_filters(ksdb_df, [("dS", 0., 5.)]) - dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=anchors,ksdf=df_ks,maxsize=maxsize) + dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=anchors,ksdf=df_ks,maxsize=maxsize,dotsize=dotsize,apalpha=apalpha, hoalpha=hoalpha) #dotplotunitgene(ordered_genes_perchrom_allsp,segs_gene_unit,removed_scfa,outdir,mingenenum,table_orig,ordered_mp,ksdf=df_ks) # dotplot #logging.info("Generating dot plots") - figs = all_dotplots(table, segs, multi, minseglen, anchors=anchors, maxsize=maxsize, minlen=minlen, outdir=outdir, ancestor=ancestor, Ks = df_ks) + figs = all_dotplots(table, segs, multi, minseglen, anchors=anchors, maxsize=maxsize, minlen=minlen, outdir=outdir, ancestor=ancestor, Ks = df_ks, dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha) for k, v in figs.items(): v.savefig(os.path.join(outdir, "{}.dot.svg".format(k))) v.savefig(os.path.join(outdir, "{}.dot.pdf".format(k))) - v.savefig(os.path.join(outdir, "{}.dot.png".format(k))) + v.savefig(os.path.join(outdir, "{}.dot.png".format(k)),dpi=500) plt.close() # anchor Ks distributions diff --git a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png index 418fdc8..5478cd7 100644 Binary files a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png and b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot.png differ diff --git a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot_unit_gene.png b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot_unit_gene.png index d30f6eb..e8f6b5e 100644 Binary files a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot_unit_gene.png and b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea.dot_unit_gene.png differ diff --git a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot.png b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot.png index f97bed9..d44714e 100644 Binary files a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot.png and b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot.png differ diff --git a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot_unit_gene.png b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot_unit_gene.png index 4b345f4..6639fb6 100644 Binary files a/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot_unit_gene.png and b/data/Aquilegia_coerulea-vs-Aquilegia_coerulea_Ks.dot_unit_gene.png differ diff --git a/setup.py b/setup.py index f978b43..81b1360 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name='wgd', - version='2.0.18', + version='2.0.19', packages=['wgd'], url='http://github.com/heche-psb/wgd', license='GPL', diff --git a/wgd/viz.py b/wgd/viz.py index 3c27d18..8eb64f4 100755 --- a/wgd/viz.py +++ b/wgd/viz.py @@ -1784,7 +1784,7 @@ def getpairks(pair,ksdf): Ks_dict = {pair:ks for pair,ks in zip(ksdf.index,ksdf['dS'])} return Ks_dict.get(pair,None) -def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False): +def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False,dotsize=0.8,apalpha=1, hoalpha=0.1): dfx,dfy = dfx.set_index('Coordinates'),dfy.set_index('Coordinates') leng_info_x,leng_info_y = {},{} gene_list = {gene:li for gene,li in zip(table.index,table['scaffold'])} @@ -1842,11 +1842,11 @@ def plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=None,ksdf=None,maxsize s_m = ScalarMappable(cmap=c_m, norm=norm) s_m.set_array([]) if not showks: - ax.scatter(xs, ys, s=0.4, color = 'k', alpha=0.01) - ax.scatter(xs_ap, ys_ap, s=0.4, color = 'r', alpha=1) + ax.scatter(xs, ys, s=dotsize, color = 'k', alpha=hoalpha) + ax.scatter(xs_ap, ys_ap, s=dotsize, color = 'r', alpha=apalpha) else: - ax.scatter(xs, ys, s=0.4, color=[c_m(norm(c)) for c in co], alpha=0.01) - ax.scatter(xs_ap, ys_ap, s=0.4, color=[c_m(norm(c)) for c in co_ap], alpha=1) + ax.scatter(xs, ys, s=dotsize, color=[c_m(norm(c)) for c in co], alpha=hoalpha) + ax.scatter(xs_ap, ys_ap, s=dotsize, color=[c_m(norm(c)) for c in co_ap], alpha=apalpha) #ax.scatter(xs_ap, ys_ap, s=0.4, alpha=0.5) xlim,ylim = xtick[-1],ytick[-1] ax.set_xlim(0, xlim) @@ -2067,15 +2067,15 @@ def dotplotunitgene(ordered_genes_perchrom_allsp,segs,removed_scfa,outdir,mingen fname = os.path.join(outdir, "{}.line_unit_gene.svg".format(prefix)) fig.savefig(fname) -def plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False): +def plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=None,ksdf=None,maxsize=200,showks=False,dotsize=0.8, apalpha=1, hoalpha=0.1): fig, ax = plt.subplots(1, 1, figsize=(10,10)) dfx = ordered_genes_perchrom_allsp[spx].copy().drop(removed_scfa[spx],axis=1) dfy = ordered_genes_perchrom_allsp[spy].copy().drop(removed_scfa[spy],axis=1) - ax = plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=anchor,ksdf=ksdf,maxsize=maxsize,showks=showks) + ax = plotdp_ig(ax,dfx,dfy,spx,spy,table,gene_orders,anchor=anchor,ksdf=ksdf,maxsize=maxsize,showks=showks,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha) fig.tight_layout() return fig, ax -def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=None,ksdf=None,maxsize=200): +def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_orders,anchor=None,ksdf=None,maxsize=200,dotsize=0.8, apalpha=1, hoalpha=0.1): sp_list = list(ordered_genes_perchrom_allsp.keys()) gene_list = {gene:li for gene,li in zip(table.index,table['scaffold'])} gene_genome = {gene:sp for gene,sp in zip(table.index,table['species'])} @@ -2085,22 +2085,22 @@ def dotplotingene(ordered_genes_perchrom_allsp,removed_scfa,outdir,table,gene_or for j in range(i,len(sp_list)): spx,spy = sp_list[i],sp_list[j] logging.info("{0} vs. {1}".format(spx,spy)) - fig, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf) + fig, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,dotsize=dotsize,apalpha=apalpha, hoalpha=hoalpha) figs[spx + "-vs-" + spy] = fig if not (ksdf is None): - figks, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,showks=True) + figks, ax = plotdotplotingene(spx,spy,table,removed_scfa,ordered_genes_perchrom_allsp,gene_orders,anchor=anchor,ksdf=ksdf,showks=True,dotsize=dotsize, apalpha=apalpha, hoalpha=hoalpha) figs[spx + "-vs-" + spy + "_Ks"] = figks for prefix, fig in figs.items(): fname = os.path.join(outdir, "{}.dot_unit_gene.svg".format(prefix)) fig.savefig(fname) fname = os.path.join(outdir, "{}.dot_unit_gene.png".format(prefix)) - fig.savefig(fname,dpi=1200) + fig.savefig(fname,dpi=500) fname = os.path.join(outdir, "{}.dot_unit_gene.pdf".format(prefix)) fig.savefig(fname) plt.close() # dot plot stuff -def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=None, **kwargs): +def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=None, dotsize = 0.8, apalpha=1, hoalpha=0.1, **kwargs): """ Generate dot plots for all pairs of species in `df`, coloring anchor pairs. """ @@ -2145,7 +2145,7 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non continue #for x,y in zip(df.x,df.y): ax.scatter(x, y, s=1, color="k", alpha=0.1) #print((len(list(itertools.chain(df.x))),len(list(itertools.chain(df.y))))) - ax.scatter(list(itertools.chain(df.x)), list(itertools.chain(df.y)), s=0.4, color="k", alpha=0.01) + ax.scatter(list(itertools.chain(df.x)), list(itertools.chain(df.y)), s=dotsize, color="k", alpha=hoalpha) if not (Ks is None): for i,x,y in zip(df.index,df['x'],df['y']): ksage = ks_dict.get(i,None) @@ -2159,7 +2159,7 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non if not (anchors is None): andf = df.join(anchors, how="inner") #for x,y in zip(andf.x,andf.y): ax.scatter(x, y, s=1, color="r", alpha=0.9) - ax.scatter(list(itertools.chain(andf.x)), list(itertools.chain(andf.y)), s=0.4, color="r", alpha=1) + ax.scatter(list(itertools.chain(andf.x)), list(itertools.chain(andf.y)), s=dotsize, color="r", alpha=apalpha) if not (Ks is None): for i,x,y in zip(andf.index,andf['x'],andf['y']): ksage = ks_dict.get(i,None) @@ -2197,8 +2197,8 @@ def all_dotplots(df, segs, multi, minseglen, anchors=None, ancestor=None, Ks=Non c_m = matplotlib.cm.rainbow s_m = ScalarMappable(cmap=c_m, norm=norm) s_m.set_array([]) - axks.scatter(xxs, yys, s=0.4, color=[c_m(norm(c)) for c in ksages], alpha=0.01) - axks.scatter(xxs_ap, yys_ap, s=0.4, color=[c_m(norm(c)) for c in ksages_ap], alpha=1) + axks.scatter(xxs, yys, s=dotsize, color=[c_m(norm(c)) for c in ksages], alpha=hoalpha) + axks.scatter(xxs_ap, yys_ap, s=dotsize, color=[c_m(norm(c)) for c in ksages_ap], alpha=apalpha) axks.set_xlim(0, xlim) axks.set_ylim(0, ylim) axks.vlines(xs+[xmax], ymin=0, ymax=ylim, alpha=0.8, color="k")