Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python3 compatibility #85

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clustering/check_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
sys.stderr.write("%d junctions\n"%libs[lib])

failed_junc = []
threshold=max([len(x) for x in libChroms.values()])/1
threshold=max([len(x) for x in list(libChroms.values())])/1
for lib in libChroms:
if len(libChroms[lib]) < threshold:
failed_junc.append("rm "+lib)
Expand Down
6 changes: 3 additions & 3 deletions clustering/get_cluster_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def get_feature(fname, feature = "exon"):

ss2gene = get_feature(sys.argv[1], "exon")

W = file("%s.clu2gene.txt"%sys.argv[2].split("_perind")[0],'w')
W = open("%s.clu2gene.txt"%sys.argv[2].split("_perind")[0],'w')
for ln in gzip.open(sys.argv[2]):
if "chrom" in ln: continue

if len(ln.split()[0].split(":")) == 5:
chrom, A, B, clu, strand = ln.split()[0].split(":")
else:
Expand All @@ -52,7 +52,7 @@ def get_feature(fname, feature = "exon"):
if (chrom, int(B)) in ss2gene:
gs.append(ss2gene[(chrom, int(B))])


if len(gs) > 0:
W.write("%s %s %s %s %s %.2f %.2f %.2f %.2f\n"%(clu,chrom, A,B, gs[0], mean, median, minAS, maxAS))
else:
Expand Down
88 changes: 44 additions & 44 deletions clustering/leafcutter_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import shutil

def main(options,libl):

pool_junc_reads(libl, options)
refine_clusters(options)
sort_junctions(libl, options)
Expand All @@ -23,11 +23,11 @@ def pool_junc_reads(flist, options):
useStrand = options.strand

outFile = "%s/%s_pooled"%(rundir,outPrefix)

chromLst = ["chr%d"%x for x in range(1,23)]+['chrX','chrY']+["%d"%x for x in range(1,23)]+['X','Y']
by_chrom = {}
for libl in flist:

lib = libl.strip()
if not os.path.isfile(lib):
continue
Expand All @@ -38,24 +38,24 @@ def pool_junc_reads(flist, options):
if lib[-3:] == ".gz": F = gzip.open(lib)
else: F = open(lib)
for ln in F:

lnsplit=ln.split()
if len(lnsplit)<6:
if len(lnsplit)<6:
sys.stderr.write("Error in %s \n" % lib)
continue
chrom, A, B, dot, counts, strand = lnsplit

if not useStrand:
strand = "NA"
if checkchrom and (chrom not in chromLst): continue
A, B = int(A), int(B)+1
if B-A > int(maxIntronLen): continue
try: by_chrom[(chrom,strand)][(A,B)] = int(counts) + by_chrom[(chrom, strand)][(A,B)]
except:
except:
try: by_chrom[(chrom,strand)][(A,B)] = int(counts)
except: by_chrom[(chrom, strand)] = {(A,B):int(counts)}

fout = file(outFile, 'w')
fout = open(outFile, 'w')
Ncluster = 0
sys.stderr.write("Parsing...\n")
for chrom in by_chrom:
Expand All @@ -64,7 +64,7 @@ def pool_junc_reads(flist, options):
sys.stderr.write("%s:%s.."%chrom)
clu = cluster_intervals(read_ks)[0]
for cl in clu:
if len(cl) > 1: # if cluster has more than one intron
if len(cl) > 1: # if cluster has more than one intron
buf = '%s:%s '%chrom
for interval, count in [(x, by_chrom[chrom][x]) for x in cl]:
buf += "%d:%d" % interval + ":%d"%count+ " "
Expand All @@ -76,7 +76,7 @@ def pool_junc_reads(flist, options):

def sort_junctions(libl, options):

chromLst = ["chr%d"%x for x in range(1,23)]+['chrX','chrY']+["%d"%x for x in range(1,23)]+['X','Y']
chromLst = ["chr%d"%x for x in range(1,23)]+['chrX','chrY']+["%d"%x for x in range(1,23)]+['X','Y']
outPrefix = options.outprefix
rundir = options.rundir
refined_cluster = "%s/%s_refined"%(rundir,outPrefix)
Expand Down Expand Up @@ -112,7 +112,7 @@ def sort_junctions(libl, options):
merges[libN] = []
merges[libN].append(lib)

fout_runlibs = file(runName+"_sortedlibs",'w')
fout_runlibs = open(runName+"_sortedlibs",'w')

for libN in merges:
libName = "%s/%s"%(rundir,libN.split('/')[-1])
Expand All @@ -121,24 +121,24 @@ def sort_junctions(libl, options):

fout_runlibs.write(foutName+'\n')

if options.verbose:
if options.verbose:
sys.stderr.write("Sorting %s..\n"%libN)
if len(merges[libN]) > 1:
if options.verbose:
if options.verbose:
sys.stderr.write("merging %s...\n"%(" ".join(merges[libN])))
else:
pass
fout = gzip.open(foutName,'w')

fout.write("chrom %s\n"%libN.split("/")[-1].split(".junc")[0])
fout.write("chrom {junc}\n".format(junc=libN.split("/")[-1].split(".junc")[0]).encode())

for lib in merges[libN]:
if lib[-3:] == ".gz": F = gzip.open(lib)
else: F = open(lib)
for ln in F:

lnsplit=ln.split()
if len(lnsplit)<6:
if len(lnsplit)<6:
sys.stderr.write("Error in %s \n" % lib)
continue
chrom, start, end, dot, count, strand = ln.split()
Expand Down Expand Up @@ -167,23 +167,23 @@ def sort_junctions(libl, options):
elif (start,end) in by_chrom[chrom]:
tot += by_chrom[chrom][(start,end)]
for exon in ks:

chrom, start, end = exon
start, end = int(start), int(end)
chromID, strand = chrom
if chrom not in by_chrom:
buf.append("%s:%d:%d:clu_%d_%s 0/%d\n"%(chromID,start, end,clu, strand, tot))
elif (start,end) in by_chrom[chrom]:
elif (start,end) in by_chrom[chrom]:
buf.append("%s:%d:%d:clu_%d_%s %d/%d\n"%(chromID,start, end, clu,strand, by_chrom[chrom][(start,end)], tot))
else:
buf.append("%s:%d:%d:clu_%d_%s 0/%d\n"%(chromID,start, end,clu,strand, tot))
fout.write("".join(buf))

fout.write("".join(buf).encode())
fout.close()
fout_runlibs.close()

def refine_clusters(options):

outPrefix = options.outprefix
rundir = options.rundir
minratio = float(options.mincluratio)
Expand All @@ -192,7 +192,7 @@ def refine_clusters(options):
inFile = "%s/%s_pooled"%(rundir,outPrefix)
outFile = "%s/%s_refined"%(rundir,outPrefix)

fout = file(outFile,'w')
fout = open(outFile,'w')
Ncl = 0
for ln in open(inFile):
clu = []
Expand All @@ -206,7 +206,7 @@ def refine_clusters(options):
#print "CLU",clu
#print "linked",refine_linked(clu)
#print '\n\n'

for cl in refine_linked(clu):
rc = refine_cluster(cl,minratio, minreads)
if len(rc) > 0:
Expand All @@ -220,45 +220,45 @@ def refine_clusters(options):
fout.close()


def merge_junctions(options):
    """Merge the sorted per-library junction files into a single counts file.

    The file names are read, one per line, from
    ``<rundir>/<outprefix>_sortedlibs``. They are handed to ``merge_files``
    in batches, each batch producing one temporary gzip file, and the
    process repeats on those temporary files until a single file remains.
    That file is moved to ``<rundir>/<outprefix>_perind.counts.gz``. If the
    list names exactly one file, that file itself is moved.

    Args:
        options: object exposing ``outprefix``, ``rundir`` and ``verbose``;
            it is also passed through unchanged to ``merge_files``.

    Raises:
        IndexError: if the ``_sortedlibs`` list contains no file names.
    """
    outPrefix = options.outprefix
    rundir = options.rundir
    fnameout = "%s/%s" % (rundir, outPrefix)
    flist = "%s/%s_sortedlibs" % (rundir, outPrefix)

    # Close the list file deterministically instead of relying on GC.
    with open(flist) as listing:
        lsts = [ln.strip() for ln in listing]
    if options.verbose:
        sys.stderr.write("merging %d junction files...\n" % (len(lsts)))
    if not lsts:
        # Same exception type the bare lsts[0] lookup used to raise, but
        # with a message that points at the actual problem.
        raise IndexError("no junction files listed in %s" % flist)

    # Batch size, clamped to the range [100, 300].
    # Lower the 300 cap if the OS limit on open files is below 300.
    N = min([300, max([100, int(len(lsts)**(0.5))])])

    tmpdirs = []
    try:
        while len(lsts) > 1:
            # Split the current file list into consecutive batches of <= N.
            clst = [lsts[i:i + N] for i in range(0, len(lsts), N)]
            lsts = []

            for lst in clst:
                # mkdtemp creates the directory atomically; mktemp() followed
                # by os.mkdir() is deprecated and open to a race.
                tmpdir = tempfile.mkdtemp()
                tmpdirs.append(tmpdir)
                foutname = tmpdir + "/tmpmerge.gz"
                with gzip.open(foutname, 'w') as fout:
                    merge_files(lst, fout, options)
                lsts.append(foutname)

        shutil.move(lsts[0], fnameout + "_perind.counts.gz")
    finally:
        # Remove every intermediate merge directory, including on failure;
        # the final result has already been moved out of its directory.
        for tmpdir in tmpdirs:
            shutil.rmtree(tmpdir, ignore_errors=True)

def merge_files(fnames, fout, options):
Expand All @@ -274,12 +274,12 @@ def merge_files(fnames, fout, options):
N = 0
while not finished:
N += 1
if N % 50000 == 0:
if N % 50000 == 0:
sys.stderr.write(".")
buf = []
for f in fopen:
ln = f.readline().split()
if len(ln) == 0:
if len(ln) == 0:
finished = True
break
chrom = ln[0]
Expand Down Expand Up @@ -320,10 +320,10 @@ def cluster_intervals(E):
i += 1

if len(cluster) > 0:

Eclusters.append(cluster)


return Eclusters, E

def overlaps(A,B):
Expand All @@ -343,7 +343,7 @@ def refine_linked(clusters):
newClusters = []
while len(unassigned) > 0:
finished = False

while not finished:
finished = True
torm = []
Expand All @@ -370,7 +370,7 @@ def refine_linked(clusters):
def refine_cluster(clu, cutoff, readcutoff):
''' for each exon in the cluster compute the ratio of reads, if smaller than cutoff,
remove and recluster '''

remove = []
dic = {}
intervals = []
Expand All @@ -387,15 +387,15 @@ def refine_cluster(clu, cutoff, readcutoff):
else:
reCLU = True
if len(intervals) == 0: return []

# This makes sure that after trimming, the clusters are still good
Atmp, B = cluster_intervals(intervals)
A = []
for cl in Atmp:
for c in refine_linked([(x,0) for x in cl]):
if len(c) > 0:
A.append([x[0] for x in c])

if len(A) == 1:
rc = [(x, dic[x]) for x in A[0]]
if len(rc) > 1:
Expand All @@ -421,15 +421,15 @@ def get_numers(options):
input_file=gzip.open(fname, 'rb')
fout = gzip.open(fnameout,'w')
first_line=True

for l in input_file:
if first_line:
fout.write(" ".join(l.strip().split(" ")[1:])+'\n') # print the sample names
first_line=False
else:
l=l.strip()
words=l.split(" ")

fout.write(words[0]+ " "+ " ".join( [ g.split("/")[0] for g in words[1:] ] ) +'\n')

input_file.close()
Expand All @@ -453,7 +453,7 @@ def get_numers(options):

parser.add_option("-r", "--rundir", dest="rundir", default='./',
help="write to directory (default ./)")

parser.add_option("-l", "--maxintronlen", dest="maxintronlen", default = 100000,
help="maximum intron length in bp (default 100,000bp)")

Expand All @@ -473,14 +473,14 @@ def get_numers(options):
if options.juncfiles == None:
sys.stderr.write("Error: no junction file provided...\n")
exit(0)

# Get the junction file list
libl = []
for junc in open(options.juncfiles):
junc = junc.strip()
try:
open(junc)
except:
except:
sys.stderr.write("%s does not exist... check your junction files.\n"%junc)
exit(0)
libl.append(junc)
Expand Down
Loading