"""
Module to help create scaled bigWig files from BAM
"""
import pybedtools
import os
import subprocess
try:
FileNotFoundError
except NameError:
# python2
FileNotFoundError = OSError
def mapped_read_count(bam, force=False):
"""
Scale is cached in a bam.scale file containing the number of mapped reads.
Use force=True to override caching.
"""
scale_fn = bam + '.scale'
if os.path.exists(scale_fn) and not force:
for line in open(scale_fn):
if line.startswith('#'):
continue
readcount = float(line.strip())
return readcount
cmds = ['samtools',
'view',
'-c',
'-F', '0x4',
bam]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode:
raise ValueError('samtools says: %s' % stderr)
readcount = float(stdout)
# write to file so the next time you need the lib size you can access
# it quickly
if not os.path.exists(scale_fn):
fout = open(scale_fn, 'w')
fout.write(str(readcount) + '\n')
fout.close()
return readcount
[docs]def bedgraph_to_bigwig(bedgraph, genome, output):
genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome))
cmds = [
'bedGraphToBigWig',
bedgraph.fn,
genome_file,
output]
try:
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
except FileNotFoundError:
raise FileNotFoundError(
"bedGraphToBigWig was not found on the path. This is an external "
"tool from UCSC which can be downloaded from "
"http://hgdownload.soe.ucsc.edu/admin/exe/. Alternatatively, use "
"`conda install ucsc-bedgraphtobigwig`"
)
if p.returncode:
raise ValueError("cmds: %s\nstderr:%s\nstdout:%s"
% (" ".join(cmds), stderr, stdout))
return output
def bigwig_to_bedgraph(fn, chrom=None, start=None, end=None, udcDir=None):
cmds = [
'bigWigToBedGraph',
fn]
if chrom is not None:
cmds.extend(['-chrom', chrom])
if start is not None:
cmds.extend(['-start', start])
if end is not None:
cmds.extend(['-end', end])
if udcDir is not None:
cmds.extend(['-udcDir', udcDir])
outfn = pybedtools.BedTool._tmp()
cmds.append(outfn)
try:
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
except FileNotFoundError:
raise FileNotFoundError(
"bigWigToBedGraph was not found on the path. This is an external "
"tool from UCSC which can be downloaded from "
"http://hgdownload.soe.ucsc.edu/admin/exe/. Alternatatively, use "
"`conda install ucsc-bedgraphtobigwig`"
)
if p.returncode:
raise ValueError("cmds: %s\nstderr:%s\nstdout:%s"
% (" ".join(cmds), stderr, stdout))
return pybedtools.BedTool(outfn)
[docs]def wig_to_bigwig(wig, genome, output):
genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome))
cmds = [
'wigToBigWig',
wig.fn,
genome_file,
output]
try:
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
except FileNotFoundError:
raise FileNotFoundError(
"bigWigToBedGraph was not found on the path. This is an external "
"tool from UCSC which can be downloaded from "
"http://hgdownload.soe.ucsc.edu/admin/exe/. Alternatatively, use "
"`conda install ucsc-bedgraphtobigwig`"
)
if p.returncode:
raise ValueError('cmds: %s\nstderr:%s\nstdout:%s'
% (' '.join(cmds), stderr, stdout))
return output
[docs]def bam_to_bigwig(bam, genome, output, scale=False):
"""
Given a BAM file `bam` and assembly `genome`, create a bigWig file scaled
such that the values represent scaled reads -- that is, reads per million
mapped reads.
(Disable this scaling step with scale=False; in this case values will
indicate number of reads)
Assumes that `bedGraphToBigWig` from UCSC tools is installed; see
http://genome.ucsc.edu/goldenPath/help/bigWig.html for more details on the
format.
"""
genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome))
kwargs = dict(bg=True, split=True, g=genome_file)
if scale:
readcount = mapped_read_count(bam)
_scale = 1 / (readcount / 1e6)
kwargs['scale'] = _scale
x = pybedtools.BedTool(bam).genome_coverage(**kwargs)
cmds = [
'bedGraphToBigWig',
x.fn,
genome_file,
output]
try:
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
stdout, stderr = p.communicate()
except FileNotFoundError:
raise FileNotFoundError(
"bedGraphToBigWig was not found on the path. This is an external "
"tool from UCSC which can be downloaded from "
"http://hgdownload.soe.ucsc.edu/admin/exe/. Alternatatively, use "
"`conda install ucsc-bedgraphtobigwig`"
)
if p.returncode and 'bedSort' in stderr:
print('BAM header was not sorted; sorting bedGraph')
y = x.sort()
cmds[1] = y.fn
try:
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
stdout, stderr = p.communicate()
except FileNotFoundError:
raise FileNotFoundError(
"bedSort was not found on the path. This is an external "
"tool from UCSC which can be downloaded from "
"http://hgdownload.soe.ucsc.edu/admin/exe/. Alternatatively, use "
"`conda install ucsc-bedgraphtobigwig`"
)
if p.returncode:
raise ValueError('cmds: %s\nstderr: %s\nstdout: %s'
% (' '.join(cmds), stderr, stdout))