SAMReader

Encapsulates a SAM/BAM file.

Implements the InputRange interface using htslib calls. If the file is indexed, random-access queries are available via multidimensional slicing.

struct SAMReader {}

Constructors

this
this(T f, int extra_threads)

Create a representation of a SAM/BAM/CRAM file from the given filename or File

Members

Aliases

all_records
deprecated alias all_records = allRecords
Undocumented in source.
fetch
alias fetch = query

fetch is provided as a PySAM-compatible synonym for query

n_targets
alias n_targets = nTargets
Undocumented in source.
targetLen
alias targetLen = targetLength
Undocumented in source.
targetLens
alias targetLens = targetLengths
Undocumented in source.
target_id
alias target_id = targetId
Undocumented in source.
target_len
alias target_len = targetLength
Undocumented in source.
target_lens
alias target_lens = targetLengths
Undocumented in source.
target_names
alias target_names = targetNames
Undocumented in source.

Functions

allRecords
auto allRecords()

Return an InputRange representing all records in the SAM/BAM/CRAM file

opDollar
OffsetType opDollar()

Array-end $ indexing hack courtesy of Steve Schveighoffer https://forum.dlang.org/post/rl7a56$nad$1@digitalmars.com

opIndex
auto opIndex(string[] regions)

opIndex with a list of string regions bam[["chr1:1-3","chr2:4-50"]]

opIndex
auto opIndex(string contig, Interval!cs coords)

opIndex with a string contig and an Interval bam["chr1", ZBHO(1,3)]

opIndex
auto opIndex(int tid, Interval!cs coords)

opIndex with an int tid and an Interval bam[0, ZBHO(1,3)]

opIndex
auto opIndex(string contig, Coordinate!bs pos)

opIndex with a string contig and a Coordinate bam["chr1", ZB(1)]

opIndex
auto opIndex(int tid, Coordinate!bs pos)

opIndex with an int tid and a Coordinate bam[0, ZB(1)]

opIndex
deprecated auto opIndex(string tid, Coordinate!bs pos1, Coordinate!bs pos2)

opIndex with a string contig and two Coordinates bam["chr1", ZB(1), ZB(3)]

opIndex
deprecated auto opIndex(int tid, Coordinate!bs pos1, Coordinate!bs pos2)

opIndex with an int tid and two Coordinates bam[0, ZB(1), ZB(3)]

opIndex
auto opIndex(string ctg, OffsetType endoff)

opIndex with a string contig and an Offset bam["chr1",$-2]

opIndex
auto opIndex(int tid, OffsetType endoff)

opIndex with an int tid and an Offset bam[0,$-2]

opIndex
auto opIndex(string ctg, Tuple!(Coordinate!bs, OffsetType) coords)

opIndex with a string contig and a Coordinate and Offset bam["chr1",ZB(1) .. $]

opIndex
auto opIndex(int tid, Tuple!(Coordinate!bs, OffsetType) coords)

opIndex with an int tid and a Coordinate and Offset bam[0,ZB(1) .. $]

opIndex
auto opIndex(string ctg, Tuple!(OffsetType, OffsetType) coords)

opIndex with a string contig and two Offsets bam["chr1", $-2 .. $]

opIndex
auto opIndex(int tid, Tuple!(OffsetType, OffsetType) coords)

opIndex with an int tid and two Offsets bam[0, $-2 .. $]

opSlice
auto opSlice(Coordinate!bs start, Coordinate!bs end)

opSlice with two Coordinates [ZB(1) .. ZB(3)]

opSlice
auto opSlice(Coordinate!bs start, OffsetType off)

opSlice with a Coordinate and an Offset, i.e. [ZB(2) .. $]

opSlice
auto opSlice(OffsetType start, OffsetType end)

opSlice with two Offsets, i.e. [$-2 .. $]

query
auto query(string chrom, Interval!cs coords)
auto query(int tid, Interval!cs coords)
auto query(string[] regions)

Query a region and return matching alignments as an InputRange

targetId
deprecated int targetId(string name)

Convert a reference contig name to its integer id

targetLength
deprecated uint targetLength(int target)

length of specific reference sequence by number (tid)

Properties

nTargets
deprecated int nTargets [@property getter]

number of reference sequences; from bam_hdr_t

targetLengths
deprecated uint[] targetLengths [@property getter]

lengths of the reference sequences

targetNames
deprecated string[] targetNames [@property getter]

names of the reference sequences

Structs

AllRecordsRange
struct AllRecordsRange

Iterate through all records in the SAM

RecordRange
struct RecordRange

Iterate over records falling within a queried region

RecordRangeMulti
struct RecordRangeMulti

Iterate over records falling within queried regions using a RegionList

Variables

filename
string filename;

filename; as usable from D

header
SAMHeader header;

header struct

Examples

import dhtslib.sam;
import htslib.hts_log : hts_log_info;
import std.path : buildPath, dirName;
import std.string : fromStringz;
import std.array : array; 

hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord");
hts_log_info(__FUNCTION__, "Loading test file");
auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0);
auto sam2 = SAMWriter("/tmp/test.bam", sam.header);
auto readrange = sam.allRecords;
hts_log_info(__FUNCTION__, "Getting read 1");
assert(readrange.empty == false);
auto read = readrange.front();

// writeln(read.sequence);
assert(read.sequence=="GCTAGCTCAG");
assert(sam.allRecords.array.length == 2);
sam2.write(read);
destroy(sam2);


// testing with multiple specified threads
sam = SAMFile("/tmp/test.bam", 2);
readrange = sam.allRecords;
assert(readrange.empty == false);
read = readrange.front();
assert(read.sequence=="GCTAGCTCAG");
assert(sam.allRecords.array.length == 1);

// testing with no additional threads
sam = SAMFile("/tmp/test.bam", 0);
readrange = sam.allRecords;
assert(readrange.empty == false);
read = readrange.front();
assert(read.sequence=="GCTAGCTCAG");
assert(sam.allRecords.array.length == 1);

// testing SAMReader targets/tid functions
assert(sam.header.nTargets == 1);
assert(sam.header.targetId("Sheila") == 0);
assert(sam.header.targetLength(0) == 20);
assert(sam.header.targetLengths == [20]);
assert(sam.header.targetNames == ["Sheila"]);
import std.stdio;
import dhtslib.sam;
import dhtslib.sam.md : MDItr;
import std.algorithm : map;
import std.array : array;
import std.path : buildPath,dirName;
import std.range : drop;
hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
hts_log_info(__FUNCTION__, "Testing SAMFile query");
hts_log_info(__FUNCTION__, "Loading test file");

auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
assert(bam.allRecords.array.length == 112);
// assert(bam["CHROMOSOME_I"].array.length == 18);
// assert(bam["CHROMOSOME_II"].array.length == 34);
// assert(bam["CHROMOSOME_III"].array.length == 41);
// assert(bam["CHROMOSOME_IV"].array.length == 19);
// assert(bam["CHROMOSOME_V"].array.length == 0);
assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)) .array.length == 14);
assert(bam["CHROMOSOME_I",ZB(900) .. ZB(2000)].array.length == 14);
assert(bam[0, ZB(900) .. ZB(2000)].array.length == 14);

assert(bam["CHROMOSOME_I",ZB(940)].array.length == 2);
assert(bam[0, ZB(940)].array.length == 2);


assert(bam["CHROMOSOME_I",ZB(900) .. $].array.length == 18);
assert(bam[0, ZB(900) .. $].array.length == 18);
assert(bam["CHROMOSOME_I",$].array.length == 0);
assert(bam[0, $].array.length == 0);
assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33);

assert(bam.query("CHROMOSOME_I", OBHO(901, 2000)) .array.length == 14);
assert(bam["CHROMOSOME_I",OB(901) .. OB(2001)].array.length == 14);
assert(bam[0, OB(901) .. OB(2001)].array.length == 14);

assert(bam["CHROMOSOME_I",OB(941)].array.length == 2);
assert(bam[0, OB(941)].array.length == 2);


assert(bam["CHROMOSOME_I",OB(901) .. $].array.length == 18);
assert(bam[0, OB(901) .. $].array.length == 18);
assert(bam["CHROMOSOME_I",$].array.length == 0);
assert(bam[0, $].array.length == 0);
assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33);

assert(bam["CHROMOSOME_II",$-1918 .. $].array.length == 0);
assert(bam["CHROMOSOME_II", ZB(3082) .. $].array.length == 0);
assert(bam["CHROMOSOME_II",$-1919 .. $].array.length == 1);
assert(bam["CHROMOSOME_II", ZB(3081) .. $].array.length == 1);
assert(bam["CHROMOSOME_II",$-2018 .. $].array.length == 2);

auto range = bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]];
auto range1 = range.save;
range = range.drop(5);
auto range2 = range.save;
range = range.drop(5);
auto range3 = range.save;
range = range.drop(10);
auto range4 = range.save;
assert(range1.array.length == 33);
assert(range2.array.length == 28);
assert(range3.array.length == 23);
assert(range4.array.length == 13);
import std.stdio;
import dhtslib.sam;
import std.array : array;
import std.path : buildPath,dirName;
import std.range : drop;
hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
hts_log_info(__FUNCTION__, "Testing RecordsRange save()");
hts_log_info(__FUNCTION__, "Loading test file");

auto bam = SAMReader(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 4);

auto range = bam.query("CHROMOSOME_I", ZBHO(900, 2000));
assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)).array.length == 14);

auto range1 =  range.save;
range = range.drop(1);

auto range2 =  range.save;
range = range.drop(2);

auto range3 =  range.save;
range = range.drop(3);

auto range4 =  range.save;
range = range.drop(5);

auto range5 =  range.save;
range.popFront;

auto range6 = range.save;

assert(range1.array.length == 14);
assert(range2.array.length == 13);
assert(range3.array.length == 11);
assert(range4.array.length == 8);
assert(range5.array.length == 3);
assert(range6.array.length == 2);

Meta