SAMRecord

Encapsulates a SAM/BAM/CRAM record, using the bam1_t type for memory efficiency, and the htslib helper functions for speed.

struct SAMRecord {}

Constructors

this
this(SAMHeader h)

Corresponding SAM/BAM header data

this
deprecated this(bam1_t* b)

Construct SAMRecord from supplied bam1_t

this
this(bam1_t* b, SAMHeader h)

Construct SAMRecord from supplied bam1_t and sam_hdr_type

Members

Functions

getAlignedCoordinates
InputRange!AlignedCoordinate getAlignedCoordinates()

get aligned coordinates per each cigar op

getAlignedCoordinates
InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end)

get aligned coordinates per each cigar op within range range is 0-based half open using chromosomal coordinates

getAlignedPairs
auto getAlignedPairs()

get a range of aligned read and reference positions this is meant to recreate functionality from pysam: https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs

getAlignedPairs
auto getAlignedPairs(long start, long end)

get a range of aligned read and reference positions this is meant to recreate functionality from pysam: https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs

range is 0-based half open using chromosomal coordinates

opIndex
TagValue opIndex(string val)

Get aux tag from bam1_t record and return a TagValue

opIndexAssign
void opIndexAssign(T value, string index)

Assign a tag key value pair

opIndexAssign
void opIndexAssign(T[] value, string index)

Assign a tag key array pair

qscoresPhredScaled
const(char)[] qscoresPhredScaled()

get phred-scaled base qualities as char array

Properties

cigar
Cigar cigar [@property getter]

Create cigar from bam1_t record

cigar
Cigar cigar [@property setter]

Assign a cigar string

coordinates
ZBHO coordinates [@property getter]

0-based, half-open coordinates that represent the mapped length of the alignment

flag
ushort flag [@property getter]
ushort flag [@property setter]

bitwise flag

insertSize
long insertSize [@property getter]
long insertSize [@property setter]

Presumably Insert size, but is undocumented. Per samtools source, is measured 5' to 5' https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320

isDuplicate
bool isDuplicate [@property getter]

Is this read marked as a PCR or Optical duplicate?

isMapped
bool isMapped [@property getter]

is read mapped?

isMateMapped
bool isMateMapped [@property getter]

is read mate mapped?

isPaired
bool isPaired [@property getter]

is read paired?

isProperPair
bool isProperPair [@property getter]

is read paired?

isQCFail
bool isQCFail [@property getter]

Does this read fail qc?

isRead1
bool isRead1 [@property getter]

is this read 1?

isRead2
bool isRead2 [@property getter]

is this read 2?

isReversed
bool isReversed [@property getter]

is read reversed? bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }

isSecondary
bool isSecondary [@property getter]

is read secondary?

isSupplementary
bool isSupplementary [@property getter]

is read supplementary?

l_extranul
int l_extranul [@property getter]

number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly.

l_qname
int l_qname [@property getter]

length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly.

length
int length [@property getter]

query (and quality string) length

matePos
long matePos [@property getter]
long matePos [@property setter]

0-based leftmost coordinate of next read in template

mateReversed
bool mateReversed [@property getter]

is mate reversed? bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }

mateTID
int mateTID [@property getter]
int mateTID [@property setter]

chromosome ID of next read in template, defined by bam_hdr_t

pos
ZB pos [@property getter]
ZB pos [@property setter]

0-based leftmost coordinate

qscores
const(ubyte)[] qscores [@property getter]

Return array of the quality scores see samtools/sam_view.c: get_quality TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead?

qscores
const(ubyte)[] qscores [@property setter]

Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence

qual
ubyte qual [@property getter]
ubyte qual [@property setter]

mapping quality

queryName
const(char)[] queryName [@property getter]

Get a slice of query name (read name) if you keep this around you should copy it -- wraps bam_get_qname(bam1_t *b)

queryName
string queryName [@property setter]

Set query name (read name)

referenceName
const(char)[] referenceName [@property getter]

mapped contig (tid) name, defined by sam_hdr_t* h

sequence
const(char)[] sequence [@property getter]

Return char array of the sequence see samtools/sam_view.c: get_read

sequence
const(char)[] sequence [@property setter]

Assigns sequence and resets quality score

strand
char strand [@property getter]

Return read strand + or - (as char)

tid
int tid [@property getter]
int tid [@property setter]

chromosome ID, defined by sam_hdr_t

Structs

AlignedPair
struct AlignedPair(bool refSeq)
Undocumented in source.
AlignedPairs
struct AlignedPairs(bool refSeq)
Undocumented in source.

Variables

b
Bam1 b;

Backing SAM/BAM row record

h
SAMHeader h;

Corresponding SAM/BAM header data

Examples

1 import dhtslib.sam;
2 import htslib.hts_log : hts_log_info;
3 import std.path:buildPath,dirName;
4 
5 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
6 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
7 hts_log_info(__FUNCTION__, "Loading test file");
8 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
9 auto ar = bam.allRecords;
10 assert(ar.empty == false);
11 auto read = ar.front;
12 
13 //change queryname
14 hts_log_info(__FUNCTION__, "Testing queryname");
15 assert(read.queryName=="HS18_09653:4:1315:19857:61712");
16 read.queryName="HS18_09653:4:1315:19857:6171";
17 assert(read.queryName=="HS18_09653:4:1315:19857:6171");
18 assert(read.cigar.toString() == "78M1D22M");
19 read.queryName="HS18_09653:4:1315:19857:617122";
20 assert(read.queryName=="HS18_09653:4:1315:19857:617122");
21 assert(read.cigar.toString() == "78M1D22M");
22 assert(read["RG"].check!string);
23 assert(read["RG"].to!string=="1");
24 
25 //change sequence
26 hts_log_info(__FUNCTION__, "Testing sequence");
27 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
28 read.sequence=read.sequence[0..10];
29 assert(read.sequence=="AGCTAGGGCA");
30 
31 ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
32 assert(read.qscores == q);
33 q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30];
34 read.qscores(q);
35 assert(read.qscores == q);
36 q[] += 33;
37 
38 assert(read.qscoresPhredScaled == cast(char[])(q));
39 
40 assert(read.cigar.toString() == "78M1D22M");
41 assert(read["RG"].check!string);
42 assert(read["RG"].to!string=="1");
43 
44 //change cigar
45 hts_log_info(__FUNCTION__, "Testing cigar");
46 auto c=read.cigar;
47 c[$-1].length=21;
48 read.cigar=c;
49 assert(read.cigar.toString() == "78M1D21M");
50 assert(read["RG"].check!string);
51 assert(read["RG"].to!string=="1");
52 
53 //change tag
54 hts_log_info(__FUNCTION__, "Testing tags");
55 read["X0"]=2;
56 assert(read["X0"].to!ubyte==2);
57 assert(read["RG"].check!string);
58 
59 read["RG"]="test";
60 assert(read["RG"].to!string=="test");
61 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
1 import dhtslib.sam;
2 import htslib.hts_log : hts_log_info;
3 import std.path:buildPath,dirName;
4 
5 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
6 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
7 hts_log_info(__FUNCTION__, "Loading test file");
8 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
9 auto ar = bam.allRecords;
10 assert(ar.empty == false);
11 auto read = ar.front;
12 
13 //change queryname
14 hts_log_info(__FUNCTION__, "Testing queryname");
15 assert(read.queryName=="HS18_09653:4:1315:19857:61712");
16 
17 //we do this only to force realloc
18 read.b.m_data=read.b.l_data;
19 
20 
21 auto prev_m=read.b.m_data;
22 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
23 assert(read.b.m_data >prev_m);
24 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
25 assert(read.cigar.toString() == "78M1D22M");
26 assert(read["RG"].check!string);
27 assert(read["RG"].to!string=="1");
28 
29 //we do this only to force realloc
30 read.b.m_data=read.b.l_data;
31 
32 //change sequence
33 hts_log_info(__FUNCTION__, "Testing sequence");
34 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
35 prev_m=read.b.m_data;
36 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
37 assert(read.b.m_data >prev_m);
38 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
39 assert(read.cigar.toString() == "78M1D22M");
40 assert(read["RG"].check!string);
41 assert(read["RG"].to!string=="1");
42 
43 //change cigar
44 hts_log_info(__FUNCTION__, "Testing cigar");
45 auto c=read.cigar;
46 c=c~c;
47 prev_m=read.b.m_data;
48 read.cigar=c;
49 assert(read.b.m_data >prev_m);
50 assert(read.cigar.toString() == "78M1D22M78M1D22M");
51 assert(read["RG"].check!string);
52 assert(read["RG"].to!string=="1");
53 
54 //change tag
55 hts_log_info(__FUNCTION__, "Testing tags");
56 read["X0"]=2;
57 assert(read["X0"].to!ubyte==2);
58 assert(read["RG"].check!string);
59 
60 read["RG"]="test";
61 assert(read["RG"].to!string=="test");
62 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
import std.stdio;
import dhtslib.sam;
import dhtslib.sam.md : MDItr;
import std.algorithm: map;
import std.array: array;
import std.path:buildPath,dirName;
hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);

auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
auto ar = bam.allRecords;
assert(ar.empty == false);
SAMRecord read = ar.front;

auto pairs = read.getAlignedPairs!true(77, 77 + 5);

assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
pairs = read.getAlignedPairs!true(77, 77 + 5);
assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
pairs = read.getAlignedPairs!true(77, 77 + 5);
// assert(pairs.map!(x => x.refBase).array == "GAAAA");
pairs = read.getAlignedPairs!true(77, 77 + 5);
assert(pairs.map!(x => x.queryBase).array == "G-AAA");
import std.stdio : writeln, File;
import std.range : drop;
import std.utf : toUTFz;
import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
import std.path:buildPath,dirName;
import std.conv:to;
import dhtslib.sam : parseSam;

hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
hts_log_info(__FUNCTION__, "Loading sam file");
auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
auto b = bam_init1();
auto hdr = bam_hdr_init();
string hdr_str;
assert(range.empty == false);
for (auto i = 0; i < 4; i++)
{
    hdr_str ~= range.front ~ "\n";
    range.popFront;
}
hts_log_info(__FUNCTION__, "Header");
writeln(hdr_str);
hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
auto r = new SAMRecord(b);
hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
assert(r.cigar.toString == "6M1D117M5D28M");
import std.stdio;
import dhtslib.sam;
import dhtslib.sam.md : MDItr;
import std.algorithm: map;
import std.array: array;
import std.parallelism : parallel; 
import std.path:buildPath,dirName;
hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);

auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
foreach(rec;parallel(bam.allRecords)){
    rec.queryName = "multithreading test";
}

Meta