1 module dhtslib.sam.record;
2 
3 import core.stdc.stdlib : calloc, free, realloc;
4 import core.stdc..string : memset;
5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno;
6 import std..string : fromStringz, toStringz;
7 import std.algorithm : filter;
8 import std.range.interfaces : InputRange, inputRangeObject;
9 import std.traits : isArray, isIntegral, isSomeString;
10 
11 import htslib.hts : seq_nt16_str, seq_nt16_table;
12 
13 import htslib.hts_log;
14 import htslib.sam;
15 import dhtslib.sam.cigar;
16 import dhtslib.sam.tagvalue;
17 import dhtslib.sam.header;
18 import dhtslib.coordinates;
19 import dhtslib.memory;
20 
21 /**
22 Encapsulates a SAM/BAM/CRAM record,
23 using the bam1_t type for memory efficiency,
24 and the htslib helper functions for speed.
25 **/
26 struct SAMRecord
27 {
28     /// Backing SAM/BAM row record
29     Bam1 b;
30 
31     /// Corresponding SAM/BAM header data
32     SAMHeader h;
33    
34     /// ditto
35     this(SAMHeader h)
36     {
37         this.b = Bam1(bam_init1);
38         this.h = h;
39     }
40  
41     /// Construct SAMRecord from supplied bam1_t
42     deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support")
43     this(bam1_t* b)
44     {
45         this.b = Bam1(b);
46     }
47 
48     /// Construct SAMRecord from supplied bam1_t and sam_hdr_type
49     this(bam1_t* b, SAMHeader h)
50     {
51         this.b = Bam1(b);
52         this.h = h;
53     }
54 
55     /* bam1_core_t fields */
56 
57     /// chromosome ID, defined by sam_hdr_t
58     pragma(inline, true)
59     @nogc @safe nothrow
60     @property int tid() { return this.b.core.tid; }
61     /// ditto
62     pragma(inline, true)
63     @nogc @safe nothrow
64     @property void tid(int tid) { this.b.core.tid = tid; }
65 
66     /// mapped contig (tid) name, defined by sam_hdr_t* h
67     pragma(inline, true)
68     @property const(char)[] referenceName()
69     {
70         assert(!this.h.isNull);
71         return this.h.targetName(this.b.core.tid);
72     }
73  
74     /// 0-based leftmost coordinate
75     pragma(inline, true)
76     @nogc @safe nothrow
77     @property ZB pos() { return ZB(this.b.core.pos); }
78     /// ditto
79     pragma(inline, true)
80     @nogc @safe nothrow
81     @property void pos(ZB pos) { this.b.core.pos = pos.pos; }
82 
83     /// 0-based, half-open coordinates that represent
84     /// the mapped length of the alignment 
85     pragma(inline, true)
86     @property ZBHO coordinates()
87     {
88         return ZBHO(this.pos, this.pos + this.cigar.alignedLength);
89     }
90 
91     // TODO: @field  bin     bin calculated by bam_reg2bin()
92 
93     /// mapping quality
94     pragma(inline, true)
95     @nogc @safe nothrow
96     @property ubyte qual() { return this.b.core.qual; }
97     /// ditto
98     pragma(inline, true)
99     @nogc @safe nothrow
100     @property void qual(ubyte q) { this.b.core.qual = q; }
101 
102     /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly.
103     pragma(inline, true)
104     @nogc @safe nothrow
105     @property int l_qname() { return this.b.core.l_qname; }
106 
107     /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly.
108     pragma(inline, true)
109     @nogc @safe nothrow
110     @property int l_extranul() { return this.b.core.l_extranul; }
111 
112     /// bitwise flag
113     pragma(inline, true)
114     @nogc @safe nothrow
115     @property ushort flag() { return this.b.core.flag; }
116     /// ditto
117     pragma(inline, true)
118     @nogc @safe nothrow
119     @property void flag(ushort fl) { this.b.core.flag = fl; }
120 
121     /// is read paired?
122     pragma(inline, true)
123     @nogc @safe nothrow
124     @property bool isPaired()
125     {
126         return cast(bool)(b.core.flag & BAM_FPAIRED);
127     }
128 
129     /// is read paired?
130     pragma(inline, true)
131     @nogc @safe nothrow
132     @property bool isProperPair()
133     {
134         return cast(bool)(b.core.flag & BAM_FPROPER_PAIR);
135     }
136 
137     /// is read mapped?
138     pragma(inline, true)
139     @nogc @safe nothrow
140     @property bool isMapped()
141     {
142         return (b.core.flag & BAM_FUNMAP) == 0;
143     }
144 
145     /// is read mate mapped?
146     pragma(inline, true)
147     @nogc @safe nothrow
148     @property bool isMateMapped()
149     {
150         return (b.core.flag & BAM_FMUNMAP) == 0;
151     }
152 
153     /// is read reversed?
154     /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
155     pragma(inline, true)
156     @nogc @trusted nothrow
157     @property bool isReversed()
158     {
159         return bam_is_rev(this.b);
160     }
161 
162     /// is mate reversed?
163     /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
164     pragma(inline, true)
165     @nogc @trusted nothrow
166     @property bool mateReversed()
167     {
168         return bam_is_mrev(this.b);
169     }
170 
171     /// is this read 1?
172     pragma(inline, true)
173     @nogc @safe nothrow
174     @property bool isRead1()
175     {
176         return cast(bool)(b.core.flag & BAM_FREAD1);
177     }
178     
179     /// is this read 2?
180     pragma(inline, true)
181     @nogc @safe nothrow
182     @property bool isRead2()
183     {
184         return cast(bool)(b.core.flag & BAM_FREAD2);
185     }
186 
187     /// is read secondary?
188     @property bool isSecondary()
189     {
190         version(LDC){
191             pragma(inline, true);
192         }
193         return cast(bool)(b.core.flag & BAM_FSECONDARY);
194     }
195 
196     /// Does this read fail qc?
197     pragma(inline, true)
198     @nogc @safe nothrow
199     @property bool isQCFail()
200     {
201         return cast(bool)(b.core.flag & BAM_FQCFAIL);
202     }
203 
204     /// Is this read marked as a PCR or Optical duplicate?
205     pragma(inline, true)
206     @nogc @safe nothrow
207     @property bool isDuplicate()
208     {
209         return cast(bool)(b.core.flag & BAM_FDUP);
210     }
211 
212     /// is read supplementary?
213     pragma(inline, true)
214     @nogc @safe nothrow
215     @property bool isSupplementary()
216     {
217         return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY);
218     }
219 
220     /// Return read strand + or - (as char)
221     @property char strand(){
222         return ['+','-'][isReversed()];
223     }
224 
225     /// Get a slice of query name (read name)
226     /// if you keep this around you should copy it
227     /// -- wraps bam_get_qname(bam1_t *b)
228     pragma(inline, true)
229     @nogc @trusted nothrow
230     @property const(char)[] queryName()
231     {
232         // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
233         return fromStringz(bam_get_qname(this.b));
234     }
235     
236     /// Set query name (read name)
237     pragma(inline, true)
238     @trusted nothrow
239     @property void queryName(string qname)
240     {
241         auto ret = bam_set_qname(this.b, toStringz(qname));
242         if(ret != 0)
243             hts_log_error(__FUNCTION__, "Could not set queryname");
244     }
245 
246     /// query (and quality string) length
247     pragma(inline, true)
248     @nogc @safe nothrow
249     @property int length()
250     {
251         return this.b.core.l_qseq;
252     }
253 
254     /// Return char array of the sequence
255     /// see samtools/sam_view.c: get_read
256     pragma(inline, true)
257     @trusted nothrow
258     @property const(char)[] sequence()
259     {
260         // calloc fills with \0; +1 len for Cstring
261         // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1);
262         char[] s;
263         s.length = this.b.core.l_qseq;
264 
265         // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
266         auto seqdata = bam_get_seq(this.b);
267 
268         for (int i; i < this.b.core.l_qseq; i++)
269         {
270             s[i] = seq_nt16_str[bam_seqi(seqdata, i)];
271         }
272         return s;
273     }
274 
275     /// Assigns sequence and resets quality score
276     pragma(inline, true)
277     @trusted nothrow
278     @property void sequence(const(char)[] seq)
279     {
280         /// There is no bam_seq_seq
281 
282         //nibble encode sequence
283         ubyte[] en_seq;
284         en_seq.length = (seq.length + 1) >> 1;
285         
286         for(auto i=0;i < seq.length;i++){
287             bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]);
288         }
289 
290         //get length of data before seq
291         uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof);
292 
293         //get starting point after seq
294         uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq;
295 
296         //copy previous data
297         ubyte[] prev=b.data[0 .. l_prev].dup;
298 
299         //copy rest of data
300         ubyte[] rest=b.data[start_rest .. b.l_data].dup;
301         
302         //if not enough space
303         if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){
304 
305             // realloc
306             auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest));
307             assert(ptr != null);
308             b.data = ptr;
309 
310             // increase capacity
311             b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest);
312         }
313 
314         //reassign q_name and rest of data
315         b.data[0 .. l_prev] = prev;
316         b.data[l_prev .. en_seq.length + l_prev] = en_seq;
317         //set qscores to 255 
318         memset(b.data + en_seq.length + l_prev, 255, seq.length);
319         b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest;
320 
321         //reset data length, seq length
322         b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest);
323         b.core.l_qseq = cast(int) (seq.length);
324     }
325 
326     /// Return array of the quality scores
327     /// see samtools/sam_view.c: get_quality
328     /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead?
329     pragma(inline, true)
330     @nogc @trusted nothrow
331     @property const(ubyte)[] qscores()
332     {
333         auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq];
334         return slice;
335     }
336 
337     /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence
338     pragma(inline, true)
339     @trusted nothrow
340     @property void qscores(const(ubyte)[] seq)
341     {
342         /// There is no bam_seq_qual
343         
344         if(seq.length != b.core.l_qseq){
345             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
346             return;
347         }
348         
349         //get length of data before seq
350         uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1);
351         b.data[l_prev .. seq.length + l_prev] = seq;
352     }
353 
354     /// get phred-scaled base qualities as char array
355     pragma(inline, true)
356     @trusted nothrow
357     const(char)[] qscoresPhredScaled()
358     {
359         auto bqs = this.qscores.dup;
360         bqs[] += 33;
361         return cast(const(char)[]) bqs;
362     }
363 
364     /// Create cigar from bam1_t record
365     pragma(inline, true)
366     @property Cigar cigar()
367     {
368         return Cigar(this.b);
369     }
370 
371     /// Assign a cigar string
372     pragma(inline, true)
373     @trusted
374     @property void cigar(Cigar cigar)
375     {
376         // no bam_set_cigar
377 
378         //get length of data before seq
379         uint l_prev = b.core.l_qname;
380 
381         //get starting point after seq
382         uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof);
383 
384         //copy previous data
385         ubyte[] prev = b.data[0..l_prev].dup;
386 
387         //copy rest of data
388         ubyte[] rest = b.data[start_rest..b.l_data].dup;
389 
390         //if not enough space
391         if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){
392             // realloc
393             auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest));
394             assert(ptr != null);
395             b.data = ptr;
396 
397             // increase capacity
398             b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
399         }
400 
401         //reassign q_name and rest of data
402         b.data[0 .. l_prev]=prev;
403         //without cigar.ops.dup we get the overlapping array copy error
404         b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]);
405         b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest;
406 
407         //reset data length, seq length
408         b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
409         b.core.n_cigar = cast(uint) (cigar.length);
410     }
411 
412     /// Get aux tag from bam1_t record and return a TagValue
413     TagValue opIndex(string val)
414     {
415         return TagValue(this.b, val[0..2]);
416     }
417 
418     /// Assign a tag key value pair
419     void opIndexAssign(T)(T value, string index)
420     if(!isArray!T || isSomeString!T)
421     {
422         static if(isIntegral!T){
423             auto err = bam_aux_update_int(b, index[0..2], value);
424         }else static if(is(T==float)){
425             auto err = bam_aux_update_float(b, index[0..2], value);
426         }else static if(isSomeString!T){
427             auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr);
428         }
429         if(err == 0) return;
430         switch(errno){
431             case EINVAL:
432                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
433                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
434             case ENOMEM:
435                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
436                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
437                 " than the maximum size allowed in a bam record (2Gbytes).");
438             case ERANGE:
439             case EOVERFLOW:
440                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
441                 " bam tag (-2147483647 to 4294967295).");
442             case -1:
443                 throw new Exception("Something went wrong adding the bam tag.");
444             case 0:
445                 return;
446             default:
447                 throw new Exception("Something went wrong adding the bam tag.");
448         }
449     }
450 
451     /// Assign a tag key array pair
452     void opIndexAssign(T)(T[] value, string index)
453     if(!isSomeString!(T[]))
454     {
455         auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr);
456         if(err == 0) return;
457         switch(errno){
458             case EINVAL:
459                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
460                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
461             case ENOMEM:
462                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
463                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
464                 " than the maximum size allowed in a bam record (2Gbytes).");
465             case ERANGE:
466             case EOVERFLOW:
467                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
468                 " bam tag (-2147483647 to 4294967295).");
469             case -1:
470                 throw new Exception("Something went wrong adding the bam tag.");
471             case 0:
472                 return;
473             default:
474                 throw new Exception("Something went wrong adding the bam tag.");
475         }
476     }
477 
478     /// chromosome ID of next read in template, defined by bam_hdr_t
479     pragma(inline, true)
480     @nogc @safe nothrow
481     @property int mateTID() { return this.b.core.mtid; }
482     /// ditto
483     pragma(inline, true)
484     @nogc @safe nothrow
485     @property void mateTID(int mtid) { this.b.core.mtid = mtid; }
486 
487     /// 0-based leftmost coordinate of next read in template
488     pragma(inline, true)
489     @nogc @safe nothrow
490     @property long matePos() { return this.b.core.mpos; }
491     /// ditto
492     pragma(inline, true)
493     @nogc @safe nothrow
494     @property void matePos(long mpos) { this.b.core.mpos = mpos; }
495 
496     /// Presumably Insert size, but is undocumented.
497     /// Per samtools source, is measured 5' to 5'
498     /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320
499     pragma(inline, true)
500     @nogc @safe nothrow
501     @property long insertSize() { return this.b.core.isize; }
502     /// ditto
503     pragma(inline, true)
504     @nogc @safe nothrow
505     @property void insertSize(long isize) { this.b.core.isize = isize; }
506 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/
507     /// get aligned coordinates per each cigar op
508     InputRange!AlignedCoordinate getAlignedCoordinates(){
509         return AlignedCoordinatesItr(this.cigar).inputRangeObject;
510     }
511 
512     /// get aligned coordinates per each cigar op within range
513     /// range is 0-based half open using chromosomal coordinates
514     InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){
515         assert(start >= 0);
516         assert(end <= this.cigar.ref_bases_covered);
517         return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject;
518     }
519 
520     struct AlignedPair(bool refSeq)
521     {
522         Coordinate!(Basis.zero) qpos, rpos;
523         Ops cigar_op;
524         char queryBase;
525         static if(refSeq) char refBase;
526 
527         string toString(){
528             import std.conv : to;
529             static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase;
530             else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase;
531         }
532     }
533 
534     /// get a range of aligned read and reference positions
535     /// this is meant to recreate functionality from pysam:
536     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
537     /// range is 0-based half open using chromosomal coordinates
538     auto  getAlignedPairs(bool withRefSeq)(long start, long end)
539     {
540         import dhtslib.sam.md : MDItr;
541         import std.range : dropExactly;
542         struct AlignedPairs(bool refSeq)
543         {
544             InputRange!AlignedCoordinate coords;
545             static if(refSeq) MDItr mdItr;
546             AlignedPair!refSeq current;
547             const(char)[] qseq;
548             
549             this(InputRange!AlignedCoordinate coords, SAMRecord rec, long start, long end){
550                 assert(start < end);
551                 assert(start >= 0);
552                 assert(end <= rec.cigar.ref_bases_covered);
553                 this.coords = coords;
554                 qseq = rec.sequence;
555                 static if(refSeq){
556                     assert(rec["MD"].exists,"MD tag must exist for record");
557                     mdItr = MDItr(rec);
558                     mdItr = mdItr.dropExactly(start);
559                 } 
560                 current.qpos = coords.front.qpos;
561                 current.rpos = coords.front.rpos;
562                 current.cigar_op = coords.front.cigar_op;
563                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
564                 else current.queryBase = qseq[current.qpos];
565                 
566                 static if(refSeq){
567                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
568                     else if(mdItr.front == '=') current.refBase = current.queryBase;
569                     else current.refBase = mdItr.front;
570                 }
571             }
572 
573             AlignedPair!refSeq front(){
574                 return current;
575             }
576 
577             void popFront(){
578                 coords.popFront;
579                 if(coords.empty) return;
580                 current.qpos = coords.front.qpos;
581                 current.rpos = coords.front.rpos;
582                 current.cigar_op = coords.front.cigar_op;
583                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
584                 else current.queryBase = qseq[current.qpos];
585                 
586                 static if(refSeq){
587                     if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront;
588                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
589                     else if(mdItr.front == '=') current.refBase = current.queryBase;
590                     else current.refBase = mdItr.front;
591                 }
592 
593             }
594 
595             bool empty(){
596                 return coords.empty;
597             }
598 
599         }
600         auto coords = this.getAlignedCoordinates(start, end);
601         return AlignedPairs!withRefSeq(coords,this,start,end).inputRangeObject;
602     }
603     /// get a range of aligned read and reference positions
604     /// this is meant to recreate functionality from pysam:
605     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
606     auto getAlignedPairs(bool withRefSeq)(){
607         return getAlignedPairs!withRefSeq(0, this.cigar.ref_bases_covered);
608     }
609 /+ END CHERRY-PICK +/
610 }
611 
612 ///
613 debug(dhtslib_unittest) unittest
614 {
615     import dhtslib.sam;
616     import htslib.hts_log : hts_log_info;
617     import std.path:buildPath,dirName;
618 
619     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
620     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
621     hts_log_info(__FUNCTION__, "Loading test file");
622     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
623     auto ar = bam.allRecords;
624     assert(ar.empty == false);
625     auto read = ar.front;
626 
627     //change queryname
628     hts_log_info(__FUNCTION__, "Testing queryname");
629     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
630     read.queryName="HS18_09653:4:1315:19857:6171";
631     assert(read.queryName=="HS18_09653:4:1315:19857:6171");
632     assert(read.cigar.toString() == "78M1D22M");
633     read.queryName="HS18_09653:4:1315:19857:617122";
634     assert(read.queryName=="HS18_09653:4:1315:19857:617122");
635     assert(read.cigar.toString() == "78M1D22M");
636     assert(read["RG"].check!string);
637     assert(read["RG"].to!string=="1");
638 
639     //change sequence
640     hts_log_info(__FUNCTION__, "Testing sequence");
641     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
642     read.sequence=read.sequence[0..10];
643     assert(read.sequence=="AGCTAGGGCA");
644 
645     ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
646     assert(read.qscores == q);
647     q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30];
648     read.qscores(q);
649     assert(read.qscores == q);
650     q[] += 33;
651     
652     assert(read.qscoresPhredScaled == cast(char[])(q));
653 
654     assert(read.cigar.toString() == "78M1D22M");
655     assert(read["RG"].check!string);
656     assert(read["RG"].to!string=="1");
657 
658     //change cigar
659     hts_log_info(__FUNCTION__, "Testing cigar");
660     auto c=read.cigar;
661     c[$-1].length=21;
662     read.cigar=c;
663     assert(read.cigar.toString() == "78M1D21M");
664     assert(read["RG"].check!string);
665     assert(read["RG"].to!string=="1");
666     
667     //change tag
668     hts_log_info(__FUNCTION__, "Testing tags");
669     read["X0"]=2;
670     assert(read["X0"].to!ubyte==2);
671     assert(read["RG"].check!string);
672     
673     read["RG"]="test";
674     assert(read["RG"].to!string=="test");
675     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
676 }
677 
678 ///
679 debug(dhtslib_unittest) unittest
680 {
681     import dhtslib.sam;
682     import htslib.hts_log : hts_log_info;
683     import std.path:buildPath,dirName;
684 
685     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
686     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
687     hts_log_info(__FUNCTION__, "Loading test file");
688     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
689     auto ar = bam.allRecords;
690     assert(ar.empty == false);
691     auto read = ar.front;
692 
693     //change queryname
694     hts_log_info(__FUNCTION__, "Testing queryname");
695     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
696 
697     //we do this only to force realloc
698     read.b.m_data=read.b.l_data;
699 
700 
701     auto prev_m=read.b.m_data;
702     read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
703     assert(read.b.m_data >prev_m);
704     assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
705     assert(read.cigar.toString() == "78M1D22M");
706     assert(read["RG"].check!string);
707     assert(read["RG"].to!string=="1");
708 
709     //we do this only to force realloc
710     read.b.m_data=read.b.l_data;
711 
712     //change sequence
713     hts_log_info(__FUNCTION__, "Testing sequence");
714     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
715     prev_m=read.b.m_data;
716     read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
717     assert(read.b.m_data >prev_m);
718     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
719     assert(read.cigar.toString() == "78M1D22M");
720     assert(read["RG"].check!string);
721     assert(read["RG"].to!string=="1");
722 
723     //change cigar
724     hts_log_info(__FUNCTION__, "Testing cigar");
725     auto c=read.cigar;
726     c=c~c;
727     prev_m=read.b.m_data;
728     read.cigar=c;
729     assert(read.b.m_data >prev_m);
730     assert(read.cigar.toString() == "78M1D22M78M1D22M");
731     assert(read["RG"].check!string);
732     assert(read["RG"].to!string=="1");
733     
734     //change tag
735     hts_log_info(__FUNCTION__, "Testing tags");
736     read["X0"]=2;
737     assert(read["X0"].to!ubyte==2);
738     assert(read["RG"].check!string);
739     
740     read["RG"]="test";
741     assert(read["RG"].to!string=="test");
742     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
743 }
744 
745 ///
746 debug(dhtslib_unittest) unittest
747 {
748     import std.stdio;
749     import dhtslib.sam;
750     import dhtslib.sam.md : MDItr;
751     import std.algorithm: map;
752     import std.array: array;
753     import std.path:buildPath,dirName;
754     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
755 
756     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
757     auto ar = bam.allRecords;
758     assert(ar.empty == false);
759     SAMRecord read = ar.front;
760     
761     auto pairs = read.getAlignedPairs!true(77, 77 + 5);
762 
763     assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
764     pairs = read.getAlignedPairs!true(77, 77 + 5);
765     assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
766     pairs = read.getAlignedPairs!true(77, 77 + 5);
767     assert(pairs.map!(x => x.refBase).array == "GAAAA");
768     pairs = read.getAlignedPairs!true(77, 77 + 5);
769     assert(pairs.map!(x => x.queryBase).array == "G-AAA");
770 }
771 
772 ///
773 debug(dhtslib_unittest) unittest
774 {
775     import std.stdio : writeln, File;
776     import std.range : drop;
777     import std.utf : toUTFz;
778     import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
779     import std.path:buildPath,dirName;
780     import std.conv:to;
781     import dhtslib.sam : parseSam;
782 
783     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
784     hts_log_info(__FUNCTION__, "Loading sam file");
785     auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
786     auto b = bam_init1();
787     auto hdr = bam_hdr_init();
788     string hdr_str;
789     assert(range.empty == false);
790     for (auto i = 0; i < 4; i++)
791     {
792         hdr_str ~= range.front ~ "\n";
793         range.popFront;
794     }
795     hts_log_info(__FUNCTION__, "Header");
796     writeln(hdr_str);
797     hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
798     hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
799     auto r = new SAMRecord(b);
800     hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
801     assert(r.cigar.toString == "6M1D117M5D28M");
802 }
803 
804 ///
805 debug(dhtslib_unittest) unittest
806 {
807     import std.stdio;
808     import dhtslib.sam;
809     import dhtslib.sam.md : MDItr;
810     import std.algorithm: map;
811     import std.array: array;
812     import std.parallelism : parallel; 
813     import std.path:buildPath,dirName;
814     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
815 
816     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
817     foreach(rec;parallel(bam.allRecords)){
818         rec.queryName = "multithreading test";
819     }
820 
821 }