1 module dhtslib.sam.record;
2 
3 import core.stdc.stdlib : calloc, free, realloc;
4 import core.stdc.string : memset;
5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno;
6 import std.string : fromStringz, toStringz;
7 import std.algorithm : filter;
8 import std.range.interfaces : InputRange, inputRangeObject;
9 import std.traits : isArray, isIntegral, isSomeString;
10 
11 import htslib.hts : seq_nt16_str, seq_nt16_table;
12 
13 import htslib.hts_log;
14 import htslib.sam;
15 import dhtslib.sam.cigar;
16 import dhtslib.sam.tagvalue;
17 import dhtslib.sam.header;
18 import dhtslib.coordinates;
19 import dhtslib.memory;
20 
21 /**
22 Encapsulates a SAM/BAM/CRAM record,
23 using the bam1_t type for memory efficiency,
24 and the htslib helper functions for speed.
25 **/
26 struct SAMRecord
27 {
28     /// Backing SAM/BAM row record
29     Bam1 b;
30 
31     /// Corresponding SAM/BAM header data
32     SAMHeader h;
33    
34     /// ditto
35     this(SAMHeader h)
36     {
37         this.b = Bam1(bam_init1);
38         this.h = h;
39     }
40  
41     /// Construct SAMRecord from supplied bam1_t
42     deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support")
43     this(bam1_t* b)
44     {
45         this.b = Bam1(b);
46     }
47 
48     /// Construct SAMRecord from supplied bam1_t and sam_hdr_type
49     this(bam1_t* b, SAMHeader h)
50     {
51         this.b = Bam1(b);
52         this.h = h;
53     }
54 
55     /* bam1_core_t fields */
56 
57     /// chromosome ID, defined by sam_hdr_t
58     pragma(inline, true)
59     @nogc @safe nothrow
60     @property int tid() { return this.b.core.tid; }
61     /// ditto
62     pragma(inline, true)
63     @nogc @safe nothrow
64     @property void tid(int tid) { this.b.core.tid = tid; }
65 
66     /// mapped contig (tid) name, defined by sam_hdr_t* h
67     pragma(inline, true)
68     @property const(char)[] referenceName()
69     {
70         assert(!this.h.isNull);
71         return this.h.targetName(this.b.core.tid);
72     }
73  
74     /// 0-based leftmost coordinate
75     pragma(inline, true)
76     @nogc @safe nothrow
77     @property ZB pos() { return ZB(this.b.core.pos); }
78     /// ditto
79     pragma(inline, true)
80     @nogc @safe nothrow
81     @property void pos(ZB pos) { this.b.core.pos = pos.pos; }
82 
83     /// 0-based, half-open coordinates that represent
84     /// the mapped length of the alignment 
85     pragma(inline, true)
86     @property ZBHO coordinates()
87     {
88         return ZBHO(this.pos, this.pos + this.cigar.alignedLength);
89     }
90 
91     // TODO: @field  bin     bin calculated by bam_reg2bin()
92 
93     /// mapping quality
94     pragma(inline, true)
95     @nogc @safe nothrow
96     @property ubyte qual() { return this.b.core.qual; }
97     /// ditto
98     pragma(inline, true)
99     @nogc @safe nothrow
100     @property void qual(ubyte q) { this.b.core.qual = q; }
101 
102     /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly.
103     pragma(inline, true)
104     @nogc @safe nothrow
105     @property int l_qname() { return this.b.core.l_qname; }
106 
107     /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly.
108     pragma(inline, true)
109     @nogc @safe nothrow
110     @property int l_extranul() { return this.b.core.l_extranul; }
111 
112     /// bitwise flag
113     pragma(inline, true)
114     @nogc @safe nothrow
115     @property ushort flag() { return this.b.core.flag; }
116     /// ditto
117     pragma(inline, true)
118     @nogc @safe nothrow
119     @property void flag(ushort fl) { this.b.core.flag = fl; }
120 
121     /// is read paired?
122     pragma(inline, true)
123     @nogc @safe nothrow
124     @property bool isPaired()
125     {
126         return cast(bool)(b.core.flag & BAM_FPAIRED);
127     }
128 
129     /// is read paired?
130     pragma(inline, true)
131     @nogc @safe nothrow
132     @property bool isProperPair()
133     {
134         return cast(bool)(b.core.flag & BAM_FPROPER_PAIR);
135     }
136 
137     /// is read mapped?
138     pragma(inline, true)
139     @nogc @safe nothrow
140     @property bool isMapped()
141     {
142         return (b.core.flag & BAM_FUNMAP) == 0;
143     }
144 
145     /// is read mate mapped?
146     pragma(inline, true)
147     @nogc @safe nothrow
148     @property bool isMateMapped()
149     {
150         return (b.core.flag & BAM_FMUNMAP) == 0;
151     }
152 
153     /// is read reversed?
154     /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
155     pragma(inline, true)
156     @nogc @trusted nothrow
157     @property bool isReversed()
158     {
159         return bam_is_rev(this.b);
160     }
161 
162     /// is mate reversed?
163     /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
164     pragma(inline, true)
165     @nogc @trusted nothrow
166     @property bool mateReversed()
167     {
168         return bam_is_mrev(this.b);
169     }
170 
171     /// is this read 1?
172     pragma(inline, true)
173     @nogc @safe nothrow
174     @property bool isRead1()
175     {
176         return cast(bool)(b.core.flag & BAM_FREAD1);
177     }
178     
179     /// is this read 2?
180     pragma(inline, true)
181     @nogc @safe nothrow
182     @property bool isRead2()
183     {
184         return cast(bool)(b.core.flag & BAM_FREAD2);
185     }
186 
187     /// is read secondary?
188     @property bool isSecondary()
189     {
190         version(LDC){
191             pragma(inline, true);
192         }
193         return cast(bool)(b.core.flag & BAM_FSECONDARY);
194     }
195 
196     /// Does this read fail qc?
197     pragma(inline, true)
198     @nogc @safe nothrow
199     @property bool isQCFail()
200     {
201         return cast(bool)(b.core.flag & BAM_FQCFAIL);
202     }
203 
204     /// Is this read marked as a PCR or Optical duplicate?
205     pragma(inline, true)
206     @nogc @safe nothrow
207     @property bool isDuplicate()
208     {
209         return cast(bool)(b.core.flag & BAM_FDUP);
210     }
211 
212     /// is read supplementary?
213     pragma(inline, true)
214     @nogc @safe nothrow
215     @property bool isSupplementary()
216     {
217         return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY);
218     }
219 
220     /// Return read strand + or - (as char)
221     @property char strand(){
222         return ['+','-'][isReversed()];
223     }
224 
225     /// Get a slice of query name (read name)
226     /// if you keep this around you should copy it
227     /// -- wraps bam_get_qname(bam1_t *b)
228     pragma(inline, true)
229     @nogc @trusted nothrow
230     @property const(char)[] queryName()
231     {
232         // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
233         return fromStringz(bam_get_qname(this.b));
234     }
235     
236     /// Set query name (read name)
237     pragma(inline, true)
238     @trusted nothrow
239     @property void queryName(string qname)
240     {
241         auto ret = bam_set_qname(this.b, toStringz(qname));
242         if(ret != 0)
243             hts_log_error(__FUNCTION__, "Could not set queryname");
244     }
245 
246     /// query (and quality string) length
247     pragma(inline, true)
248     @nogc @safe nothrow
249     @property int length()
250     {
251         return this.b.core.l_qseq;
252     }
253 
254     /// Return char array of the sequence
255     /// see samtools/sam_view.c: get_read
256     pragma(inline, true)
257     @trusted nothrow
258     @property const(char)[] sequence()
259     {
260         // calloc fills with \0; +1 len for Cstring
261         // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1);
262         char[] s;
263         s.length = this.b.core.l_qseq;
264 
265         // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
266         auto seqdata = bam_get_seq(this.b);
267 
268         for (int i; i < this.b.core.l_qseq; i++)
269         {
270             s[i] = seq_nt16_str[bam_seqi(seqdata, i)];
271         }
272         return s;
273     }
274 
275     /// Assigns sequence and resets quality score
276     pragma(inline, true)
277     @trusted nothrow
278     @property void sequence(const(char)[] seq)
279     {
280         /// There is no bam_seq_seq
281 
282         //nibble encode sequence
283         ubyte[] en_seq;
284         en_seq.length = (seq.length + 1) >> 1;
285         
286         for(auto i=0;i < seq.length;i++){
287             bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]);
288         }
289 
290         //get length of data before seq
291         uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof);
292 
293         //get starting point after seq
294         uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq;
295 
296         //copy previous data
297         ubyte[] prev=b.data[0 .. l_prev].dup;
298 
299         //copy rest of data
300         ubyte[] rest=b.data[start_rest .. b.l_data].dup;
301         
302         //if not enough space
303         if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){
304 
305             // realloc
306             auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest));
307             assert(ptr != null);
308             b.data = ptr;
309 
310             // increase capacity
311             b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest);
312         }
313 
314         //reassign q_name and rest of data
315         b.data[0 .. l_prev] = prev;
316         b.data[l_prev .. en_seq.length + l_prev] = en_seq;
317         //set qscores to 255 
318         memset(b.data + en_seq.length + l_prev, 255, seq.length);
319         b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest;
320 
321         //reset data length, seq length
322         b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest);
323         b.core.l_qseq = cast(int) (seq.length);
324     }
325 
326     /// Return array of the quality scores
327     /// see samtools/sam_view.c: get_quality
328     /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead?
329     pragma(inline, true)
330     @nogc @trusted nothrow
331     @property const(ubyte)[] qscores()
332     {
333         auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq];
334         return slice;
335     }
336 
337     /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence
338     pragma(inline, true)
339     @trusted nothrow
340     @property void qscores(const(ubyte)[] seq)
341     {
342         /// There is no bam_seq_qual
343         
344         if(seq.length != b.core.l_qseq){
345             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
346             return;
347         }
348         
349         //get length of data before seq
350         uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1);
351         b.data[l_prev .. seq.length + l_prev] = seq;
352     }
353 
354     /// get phred-scaled base qualities as char array
355     pragma(inline, true)
356     @trusted nothrow
357     const(char)[] qscoresPhredScaled()
358     {
359         auto bqs = this.qscores.dup;
360         bqs[] += 33;
361         return cast(const(char)[]) bqs;
362     }
363 
364     /// Create cigar from bam1_t record
365     pragma(inline, true)
366     @property Cigar cigar()
367     {
368         return Cigar(this.b);
369     }
370 
371     /// Assign a cigar string
372     pragma(inline, true)
373     @trusted
374     @property void cigar(Cigar cigar)
375     {
376         // no bam_set_cigar
377 
378         //get length of data before seq
379         uint l_prev = b.core.l_qname;
380 
381         //get starting point after seq
382         uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof);
383 
384         //copy previous data
385         ubyte[] prev = b.data[0..l_prev].dup;
386 
387         //copy rest of data
388         ubyte[] rest = b.data[start_rest..b.l_data].dup;
389 
390         //if not enough space
391         if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){
392             // realloc
393             auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest));
394             assert(ptr != null);
395             b.data = ptr;
396 
397             // increase capacity
398             b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
399         }
400 
401         //reassign q_name and rest of data
402         b.data[0 .. l_prev]=prev;
403         //without cigar.ops.dup we get the overlapping array copy error
404         b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]);
405         b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest;
406 
407         //reset data length, seq length
408         b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
409         b.core.n_cigar = cast(uint) (cigar.length);
410     }
411 
412     /// Get aux tag from bam1_t record and return a TagValue
413     TagValue opIndex(string val)
414     {
415         return TagValue(this.b, val[0..2]);
416     }
417 
418     /// Assign a tag key value pair
419     void opIndexAssign(T)(T value, string index)
420     if(!isArray!T || isSomeString!T)
421     {
422         static if(isIntegral!T){
423             auto err = bam_aux_update_int(b, index[0..2], value);
424         }else static if(is(T==float)){
425             auto err = bam_aux_update_float(b, index[0..2], value);
426         }else static if(isSomeString!T){
427             auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr);
428         }
429         if(err == 0) return;
430         switch(errno){
431             case EINVAL:
432                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
433                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
434             case ENOMEM:
435                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
436                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
437                 " than the maximum size allowed in a bam record (2Gbytes).");
438             case ERANGE:
439             case EOVERFLOW:
440                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
441                 " bam tag (-2147483647 to 4294967295).");
442             case -1:
443                 throw new Exception("Something went wrong adding the bam tag.");
444             case 0:
445                 return;
446             default:
447                 throw new Exception("Something went wrong adding the bam tag.");
448         }
449     }
450 
451     /// Assign a tag key array pair
452     void opIndexAssign(T)(T[] value, string index)
453     if(!isSomeString!(T[]))
454     {
455         auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr);
456         if(err == 0) return;
457         switch(errno){
458             case EINVAL:
459                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
460                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
461             case ENOMEM:
462                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
463                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
464                 " than the maximum size allowed in a bam record (2Gbytes).");
465             case ERANGE:
466             case EOVERFLOW:
467                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
468                 " bam tag (-2147483647 to 4294967295).");
469             case -1:
470                 throw new Exception("Something went wrong adding the bam tag.");
471             case 0:
472                 return;
473             default:
474                 throw new Exception("Something went wrong adding the bam tag.");
475         }
476     }
477 
478     /// chromosome ID of next read in template, defined by bam_hdr_t
479     pragma(inline, true)
480     @nogc @safe nothrow
481     @property int mateTID() { return this.b.core.mtid; }
482     /// ditto
483     pragma(inline, true)
484     @nogc @safe nothrow
485     @property void mateTID(int mtid) { this.b.core.mtid = mtid; }
486 
487     /// 0-based leftmost coordinate of next read in template
488     pragma(inline, true)
489     @nogc @safe nothrow
490     @property long matePos() { return this.b.core.mpos; }
491     /// ditto
492     pragma(inline, true)
493     @nogc @safe nothrow
494     @property void matePos(long mpos) { this.b.core.mpos = mpos; }
495 
496     /// Presumably Insert size, but is undocumented.
497     /// Per samtools source, is measured 5' to 5'
498     /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320
499     pragma(inline, true)
500     @nogc @safe nothrow
501     @property long insertSize() { return this.b.core.isize; }
502     /// ditto
503     pragma(inline, true)
504     @nogc @safe nothrow
505     @property void insertSize(long isize) { this.b.core.isize = isize; }
506 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/
507     /// get aligned coordinates per each cigar op
508     InputRange!AlignedCoordinate getAlignedCoordinates(){
509         return AlignedCoordinatesItr(this.cigar).inputRangeObject;
510     }
511 
512     /// get aligned coordinates per each cigar op within range
513     /// range is 0-based half open using chromosomal coordinates
514     InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){
515         assert(start >= 0);
516         assert(end <= this.cigar.ref_bases_covered);
517         return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject;
518     }
519 
520     struct AlignedPair(bool refSeq)
521     {
522         Coordinate!(Basis.zero) qpos= ZB(-1);
523 
524         Coordinate!(Basis.zero) rpos= ZB(-1);
525         
526         Ops cigar_op = Ops.MATCH;
527         
528         char queryBase = '-';
529 
530         static if(refSeq) 
531             char refBase = '-';
532 
533         string toString(){
534             import std.conv : to;
535             static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase;
536             else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase;
537         }
538     }
539     struct AlignedPairs(bool refSeq)
540     {
541         import dhtslib.sam.md : MDItr;
542         import std.range : dropExactly;
543 
544         InputRange!AlignedCoordinate coords;
545         static if(refSeq) MDItr mdItr;
546         AlignedPair!refSeq current;
547         Bam1 b;
548         ubyte * seq;
549         
550         this(SAMRecord rec){
551             this.b = rec.b;
552             this.seq = bam_get_seq(this.b);
553             this.coords = rec.getAlignedCoordinates;
554             static if(refSeq){
555                 assert(rec["MD"].exists,"MD tag must exist for record");
556                 mdItr = MDItr(rec);
557             } 
558             current.qpos = coords.front.qpos;
559             current.rpos = coords.front.rpos;
560             current.cigar_op = coords.front.cigar_op;
561             if(current.cigar_op.isQueryConsuming) 
562                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
563             
564             static if(refSeq){
565                 if(current.cigar_op.isReferenceConsuming){
566                     if(mdItr.front == '=')
567                         current.refBase = current.queryBase;
568                     else 
569                         current.refBase = mdItr.front;
570                     mdItr.popFront;
571                 }
572                 
573             }
574         }
575 
576         this(SAMRecord rec, long start, long end){
577             assert(start < end);
578             assert(start >= 0);
579             assert(end <= rec.cigar.ref_bases_covered);
580             this.b = rec.b;
581             this.seq = bam_get_seq(this.b);
582             this.coords = rec.getAlignedCoordinates(start, end);
583             static if(refSeq){
584                 assert(rec["MD"].exists,"MD tag must exist for record");
585                 mdItr = MDItr(rec);
586                 mdItr = mdItr.dropExactly(start);
587             } 
588             current.qpos = coords.front.qpos;
589             current.rpos = coords.front.rpos;
590             current.cigar_op = coords.front.cigar_op;
591             if(current.cigar_op.isQueryConsuming) 
592                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
593             
594             static if(refSeq){
595                 if(current.cigar_op.isReferenceConsuming){
596                     if(mdItr.front == '=')
597                         current.refBase = current.queryBase;
598                     else 
599                         current.refBase = mdItr.front;
600                     mdItr.popFront;
601                 }
602                 
603             }
604         }
605 
606         AlignedPair!refSeq front(){
607             return current;
608         }
609 
610         void popFront(){
611             coords.popFront;
612             if(coords.empty) return;
613 
614             current = current.init;
615             current.qpos = coords.front.qpos;
616             current.rpos = coords.front.rpos;
617             current.cigar_op = coords.front.cigar_op;
618 
619             if(current.cigar_op.isQueryConsuming)
620                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
621             
622             static if(refSeq){
623                 if(current.cigar_op.isReferenceConsuming){
624                     if(mdItr.front == '=') 
625                         current.refBase = current.queryBase;
626                     else 
627                         current.refBase = mdItr.front;
628                     mdItr.popFront;
629                 }
630             }
631 
632         }
633 
634         bool empty(){
635             return coords.empty;
636         }
637 
638     }
639     /// get a range of aligned read and reference positions
640     /// this is meant to recreate functionality from pysam:
641     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
642     /// range is 0-based half open using chromosomal coordinates
643     auto  getAlignedPairs(bool withRefSeq)(long start, long end)
644     {
645         return AlignedPairs!withRefSeq(this,start,end).inputRangeObject;
646     }
647     /// get a range of aligned read and reference positions
648     /// this is meant to recreate functionality from pysam:
649     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
650     auto getAlignedPairs(bool withRefSeq)(){
651         return AlignedPairs!withRefSeq(this);
652     }
653 /+ END CHERRY-PICK +/
654 }
655 
656 ///
657 debug(dhtslib_unittest) unittest
658 {
659     import dhtslib.sam;
660     import htslib.hts_log : hts_log_info;
661     import std.path:buildPath,dirName;
662 
663     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
664     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
665     hts_log_info(__FUNCTION__, "Loading test file");
666     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
667     auto ar = bam.allRecords;
668     assert(ar.empty == false);
669     auto read = ar.front;
670 
671     //change queryname
672     hts_log_info(__FUNCTION__, "Testing queryname");
673     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
674     read.queryName="HS18_09653:4:1315:19857:6171";
675     assert(read.queryName=="HS18_09653:4:1315:19857:6171");
676     assert(read.cigar.toString() == "78M1D22M");
677     read.queryName="HS18_09653:4:1315:19857:617122";
678     assert(read.queryName=="HS18_09653:4:1315:19857:617122");
679     assert(read.cigar.toString() == "78M1D22M");
680     assert(read["RG"].check!string);
681     assert(read["RG"].to!string=="1");
682 
683     //change sequence
684     hts_log_info(__FUNCTION__, "Testing sequence");
685     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
686     read.sequence=read.sequence[0..10];
687     assert(read.sequence=="AGCTAGGGCA");
688 
689     ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
690     assert(read.qscores == q);
691     q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30];
692     read.qscores(q);
693     assert(read.qscores == q);
694     q[] += 33;
695     
696     assert(read.qscoresPhredScaled == cast(char[])(q));
697 
698     assert(read.cigar.toString() == "78M1D22M");
699     assert(read["RG"].check!string);
700     assert(read["RG"].to!string=="1");
701 
702     //change cigar
703     hts_log_info(__FUNCTION__, "Testing cigar");
704     auto c=read.cigar;
705     c[$-1].length=21;
706     read.cigar=c;
707     assert(read.cigar.toString() == "78M1D21M");
708     assert(read["RG"].check!string);
709     assert(read["RG"].to!string=="1");
710     
711     //change tag
712     hts_log_info(__FUNCTION__, "Testing tags");
713     read["X0"]=2;
714     assert(read["X0"].to!ubyte==2);
715     assert(read["RG"].check!string);
716     
717     read["RG"]="test";
718     assert(read["RG"].to!string=="test");
719     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
720 }
721 
722 ///
723 debug(dhtslib_unittest) unittest
724 {
725     import dhtslib.sam;
726     import htslib.hts_log : hts_log_info;
727     import std.path:buildPath,dirName;
728 
729     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
730     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
731     hts_log_info(__FUNCTION__, "Loading test file");
732     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
733     auto ar = bam.allRecords;
734     assert(ar.empty == false);
735     auto read = ar.front;
736 
737     //change queryname
738     hts_log_info(__FUNCTION__, "Testing queryname");
739     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
740 
741     //we do this only to force realloc
742     read.b.m_data=read.b.l_data;
743 
744 
745     auto prev_m=read.b.m_data;
746     read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
747     assert(read.b.m_data >prev_m);
748     assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
749     assert(read.cigar.toString() == "78M1D22M");
750     assert(read["RG"].check!string);
751     assert(read["RG"].to!string=="1");
752 
753     //we do this only to force realloc
754     read.b.m_data=read.b.l_data;
755 
756     //change sequence
757     hts_log_info(__FUNCTION__, "Testing sequence");
758     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
759     prev_m=read.b.m_data;
760     read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
761     assert(read.b.m_data >prev_m);
762     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
763     assert(read.cigar.toString() == "78M1D22M");
764     assert(read["RG"].check!string);
765     assert(read["RG"].to!string=="1");
766 
767     //change cigar
768     hts_log_info(__FUNCTION__, "Testing cigar");
769     auto c=read.cigar;
770     c=c~c;
771     prev_m=read.b.m_data;
772     read.cigar=c;
773     assert(read.b.m_data >prev_m);
774     assert(read.cigar.toString() == "78M1D22M78M1D22M");
775     assert(read["RG"].check!string);
776     assert(read["RG"].to!string=="1");
777     
778     //change tag
779     hts_log_info(__FUNCTION__, "Testing tags");
780     read["X0"]=2;
781     assert(read["X0"].to!ubyte==2);
782     assert(read["RG"].check!string);
783     
784     read["RG"]="test";
785     assert(read["RG"].to!string=="test");
786     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
787 }
788 
789 ///
790 debug(dhtslib_unittest) unittest
791 {
792     import std.stdio;
793     import dhtslib.sam;
794     import dhtslib.sam.md : MDItr;
795     import std.algorithm: map;
796     import std.array: array;
797     import std.path:buildPath,dirName;
798     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
799 
800     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
801     auto ar = bam.allRecords;
802     assert(ar.empty == false);
803     SAMRecord read = ar.front;
804     
805     auto pairs = read.getAlignedPairs!true(77, 77 + 5);
806 
807     assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
808     pairs = read.getAlignedPairs!true(77, 77 + 5);
809     assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
810     pairs = read.getAlignedPairs!true(77, 77 + 5);
811     // assert(pairs.map!(x => x.refBase).array == "GAAAA");
812     pairs = read.getAlignedPairs!true(77, 77 + 5);
813     assert(pairs.map!(x => x.queryBase).array == "G-AAA");
814 }
815 
816 
817 debug(dhtslib_unittest) unittest
818 {
819     import std.stdio;
820     import dhtslib.sam;
821     import dhtslib.sam.md : MDItr;
822     import std.algorithm: map;
823     import std.range : iota;
824     import std.array: array;
825     import std.path:buildPath,dirName;
826     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
827     SAMHeader h;
828     SAMRecord read = SAMRecord(h);
829     read.queryName = "test";
830     read.sequence = 
831     //6H [4S][                                                                        77M][4I]MD[                22M]SSS 5H
832         "NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCGAAAAAACATTTTTTGGGTTTTTNNN";
833     read.cigar = cigarFromString("6H4S77M4I1M1D22M3S5H");
834     read["MD"] = "48G29^A13C8";
835     
836     long[] rposArr = new long[read.getAlignedCoordinates.array.length];
837     long[] qposArr = new long[read.getAlignedCoordinates.array.length];
838     Ops[] opsArr = new Ops[read.getAlignedCoordinates.array.length];
839 
840     rposArr[0..6] = -1; // 6H
841     qposArr[0..6] = -1; // 6H
842     opsArr[0..6] = Ops.HARD_CLIP;
843 
844     rposArr[6..10] = -1; // 4S
845     qposArr[6..10] = iota!long(4).array; // 4S
846     opsArr[6..10] = Ops.SOFT_CLIP;
847 
848     rposArr[10..87] = iota!long(77).array; // 77M
849     qposArr[10..87] = iota!long(4, 81).array; // 77M
850     opsArr[10..87] = Ops.MATCH;
851 
852     rposArr[87..91] = 76; // I4
853     qposArr[87..91] = iota!long(81, 85).array; // I4
854     opsArr[87..91] = Ops.INS;
855 
856     rposArr[91] = 77; // 1M
857     qposArr[91] = 85; // 1M
858     opsArr[91] = Ops.MATCH;
859 
860     rposArr[92] = 78; // 1D
861     qposArr[92] = 85; // 1D
862     opsArr[92] = Ops.DEL;
863 
864     rposArr[93..115] = iota!long(79,101).array; // 22M
865     qposArr[93..115] = iota!long(86,108).array; // 22M
866     opsArr[93..115] = Ops.MATCH;
867 
868     rposArr[115..118] = 100; // 3S
869     qposArr[115..118] = iota!long(108, 111).array; // 3S
870     opsArr[115..118] = Ops.SOFT_CLIP;
871 
872     rposArr[118..123] = 100; // 5H
873     qposArr[118..123] = 110; // 5H
874     opsArr[118..123] = Ops.HARD_CLIP;
875 
876     auto pairs = read.getAlignedPairs!true.array;
877     assert(pairs.map!(x=> x.rpos.pos).array  == rposArr);
878     assert(pairs.map!(x=> x.qpos.pos).array == qposArr);
879     assert(pairs.map!(x=> x.cigar_op).array == opsArr);
880     writeln(pairs.map!(x=> x.refBase).array);
881     writeln(pairs.map!(x=> x.queryBase).array);
882     assert(pairs.map!(x=> x.refBase).array == 
883             // ================================================G============================    =A=============C========
884             //                                                                                  =A=============C========
885     "----------AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATT----GAAAAAAACATTTTTCGGGTTTTT--------");
886     assert(pairs.map!(x=> x.queryBase).array == 
887     "------NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG-AAAAAACATTTTTTGGGTTTTTNNN-----");
888             // AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG AAAAAACATTTTTTGGGTTTTT
889     auto pairs2 = read.getAlignedPairs!true(77, 77 + 5).array;
890     assert(pairs2.map!(x => x.rpos).array == rposArr[91..96]);
891     assert(pairs2.map!(x => x.qpos).array == qposArr[91..96]);
892 
893     import dhtslib.sam.md;
894     import std.range : dropExactly;
895     auto mdItr = MDItr(read);
896     
897     mdItr = MDItr(read);
898     mdItr = mdItr.dropExactly(77);
899 
900     assert(pairs2.map!(x => x.refBase).array == "GAAAA");
901     assert(pairs2.map!(x => x.queryBase).array == "G-AAA");
902 }
903 
904 ///
905 debug(dhtslib_unittest) unittest
906 {
907     import std.stdio : writeln, File;
908     import std.range : drop;
909     import std.utf : toUTFz;
910     import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
911     import std.path:buildPath,dirName;
912     import std.conv:to;
913     import dhtslib.sam : parseSam;
914 
915     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
916     hts_log_info(__FUNCTION__, "Loading sam file");
917     auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
918     auto b = bam_init1();
919     auto hdr = bam_hdr_init();
920     string hdr_str;
921     assert(range.empty == false);
922     for (auto i = 0; i < 4; i++)
923     {
924         hdr_str ~= range.front ~ "\n";
925         range.popFront;
926     }
927     hts_log_info(__FUNCTION__, "Header");
928     writeln(hdr_str);
929     hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
930     hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
931     auto r = new SAMRecord(b);
932     hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
933     assert(r.cigar.toString == "6M1D117M5D28M");
934 }
935 
936 ///
937 debug(dhtslib_unittest) unittest
938 {
939     import std.stdio;
940     import dhtslib.sam;
941     import dhtslib.sam.md : MDItr;
942     import std.algorithm: map;
943     import std.array: array;
944     import std.parallelism : parallel; 
945     import std.path:buildPath,dirName;
946     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
947 
948     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
949     foreach(rec;parallel(bam.allRecords)){
950         rec.queryName = "multithreading test";
951     }
952 
953 }