1 module dhtslib.sam.record;
2 
3 import core.stdc.stdlib : calloc, free, realloc;
4 import core.stdc.string : memset;
5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno;
6 import std.string : fromStringz, toStringz;
7 import std.algorithm : filter;
8 import std.range.interfaces : InputRange, inputRangeObject;
9 import std.traits : isArray, isIntegral, isSomeString;
10 
11 import htslib.hts : seq_nt16_str, seq_nt16_table;
12 
13 import htslib.hts_log;
14 import htslib.sam;
15 import dhtslib.sam.cigar;
16 import dhtslib.sam.tagvalue;
17 import dhtslib.sam.header;
18 import dhtslib.coordinates;
19 import dhtslib.memory;
20 
21 /**
22 Encapsulates a SAM/BAM/CRAM record,
23 using the bam1_t type for memory efficiency,
24 and the htslib helper functions for speed.
25 **/
26 struct SAMRecord
27 {
28     /// Backing SAM/BAM row record
29     Bam1 b;
30 
31     /// Corresponding SAM/BAM header data
32     SAMHeader h;
33    
34     /// ditto
35     this(SAMHeader h)
36     {
37         this.b = Bam1(bam_init1);
38         this.h = h;
39     }
40  
41     /// Construct SAMRecord from supplied bam1_t
42     deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support")
43     this(bam1_t* b)
44     {
45         this.b = Bam1(b);
46     }
47 
48     /// Construct SAMRecord from supplied bam1_t and sam_hdr_type
49     this(bam1_t* b, SAMHeader h)
50     {
51         this.b = Bam1(b);
52         this.h = h;
53     }
54 
55     /// Explicit postblit to avoid 
56     /// https://github.com/blachlylab/dhtslib/issues/122
57     this(this)
58     {
59         this.b = b;
60         this.h = h;   
61     }
62 
63     /* bam1_core_t fields */
64 
65     /// chromosome ID, defined by sam_hdr_t
66     pragma(inline, true)
67     @nogc @safe nothrow
68     @property int tid() { return this.b.core.tid; }
69     /// ditto
70     pragma(inline, true)
71     @nogc @safe nothrow
72     @property void tid(int tid) { this.b.core.tid = tid; }
73 
74     /// mapped contig (tid) name, defined by sam_hdr_t* h
75     pragma(inline, true)
76     @property const(char)[] referenceName()
77     {
78         assert(!this.h.isNull);
79         return this.h.targetName(this.b.core.tid);
80     }
81  
82     /// 0-based leftmost coordinate
83     pragma(inline, true)
84     @nogc @safe nothrow
85     @property ZB pos() { return ZB(this.b.core.pos); }
86     /// ditto
87     pragma(inline, true)
88     @nogc @safe nothrow
89     @property void pos(ZB pos) { this.b.core.pos = pos.pos; }
90 
91     /// 0-based, half-open coordinates that represent
92     /// the mapped length of the alignment 
93     pragma(inline, true)
94     @property ZBHO coordinates()
95     {
96         return ZBHO(this.pos, this.pos + this.cigar.alignedLength);
97     }
98 
99     // TODO: @field  bin     bin calculated by bam_reg2bin()
100 
101     /// mapping quality
102     pragma(inline, true)
103     @nogc @safe nothrow
104     @property ubyte qual() { return this.b.core.qual; }
105     /// ditto
106     pragma(inline, true)
107     @nogc @safe nothrow
108     @property void qual(ubyte q) { this.b.core.qual = q; }
109 
110     /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly.
111     pragma(inline, true)
112     @nogc @safe nothrow
113     @property int l_qname() { return this.b.core.l_qname; }
114 
115     /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly.
116     pragma(inline, true)
117     @nogc @safe nothrow
118     @property int l_extranul() { return this.b.core.l_extranul; }
119 
120     /// bitwise flag
121     pragma(inline, true)
122     @nogc @safe nothrow
123     @property ushort flag() { return this.b.core.flag; }
124     /// ditto
125     pragma(inline, true)
126     @nogc @safe nothrow
127     @property void flag(ushort fl) { this.b.core.flag = fl; }
128 
129     /// is read paired?
130     pragma(inline, true)
131     @nogc @safe nothrow
132     @property bool isPaired()
133     {
134         return cast(bool)(b.core.flag & BAM_FPAIRED);
135     }
136 
137     /// is read paired?
138     pragma(inline, true)
139     @nogc @safe nothrow
140     @property bool isProperPair()
141     {
142         return cast(bool)(b.core.flag & BAM_FPROPER_PAIR);
143     }
144 
145     /// is read mapped?
146     pragma(inline, true)
147     @nogc @safe nothrow
148     @property bool isMapped()
149     {
150         return (b.core.flag & BAM_FUNMAP) == 0;
151     }
152 
153     /// is read mate mapped?
154     pragma(inline, true)
155     @nogc @safe nothrow
156     @property bool isMateMapped()
157     {
158         return (b.core.flag & BAM_FMUNMAP) == 0;
159     }
160 
161     /// is read reversed?
162     /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
163     pragma(inline, true)
164     @nogc @trusted nothrow
165     @property bool isReversed()
166     {
167         return bam_is_rev(this.b);
168     }
169 
170     /// is mate reversed?
171     /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
172     pragma(inline, true)
173     @nogc @trusted nothrow
174     @property bool mateReversed()
175     {
176         return bam_is_mrev(this.b);
177     }
178 
179     /// is this read 1?
180     pragma(inline, true)
181     @nogc @safe nothrow
182     @property bool isRead1()
183     {
184         return cast(bool)(b.core.flag & BAM_FREAD1);
185     }
186     
187     /// is this read 2?
188     pragma(inline, true)
189     @nogc @safe nothrow
190     @property bool isRead2()
191     {
192         return cast(bool)(b.core.flag & BAM_FREAD2);
193     }
194 
195     /// is read secondary?
196     @property bool isSecondary()
197     {
198         version(LDC){
199             pragma(inline, true);
200         }
201         return cast(bool)(b.core.flag & BAM_FSECONDARY);
202     }
203 
204     /// Does this read fail qc?
205     pragma(inline, true)
206     @nogc @safe nothrow
207     @property bool isQCFail()
208     {
209         return cast(bool)(b.core.flag & BAM_FQCFAIL);
210     }
211 
212     /// Is this read marked as a PCR or Optical duplicate?
213     pragma(inline, true)
214     @nogc @safe nothrow
215     @property bool isDuplicate()
216     {
217         return cast(bool)(b.core.flag & BAM_FDUP);
218     }
219 
220     /// is read supplementary?
221     pragma(inline, true)
222     @nogc @safe nothrow
223     @property bool isSupplementary()
224     {
225         return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY);
226     }
227 
228     /// Return read strand + or - (as char)
229     @property char strand(){
230         return ['+','-'][isReversed()];
231     }
232 
233     /// Get a slice of query name (read name)
234     /// if you keep this around you should copy it
235     /// -- wraps bam_get_qname(bam1_t *b)
236     pragma(inline, true)
237     @nogc @trusted nothrow
238     @property const(char)[] queryName()
239     {
240         // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
241         return fromStringz(bam_get_qname(this.b));
242     }
243     
244     /// Set query name (read name)
245     pragma(inline, true)
246     @trusted nothrow
247     @property void queryName(string qname)
248     {
249         auto ret = bam_set_qname(this.b, toStringz(qname));
250         if(ret != 0)
251             hts_log_error(__FUNCTION__, "Could not set queryname");
252     }
253 
254     /// query (and quality string) length
255     pragma(inline, true)
256     @nogc @safe nothrow
257     @property int length()
258     {
259         return this.b.core.l_qseq;
260     }
261 
262     /// Return char array of the sequence
263     /// see samtools/sam_view.c: get_read
264     pragma(inline, true)
265     @trusted nothrow
266     @property const(char)[] sequence()
267     {
268         // calloc fills with \0; +1 len for Cstring
269         // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1);
270         char[] s;
271         s.length = this.b.core.l_qseq;
272 
273         // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
274         auto seqdata = bam_get_seq(this.b);
275 
276         for (int i; i < this.b.core.l_qseq; i++)
277         {
278             s[i] = seq_nt16_str[bam_seqi(seqdata, i)];
279         }
280         return s;
281     }
282 
283     /// Assigns sequence and resets quality score
284     pragma(inline, true)
285     @trusted nothrow
286     @property void sequence(const(char)[] seq)
287     {
288         /// There is no bam_seq_seq
289 
290         //nibble encode sequence
291         ubyte[] en_seq;
292         en_seq.length = (seq.length + 1) >> 1;
293         
294         for(auto i=0;i < seq.length;i++){
295             bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]);
296         }
297 
298         //get length of data before seq
299         uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof);
300 
301         //get starting point after seq
302         uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq;
303 
304         //copy previous data
305         ubyte[] prev=b.data[0 .. l_prev].dup;
306 
307         //copy rest of data
308         ubyte[] rest=b.data[start_rest .. b.l_data].dup;
309         
310         //if not enough space
311         if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){
312 
313             // realloc
314             auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest));
315             assert(ptr != null);
316             b.data = ptr;
317 
318             // increase capacity
319             b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest);
320         }
321 
322         //reassign q_name and rest of data
323         b.data[0 .. l_prev] = prev;
324         b.data[l_prev .. en_seq.length + l_prev] = en_seq;
325         //set qscores to 255 
326         memset(b.data + en_seq.length + l_prev, 255, seq.length);
327         b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest;
328 
329         //reset data length, seq length
330         b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest);
331         b.core.l_qseq = cast(int) (seq.length);
332     }
333 
334     /// Return array of the quality scores
335     /// see samtools/sam_view.c: get_quality
336     /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead?
337     pragma(inline, true)
338     @nogc @trusted nothrow
339     @property const(ubyte)[] qscores()
340     {
341         auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq];
342         return slice;
343     }
344 
345     /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence
346     pragma(inline, true)
347     @trusted nothrow
348     @property void qscores(const(ubyte)[] seq)
349     {
350         /// There is no bam_seq_qual
351         
352         if(seq.length != b.core.l_qseq){
353             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
354             return;
355         }
356         
357         //get length of data before seq
358         uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1);
359         b.data[l_prev .. seq.length + l_prev] = seq;
360     }
361 
362     /// get phred-scaled base qualities as char array
363     pragma(inline, true)
364     @trusted nothrow
365     const(char)[] qscoresPhredScaled()
366     {
367         auto bqs = this.qscores.dup;
368         bqs[] += 33;
369         return cast(const(char)[]) bqs;
370     }
371 
372     /// Create cigar from bam1_t record
373     pragma(inline, true)
374     @property Cigar cigar()
375     {
376         return Cigar(this.b);
377     }
378 
379     /// Assign a cigar string
380     pragma(inline, true)
381     @trusted
382     @property void cigar(Cigar cigar)
383     {
384         // no bam_set_cigar
385 
386         //get length of data before seq
387         uint l_prev = b.core.l_qname;
388 
389         //get starting point after seq
390         uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof);
391 
392         //copy previous data
393         ubyte[] prev = b.data[0..l_prev].dup;
394 
395         //copy rest of data
396         ubyte[] rest = b.data[start_rest..b.l_data].dup;
397 
398         //if not enough space
399         if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){
400             // realloc
401             auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest));
402             assert(ptr != null);
403             b.data = ptr;
404 
405             // increase capacity
406             b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
407         }
408 
409         //reassign q_name and rest of data
410         b.data[0 .. l_prev]=prev;
411         //without cigar.ops.dup we get the overlapping array copy error
412         b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]);
413         b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest;
414 
415         //reset data length, seq length
416         b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest);
417         b.core.n_cigar = cast(uint) (cigar.length);
418     }
419 
420     /// Get aux tag from bam1_t record and return a TagValue
421     TagValue opIndex(string val)
422     {
423         return TagValue(this.b, val[0..2]);
424     }
425 
426     /// Assign a tag key value pair
427     void opIndexAssign(T)(T value, string index)
428     if(!isArray!T || isSomeString!T)
429     {
430         static if(isIntegral!T){
431             auto err = bam_aux_update_int(b, index[0..2], value);
432         }else static if(is(T==float)){
433             auto err = bam_aux_update_float(b, index[0..2], value);
434         }else static if(isSomeString!T){
435             auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr);
436         }
437         if(err == 0) return;
438         switch(errno){
439             case EINVAL:
440                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
441                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
442             case ENOMEM:
443                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
444                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
445                 " than the maximum size allowed in a bam record (2Gbytes).");
446             case ERANGE:
447             case EOVERFLOW:
448                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
449                 " bam tag (-2147483647 to 4294967295).");
450             case -1:
451                 throw new Exception("Something went wrong adding the bam tag.");
452             case 0:
453                 return;
454             default:
455                 throw new Exception("Something went wrong adding the bam tag.");
456         }
457     }
458 
459     /// Assign a tag key array pair
460     void opIndexAssign(T)(T[] value, string index)
461     if(!isSomeString!(T[]))
462     {
463         auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr);
464         if(err == 0) return;
465         switch(errno){
466             case EINVAL:
467                 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 
468                 " with the given ID is not of an integer type (c, C, s, S, i or I).");
469             case ENOMEM:
470                 throw new Exception("The bam data needs to be expanded and either the attempt" ~
471                 " to reallocate the data buffer failed or the resulting buffer would be longer" ~
472                 " than the maximum size allowed in a bam record (2Gbytes).");
473             case ERANGE:
474             case EOVERFLOW:
475                 throw new Exception("val is outside the range that can be stored in an integer" ~ 
476                 " bam tag (-2147483647 to 4294967295).");
477             case -1:
478                 throw new Exception("Something went wrong adding the bam tag.");
479             case 0:
480                 return;
481             default:
482                 throw new Exception("Something went wrong adding the bam tag.");
483         }
484     }
485 
486     /// chromosome ID of next read in template, defined by bam_hdr_t
487     pragma(inline, true)
488     @nogc @safe nothrow
489     @property int mateTID() { return this.b.core.mtid; }
490     /// ditto
491     pragma(inline, true)
492     @nogc @safe nothrow
493     @property void mateTID(int mtid) { this.b.core.mtid = mtid; }
494 
495     /// 0-based leftmost coordinate of next read in template
496     pragma(inline, true)
497     @nogc @safe nothrow
498     @property long matePos() { return this.b.core.mpos; }
499     /// ditto
500     pragma(inline, true)
501     @nogc @safe nothrow
502     @property void matePos(long mpos) { this.b.core.mpos = mpos; }
503 
504     /// Presumably Insert size, but is undocumented.
505     /// Per samtools source, is measured 5' to 5'
506     /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320
507     pragma(inline, true)
508     @nogc @safe nothrow
509     @property long insertSize() { return this.b.core.isize; }
510     /// ditto
511     pragma(inline, true)
512     @nogc @safe nothrow
513     @property void insertSize(long isize) { this.b.core.isize = isize; }
514 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/
515     /// get aligned coordinates per each cigar op
516     InputRange!AlignedCoordinate getAlignedCoordinates(){
517         return AlignedCoordinatesItr(this.cigar).inputRangeObject;
518     }
519 
520     /// get aligned coordinates per each cigar op within range
521     /// range is 0-based half open using chromosomal coordinates
522     InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){
523         assert(start >= 0);
524         assert(end <= this.cigar.ref_bases_covered);
525         return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject;
526     }
527 
528     struct AlignedPair(bool refSeq)
529     {
530         Coordinate!(Basis.zero) qpos= ZB(-1);
531 
532         Coordinate!(Basis.zero) rpos= ZB(-1);
533         
534         Ops cigar_op = Ops.MATCH;
535         
536         char queryBase = '-';
537 
538         static if(refSeq) 
539             char refBase = '-';
540 
541         string toString(){
542             import std.conv : to;
543             static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase;
544             else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase;
545         }
546     }
547     struct AlignedPairs(bool refSeq)
548     {
549         import dhtslib.sam.md : MDItr;
550         import std.range : dropExactly;
551 
552         InputRange!AlignedCoordinate coords;
553         static if(refSeq) MDItr mdItr;
554         AlignedPair!refSeq current;
555         Bam1 b;
556         ubyte * seq;
557         
558         this(SAMRecord rec){
559             this.b = rec.b;
560             this.seq = bam_get_seq(this.b);
561             this.coords = rec.getAlignedCoordinates;
562             static if(refSeq){
563                 assert(rec["MD"].exists,"MD tag must exist for record");
564                 mdItr = MDItr(rec);
565             } 
566             current.qpos = coords.front.qpos;
567             current.rpos = coords.front.rpos;
568             current.cigar_op = coords.front.cigar_op;
569             if(current.cigar_op.isQueryConsuming) 
570                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
571             
572             static if(refSeq){
573                 if(current.cigar_op.isReferenceConsuming){
574                     if(mdItr.front == '=')
575                         current.refBase = current.queryBase;
576                     else 
577                         current.refBase = mdItr.front;
578                     mdItr.popFront;
579                 }
580                 
581             }
582         }
583 
584         this(SAMRecord rec, long start, long end){
585             assert(start < end);
586             assert(start >= 0);
587             assert(end <= rec.cigar.ref_bases_covered);
588             this.b = rec.b;
589             this.seq = bam_get_seq(this.b);
590             this.coords = rec.getAlignedCoordinates(start, end);
591             static if(refSeq){
592                 assert(rec["MD"].exists,"MD tag must exist for record");
593                 mdItr = MDItr(rec);
594                 mdItr = mdItr.dropExactly(start);
595             } 
596             current.qpos = coords.front.qpos;
597             current.rpos = coords.front.rpos;
598             current.cigar_op = coords.front.cigar_op;
599             if(current.cigar_op.isQueryConsuming) 
600                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
601             
602             static if(refSeq){
603                 if(current.cigar_op.isReferenceConsuming){
604                     if(mdItr.front == '=')
605                         current.refBase = current.queryBase;
606                     else 
607                         current.refBase = mdItr.front;
608                     mdItr.popFront;
609                 }
610                 
611             }
612         }
613 
614         AlignedPair!refSeq front(){
615             return current;
616         }
617 
618         void popFront(){
619             coords.popFront;
620             if(coords.empty) return;
621 
622             current = current.init;
623             current.qpos = coords.front.qpos;
624             current.rpos = coords.front.rpos;
625             current.cigar_op = coords.front.cigar_op;
626 
627             if(current.cigar_op.isQueryConsuming)
628                 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)];
629             
630             static if(refSeq){
631                 if(current.cigar_op.isReferenceConsuming){
632                     if(mdItr.front == '=') 
633                         current.refBase = current.queryBase;
634                     else 
635                         current.refBase = mdItr.front;
636                     mdItr.popFront;
637                 }
638             }
639 
640         }
641 
642         bool empty(){
643             return coords.empty;
644         }
645 
646     }
647     /// get a range of aligned read and reference positions
648     /// this is meant to recreate functionality from pysam:
649     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
650     /// range is 0-based half open using chromosomal coordinates
651     auto  getAlignedPairs(bool withRefSeq)(long start, long end)
652     {
653         return AlignedPairs!withRefSeq(this,start,end).inputRangeObject;
654     }
655     /// get a range of aligned read and reference positions
656     /// this is meant to recreate functionality from pysam:
657     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
658     auto getAlignedPairs(bool withRefSeq)(){
659         return AlignedPairs!withRefSeq(this);
660     }
661 /+ END CHERRY-PICK +/
662 }
663 
664 ///
665 debug(dhtslib_unittest) unittest
666 {
667     import dhtslib.sam;
668     import htslib.hts_log : hts_log_info;
669     import std.path:buildPath,dirName;
670 
671     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
672     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
673     hts_log_info(__FUNCTION__, "Loading test file");
674     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
675     auto ar = bam.allRecords;
676     assert(ar.empty == false);
677     auto read = ar.front;
678 
679     //change queryname
680     hts_log_info(__FUNCTION__, "Testing queryname");
681     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
682     read.queryName="HS18_09653:4:1315:19857:6171";
683     assert(read.queryName=="HS18_09653:4:1315:19857:6171");
684     assert(read.cigar.toString() == "78M1D22M");
685     read.queryName="HS18_09653:4:1315:19857:617122";
686     assert(read.queryName=="HS18_09653:4:1315:19857:617122");
687     assert(read.cigar.toString() == "78M1D22M");
688     assert(read["RG"].check!string);
689     assert(read["RG"].to!string=="1");
690 
691     //change sequence
692     hts_log_info(__FUNCTION__, "Testing sequence");
693     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
694     read.sequence=read.sequence[0..10];
695     assert(read.sequence=="AGCTAGGGCA");
696 
697     ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
698     assert(read.qscores == q);
699     q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30];
700     read.qscores(q);
701     assert(read.qscores == q);
702     q[] += 33;
703     
704     assert(read.qscoresPhredScaled == cast(char[])(q));
705 
706     assert(read.cigar.toString() == "78M1D22M");
707     assert(read["RG"].check!string);
708     assert(read["RG"].to!string=="1");
709 
710     //change cigar
711     hts_log_info(__FUNCTION__, "Testing cigar");
712     auto c=read.cigar;
713     c[$-1].length=21;
714     read.cigar=c;
715     assert(read.cigar.toString() == "78M1D21M");
716     assert(read["RG"].check!string);
717     assert(read["RG"].to!string=="1");
718     
719     //change tag
720     hts_log_info(__FUNCTION__, "Testing tags");
721     read["X0"]=2;
722     assert(read["X0"].to!ubyte==2);
723     assert(read["RG"].check!string);
724     
725     read["RG"]="test";
726     assert(read["RG"].to!string=="test");
727     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
728 }
729 
730 ///
731 debug(dhtslib_unittest) unittest
732 {
733     import dhtslib.sam;
734     import htslib.hts_log : hts_log_info;
735     import std.path:buildPath,dirName;
736 
737     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
738     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
739     hts_log_info(__FUNCTION__, "Loading test file");
740     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
741     auto ar = bam.allRecords;
742     assert(ar.empty == false);
743     auto read = ar.front;
744 
745     //change queryname
746     hts_log_info(__FUNCTION__, "Testing queryname");
747     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
748 
749     //we do this only to force realloc
750     read.b.m_data=read.b.l_data;
751 
752 
753     auto prev_m=read.b.m_data;
754     read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
755     assert(read.b.m_data >prev_m);
756     assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
757     assert(read.cigar.toString() == "78M1D22M");
758     assert(read["RG"].check!string);
759     assert(read["RG"].to!string=="1");
760 
761     //we do this only to force realloc
762     read.b.m_data=read.b.l_data;
763 
764     //change sequence
765     hts_log_info(__FUNCTION__, "Testing sequence");
766     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
767     prev_m=read.b.m_data;
768     read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
769     assert(read.b.m_data >prev_m);
770     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
771     assert(read.cigar.toString() == "78M1D22M");
772     assert(read["RG"].check!string);
773     assert(read["RG"].to!string=="1");
774 
775     //change cigar
776     hts_log_info(__FUNCTION__, "Testing cigar");
777     auto c=read.cigar;
778     c=c~c;
779     prev_m=read.b.m_data;
780     read.cigar=c;
781     assert(read.b.m_data >prev_m);
782     assert(read.cigar.toString() == "78M1D22M78M1D22M");
783     assert(read["RG"].check!string);
784     assert(read["RG"].to!string=="1");
785     
786     //change tag
787     hts_log_info(__FUNCTION__, "Testing tags");
788     read["X0"]=2;
789     assert(read["X0"].to!ubyte==2);
790     assert(read["RG"].check!string);
791     
792     read["RG"]="test";
793     assert(read["RG"].to!string=="test");
794     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
795 }
796 
797 ///
798 debug(dhtslib_unittest) unittest
799 {
800     import std.stdio;
801     import dhtslib.sam;
802     import dhtslib.sam.md : MDItr;
803     import std.algorithm: map;
804     import std.array: array;
805     import std.path:buildPath,dirName;
806     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
807 
808     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
809     auto ar = bam.allRecords;
810     assert(ar.empty == false);
811     SAMRecord read = ar.front;
812     
813     auto pairs = read.getAlignedPairs!true(77, 77 + 5);
814 
815     assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
816     pairs = read.getAlignedPairs!true(77, 77 + 5);
817     assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
818     pairs = read.getAlignedPairs!true(77, 77 + 5);
819     // assert(pairs.map!(x => x.refBase).array == "GAAAA");
820     pairs = read.getAlignedPairs!true(77, 77 + 5);
821     assert(pairs.map!(x => x.queryBase).array == "G-AAA");
822 }
823 
824 
825 debug(dhtslib_unittest) unittest
826 {
827     import std.stdio;
828     import dhtslib.sam;
829     import dhtslib.sam.md : MDItr;
830     import std.algorithm: map;
831     import std.range : iota;
832     import std.array: array;
833     import std.path:buildPath,dirName;
834     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
835     SAMHeader h;
836     SAMRecord read = SAMRecord(h);
837     read.queryName = "test";
838     read.sequence = 
839     //6H [4S][                                                                        77M][4I]MD[                22M]SSS 5H
840         "NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCGAAAAAACATTTTTTGGGTTTTTNNN";
841     read.cigar = cigarFromString("6H4S77M4I1M1D22M3S5H");
842     read["MD"] = "48G29^A13C8";
843     
844     long[] rposArr = new long[read.getAlignedCoordinates.array.length];
845     long[] qposArr = new long[read.getAlignedCoordinates.array.length];
846     Ops[] opsArr = new Ops[read.getAlignedCoordinates.array.length];
847 
848     rposArr[0..6] = -1; // 6H
849     qposArr[0..6] = -1; // 6H
850     opsArr[0..6] = Ops.HARD_CLIP;
851 
852     rposArr[6..10] = -1; // 4S
853     qposArr[6..10] = iota!long(4).array; // 4S
854     opsArr[6..10] = Ops.SOFT_CLIP;
855 
856     rposArr[10..87] = iota!long(77).array; // 77M
857     qposArr[10..87] = iota!long(4, 81).array; // 77M
858     opsArr[10..87] = Ops.MATCH;
859 
860     rposArr[87..91] = 76; // I4
861     qposArr[87..91] = iota!long(81, 85).array; // I4
862     opsArr[87..91] = Ops.INS;
863 
864     rposArr[91] = 77; // 1M
865     qposArr[91] = 85; // 1M
866     opsArr[91] = Ops.MATCH;
867 
868     rposArr[92] = 78; // 1D
869     qposArr[92] = 85; // 1D
870     opsArr[92] = Ops.DEL;
871 
872     rposArr[93..115] = iota!long(79,101).array; // 22M
873     qposArr[93..115] = iota!long(86,108).array; // 22M
874     opsArr[93..115] = Ops.MATCH;
875 
876     rposArr[115..118] = 100; // 3S
877     qposArr[115..118] = iota!long(108, 111).array; // 3S
878     opsArr[115..118] = Ops.SOFT_CLIP;
879 
880     rposArr[118..123] = 100; // 5H
881     qposArr[118..123] = 110; // 5H
882     opsArr[118..123] = Ops.HARD_CLIP;
883 
884     auto pairs = read.getAlignedPairs!true.array;
885     assert(pairs.map!(x=> x.rpos.pos).array  == rposArr);
886     assert(pairs.map!(x=> x.qpos.pos).array == qposArr);
887     assert(pairs.map!(x=> x.cigar_op).array == opsArr);
888     writeln(pairs.map!(x=> x.refBase).array);
889     writeln(pairs.map!(x=> x.queryBase).array);
890     assert(pairs.map!(x=> x.refBase).array == 
891             // ================================================G============================    =A=============C========
892             //                                                                                  =A=============C========
893     "----------AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATT----GAAAAAAACATTTTTCGGGTTTTT--------");
894     assert(pairs.map!(x=> x.queryBase).array == 
895     "------NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG-AAAAAACATTTTTTGGGTTTTTNNN-----");
896             // AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG AAAAAACATTTTTTGGGTTTTT
897     auto pairs2 = read.getAlignedPairs!true(77, 77 + 5).array;
898     assert(pairs2.map!(x => x.rpos).array == rposArr[91..96]);
899     assert(pairs2.map!(x => x.qpos).array == qposArr[91..96]);
900 
901     import dhtslib.sam.md;
902     import std.range : dropExactly;
903     auto mdItr = MDItr(read);
904     
905     mdItr = MDItr(read);
906     mdItr = mdItr.dropExactly(77);
907 
908     assert(pairs2.map!(x => x.refBase).array == "GAAAA");
909     assert(pairs2.map!(x => x.queryBase).array == "G-AAA");
910 }
911 
912 ///
913 debug(dhtslib_unittest) unittest
914 {
915     import std.stdio : writeln, File;
916     import std.range : drop;
917     import std.utf : toUTFz;
918     import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
919     import std.path:buildPath,dirName;
920     import std.conv:to;
921     import dhtslib.sam : parseSam;
922 
923     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
924     hts_log_info(__FUNCTION__, "Loading sam file");
925     auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
926     auto b = bam_init1();
927     auto hdr = bam_hdr_init();
928     string hdr_str;
929     assert(range.empty == false);
930     for (auto i = 0; i < 4; i++)
931     {
932         hdr_str ~= range.front ~ "\n";
933         range.popFront;
934     }
935     hts_log_info(__FUNCTION__, "Header");
936     writeln(hdr_str);
937     hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
938     hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
939     auto r = new SAMRecord(b);
940     hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
941     assert(r.cigar.toString == "6M1D117M5D28M");
942 }
943 
944 ///
945 debug(dhtslib_unittest) unittest
946 {
947     import std.stdio;
948     import dhtslib.sam;
949     import dhtslib.sam.md : MDItr;
950     import std.algorithm: map;
951     import std.array: array;
952     import std.parallelism : parallel; 
953     import std.path:buildPath,dirName;
954     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
955 
956     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
957     foreach(rec;parallel(bam.allRecords)){
958         rec.queryName = "multithreading test";
959     }
960 
961 }