1 /**
2 
3 SAMRecord and SAMFile are wrappers for htslib functions relating to SAM/BAM/CRAM* files
4 
5 SAMRecord is a structured representation of a SAM/BAM/CRAM* record,
6 backed internally by htslib's bam1_t, but with convenient getters and setters
7 for record attributes, including functions not included in vanilla htslib 
8 like returning the sequence or the qscore string (NB: actually char*)
9 
10 SAMFile is a structured representation of SAM/BAM/CRAM* file,
11 backed internally by htslib's htsFile and bam_hdr_t,
12 but with convenient getters and setters for file and header attributes,
13 as well as query functions accessible explicitly (`query("chr1:999-9999"`)
14 and by indexing (`samfile["chr1", 999 .. 9999]`).
15 The file object can be iterated as an InputRange to obtain every record in the file.
16 
17 Authors: James S Blachly, MD <james.blachly@gmail.com> ; Thomas Gregory <charles.gregory@osumc.edu>
18 
19 Bugs: SAMRecord and SAMFile function only as readers, rather than writers (i.e, cannot build SAMFile)
20 Bugs: (*CRAM functionality is limited and untested)
21 
22 Date: 2019-01-27
23 
24 License: Apache 2.0
25 
26 Standards: Sequence Alignment/Map Format Specification v1 14 Dec 2018 http://samtools.github.io/hts-specs/
27 
28 */
29 module dhtslib.sam;
30 
31 import core.stdc.stdlib : calloc, free,realloc;
32 import core.stdc.string:memset;
33 import std.format;
34 import std.parallelism : totalCPUs;
35 import std.stdio : writeln, writefln, stderr, File;
36 import std.string : fromStringz, toStringz;
37 import std.traits : ReturnType;
38 import std.algorithm : filter;
39 
40 import dhtslib.htslib.hts : htsFile, hts_open, hts_close, hts_hopen;
41 import dhtslib.htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t;
42 import dhtslib.htslib.hts : seq_nt16_str,seq_nt16_table;
43 import dhtslib.htslib.hts : hts_set_threads;
44 import dhtslib.htslib.hfile : hdopen, hclose, hFILE;
45 
46 import dhtslib.htslib.hts_log;
47 import dhtslib.htslib.kstring;
48 import dhtslib.htslib.sam;
49 import dhtslib.cigar;
50 import dhtslib.tagvalue;
51 
52 /**
53 Encapsulates a SAM/BAM/CRAM record,
54 using the bam1_t type for memory efficiency,
55 and the htslib helper functions for speed.
56 **/
57 class SAMRecord
58 {
59     ///
60     bam1_t* b;
61 
62     /// Construct blank SAMRecord with empty backing bam1_t
63     this()
64     {
65         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor()"); /// This line triggers memory error when __FUNCTION__, but not when "Other string"
66         //test_log(__FUNCTION__, "ctor()");   /// This line will also trigger the memory error when __FUNCTION__, but not other strings
67         //writeln(__FUNCTION__);    // this will not trigger the memory error
68         this.b = bam_init1();
69         //assert(0);                // This will elide(?) the memory error
70         //assert(1 == 2);           // This will elide(?) the memory error
71     }
72     
73     /// Construct SAMRecord from supplied bam1_t
74     this(bam1_t* b)
75     {
76         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor(bam1_t *b)");
77         this.b = b;
78     }
79 
80     ~this()
81     {
82         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor");
83         bam_destroy1(this.b); // we created our own in default ctor, or received copy via bam_dup1
84     }
85 
86 
87     /* bam1_core_t fields */
88 
89     /// chromosome ID, defined by bam_hdr_t
90     pragma(inline, true)
91     @nogc @safe nothrow
92     @property int tid() { return this.b.core.tid; }
93     /// ditto
94     pragma(inline, true)
95     @nogc @safe nothrow
96     @property void tid(int tid) { this.b.core.tid = tid; }
97 
98     /// 0-based leftmost coordinate
99     pragma(inline, true)
100     @nogc @safe nothrow
101     @property int pos() { return this.b.core.pos; }
102     /// ditto
103     pragma(inline, true)
104     @nogc @safe nothrow
105     @property void pos(int pos) { this.b.core.pos = pos; }
106 
107     // TODO: @field  bin     bin calculated by bam_reg2bin()
108 
109     /// mapping quality
110     pragma(inline, true)
111     @nogc @safe nothrow
112     @property ubyte qual() { return this.b.core.qual; }
113     /// ditto
114     pragma(inline, true)
115     @nogc @safe nothrow
116     @property void qual(ubyte q) { this.b.core.qual = q; }
117 
118     // TODO:  @field  l_qname length of the query name
119 
120     /// bitwise flag
121     pragma(inline, true)
122     @nogc @safe nothrow
123     @property ushort flag() { return this.b.core.flag; }
124     /// ditto
125     pragma(inline, true)
126     @nogc @safe nothrow
127     @property void flag(ushort fl) { this.b.core.flag = fl; }
128 
129     /// is read paired?
130     pragma(inline, true)
131     @property bool isPaired()
132     {
133         return (b.core.flag & BAM_FPAIRED);
134     }
135     
136     /// is read reversed?
137     /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
138     @property bool isReversed()
139     {
140         version(LDC) pragma(inline, true);
141         return bam_is_rev(this.b);
142     }
143 
144     /// is read mapped?
145     @property bool isMapped()
146     {
147         version(LDC){
148             pragma(inline, true);
149         }
150         return (b.core.flag & BAM_FUNMAP) == 0;
151     }
152 
153     /// is read mate mapped?
154     @property bool isMateMapped()
155     {
156         version(LDC){
157             pragma(inline, true);
158         }
159         return (b.core.flag & BAM_FMUNMAP) == 0;
160     }
161 
162     /// is mate reversed?
163     /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
164     pragma(inline, true)
165     @property bool mateReversed()
166     {
167         return bam_is_mrev(this.b);
168     }
169     
170     /// is read secondary?
171     @property bool isSecondary()
172     {
173         version(LDC){
174             pragma(inline, true);
175         }
176         return cast(bool)(b.core.flag & BAM_FSECONDARY);
177     }
178 
179     /// is read supplementary?
180     @property bool isSupplementary()
181     {
182         version(LDC){
183             pragma(inline, true);
184         }
185         return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY);
186     }
187 
188     /// Return read strand + or - (as char)
189     @property char strand(){
190         return ['+','-'][isReversed()];
191     }
192 
193     /// auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
194     pragma(inline, true)
195     @property char[] queryName()
196     {
197         return fromStringz(bam_get_qname(this.b));
198     }
199     
200     /// Set query name
201     pragma(inline, true)
202     @property void queryName(string qname)
203     {
204         assert(qname.length<252);
205 
206         //make cstring
207         auto qname_n=qname.dup~'\0';
208 
209         //append extra nulls to 32bit align cigar
210         auto extranul=0;
211         for (; qname_n.length % 4 != 0; extranul++) qname_n~='\0';
212         assert(extranul<=3);
213 
214         //get length of rest of data
215         auto l_rest=b.l_data-b.core.l_qname;
216 
217         //copy rest of data
218         ubyte[] rest=b.data[b.core.l_qname..b.l_data].dup;
219         
220         //if not enough space
221         if(qname_n.length+l_rest>b.m_data){
222             auto ptr=cast(ubyte*)realloc(b.data,qname_n.length+l_rest);
223             assert(ptr!=null);
224             b.data=ptr;
225             b.m_data=cast(uint)qname_n.length+l_rest;
226         }
227 
228         //reassign q_name and rest of data
229         b.data[0..qname_n.length]=(cast(ubyte[])qname_n);
230         b.data[qname_n.length..qname_n.length+l_rest]=rest;
231 
232         //reset data length, qname length, extra nul length
233         b.l_data=cast(uint)qname_n.length+l_rest;
234         b.core.l_qname=cast(ubyte)(qname_n.length);
235         b.core.l_extranul=cast(ubyte)extranul;
236     }
237 
238     /// query (and quality string) length
239     pragma(inline, true)
240     @property int length()
241     {
242         return this.b.core.l_qseq;
243     }
244 
245     /// Return char array of the sequence
246     /// see samtools/sam_view.c: get_read
247     @property const(char)[] sequence()
248     {
249         // calloc fills with \0; +1 len for Cstring
250         // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1);
251         char[] s;
252         s.length = this.b.core.l_qseq;
253 
254         // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
255         auto seqdata = bam_get_seq(this.b);
256 
257         for (int i; i < this.b.core.l_qseq; i++)
258         {
259             s[i] = seq_nt16_str[bam_seqi(seqdata, i)];
260         }
261         return s;
262     }
263 
264     /// Assigns sequence and resets quality score
265     pragma(inline, true)
266     @property void sequence(const(char)[] seq)
267     {
268         //nibble encode sequence
269         ubyte[] en_seq;
270         en_seq.length=(seq.length+1)>>1;
271         for(auto i=0;i<seq.length;i++){
272             en_seq[i>>1]|=seq_nt16_table[seq[i]]<< ((~i&1)<<2);
273         }
274 
275         //get length of data before seq
276         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof);
277 
278         //get starting point after seq
279         uint start_rest=l_prev+((b.core.l_qseq+1)>>1)+b.core.l_qseq;
280 
281         //copy previous data
282         ubyte[] prev=b.data[0..l_prev].dup;
283 
284         //copy rest of data
285         ubyte[] rest=b.data[start_rest..b.l_data].dup;
286         
287         //if not enough space
288         if(en_seq.length+seq.length+l_prev+(b.l_data-start_rest)>b.m_data){
289             auto ptr=cast(ubyte*)realloc(b.data,en_seq.length+seq.length+l_prev+(b.l_data-start_rest));
290             assert(ptr!=null);
291             b.data=ptr;
292             b.m_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest);
293         }
294 
295         //reassign q_name and rest of data
296         b.data[0..l_prev]=prev;
297         b.data[l_prev..en_seq.length+l_prev]=en_seq;
298         //set qscores to 255 
299         memset(b.data+en_seq.length+l_prev,255,seq.length);
300         b.data[l_prev+en_seq.length+seq.length..l_prev+en_seq.length+seq.length+(b.l_data-start_rest)]=rest;
301 
302         //reset data length, seq length
303         b.l_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest);
304         b.core.l_qseq=cast(int)(seq.length);
305     }
306 
307     /// Return char array of the qscores
308     /// see samtools/sam_view.c: get_quality
309     const(char)[] qscores(bool phredScale=true)()
310     {
311         // calloc fills with \0; +1 len for Cstring
312         // char* q = cast(char*) calloc(1, this.b.core.l_qseq + 1);
313         char[] q;
314         q.length = this.b.core.l_qseq;
315 
316         // auto bam_get_qual(bam1_t *b) { return (*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname + (((*b).core.l_qseq + 1)>>1); }
317         char* qualdata = cast(char*) bam_get_qual(this.b);
318 
319         for (int i; i < this.b.core.l_qseq; i++)
320             static if(phredScale) q[i] = cast(char)(qualdata[i] + 33);
321             else q[i] = cast(char)(qualdata[i]);
322         return q;
323     }
324 
325     /// Add qscore sequence given that it is the same length as the bam sequence
326     pragma(inline, true)
327     void q_scores(bool phredScale=true)(const(char)[] seq)
328     {
329         if(seq.length!=b.core.l_qseq){
330             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
331             return;
332         }
333         auto s= seq.dup;
334         for(auto i=0;i<seq.length;i++){
335             static if(phredScale) s[i]=seq[i]-33;
336         }
337         
338         //get length of data before seq
339         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1);
340         b.data[l_prev..s.length+l_prev]=cast(ubyte[])s;
341     }
342 
343     /// Add qscore sequence given that it is the same length as the bam sequence
344     pragma(inline, true)
345     void qscores(ubyte[] seq)
346     {
347         if(seq.length!=b.core.l_qseq){
348             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
349             return;
350         }
351         
352         //get length of data before seq
353         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1);
354         b.data[l_prev..seq.length+l_prev]=seq;
355     }
356 
357     /// Create cigar from bam1_t record
358     @property Cigar cigar()
359     {
360         return Cigar(bam_get_cigar(b), (*b).core.n_cigar);
361     }
362 
363     /// Assign a cigar string
364     pragma(inline, true)
365     @property void cigar(Cigar cigar)
366     {
367         
368         //get length of data before seq
369         uint l_prev=b.core.l_qname;
370 
371         //get starting point after seq
372         uint start_rest=l_prev+cast(uint)(b.core.n_cigar*uint.sizeof);
373 
374         //copy previous data
375         ubyte[] prev=b.data[0..l_prev].dup;
376 
377         //copy rest of data
378         ubyte[] rest=b.data[start_rest..b.l_data].dup;
379         
380         //if not enough space
381         if(uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)>b.m_data){
382             auto ptr=cast(ubyte*)realloc(b.data,uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest));
383             assert(ptr!=null);
384             b.data=ptr;
385             b.m_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest);
386         }
387 
388         //reassign q_name and rest of data
389         b.data[0..l_prev]=prev;
390         //without cigar.ops.dup we get the overlapping array copy error
391         b.data[l_prev..uint.sizeof*cigar.ops.length+l_prev]=cast(ubyte[])(cast(uint[])cigar.ops.dup);
392         b.data[l_prev+uint.sizeof*cigar.ops.length..l_prev+uint.sizeof*cigar.ops.length+(b.l_data-start_rest)]=rest;
393 
394         //reset data length, seq length
395         b.l_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest);
396         b.core.n_cigar=cast(int)(cigar.ops.length);
397     }
398 
399     /// Get aux tag from bam1_t record and return a TagValue
400     TagValue opIndex(string val)
401     {
402         return TagValue(b, val[0..2]);
403     }
404 
405     /// Assign a tag key value pair
406     void opIndexAssign(T)(T value,string index)
407     {
408         import std.traits:isIntegral,isSomeString;
409         static if(isIntegral!T){
410             bam_aux_update_int(b,index[0..2],value);
411         }else static if(is(T==float)){
412             bam_aux_update_float(b,index[0..2],value);
413         }else static if(isSomeString!T){
414             bam_aux_update_str(b,index[0..2],cast(int)value.length+1,toStringz(value));
415         }
416     }
417 
418     /// Assign a tag key array pair
419     void opIndexAssign(T:T[])(T[] value,string index)
420     {
421         bam_aux_update_array(b,index[0..2],TypeChars[TypeIndex!T],value.length,value.ptr);
422     }
423 
424     /// chromosome ID of next read in template, defined by bam_hdr_t
425     pragma(inline, true)
426     @nogc @safe nothrow
427     @property int mateTID() { return this.b.core.mtid; }
428     /// ditto
429     pragma(inline, true)
430     @nogc @safe nothrow
431     @property void mateTID(int mtid) { this.b.core.mtid = mtid; }
432 
433     /// 0-based leftmost coordinate of next read in template
434     pragma(inline, true)
435     @nogc @safe nothrow
436     @property int matePos() { return this.b.core.mpos; }
437     /// ditto
438     pragma(inline, true)
439     @nogc @safe nothrow
440     @property void matePos(int mpos) { this.b.core.mpos = mpos; }
441 
442     /// Presumably Insert size, but is undocumented.
443     /// Per samtools source, is measured 5' to 5'
444     /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320
445     pragma(inline, true)
446     @nogc @safe nothrow
447     @property int insertSize() { return this.b.core.isize; }
448     /// ditto
449     pragma(inline, true)
450     @nogc @safe nothrow
451     @property void insertSize(int isize) { this.b.core.isize = isize; }
452 
453     /// get aligned coordinates per each cigar op
454     auto getAlignedCoordinates(){
455         return AlignedCoordinatesItr(this.cigar);
456     }
457 
458     /// get aligned coordinates per each cigar op within range
459     /// range is 0-based half open using chromosomal coordinates
460     auto getAlignedCoordinates(int start, int end){
461         assert(start >= this.pos);
462         assert(end <= this.pos + this.cigar.ref_bases_covered);
463         return this.getAlignedCoordinates.filter!(x=> this.pos + x.rpos >= start).filter!(x=> this.pos + x.rpos < end);
464     }
465 
466     struct AlignedPair(bool refSeq)
467     {
468         int qpos, rpos;
469         Ops cigar_op;
470         char queryBase;
471         static if(refSeq) char refBase;
472 
473         string toString(){
474             import std.conv : to;
475             static if(refSeq) return rpos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.to!string ~ ":" ~ queryBase;
476             else return rpos.to!string ~ ":" ~ qpos.to!string ~ ":" ~ queryBase;
477         }
478     }
479 
480     /// get a range of aligned read and reference positions
481     /// this is meant to recreate functionality from pysam:
482     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
483     /// range is 0-based half open using chromosomal coordinates
484     auto getAlignedPairs(bool withRefSeq)(int start, int end)
485     {
486         import dhtslib.md : MDItr;
487         import std.range : dropExactly;
488         struct AlignedPairs(bool refSeq)
489         {
490             ReturnType!getAlignedCoordinates coords;
491             static if(refSeq) MDItr mdItr;
492             AlignedPair!refSeq current;
493             const(char)[] qseq;
494 
495             auto getAlignedCoordinates(SAMRecord rec, int start, int end){
496                 return rec.getAlignedCoordinates(start,end);
497             }
498             
499             this(SAMRecord rec, int start, int end){
500                 assert(start < end);
501                 assert(start >= rec.pos);
502                 assert(end <= rec.pos + rec.cigar.ref_bases_covered);
503                 
504                 coords = getAlignedCoordinates(rec, start, end);
505                 qseq = rec.sequence;
506                 static if(refSeq){
507                     assert(rec["MD"].exists,"MD tag must exist for record");
508                     mdItr = MDItr(rec);
509                     mdItr = mdItr.dropExactly(start - rec.pos);
510                 } 
511                 current.qpos = coords.front.qpos;
512                 current.rpos = coords.front.rpos;
513                 current.cigar_op = coords.front.cigar_op;
514                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
515                 else current.queryBase = qseq[current.qpos];
516                 
517                 static if(refSeq){
518                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
519                     else if(mdItr.front == '=') current.refBase = current.queryBase;
520                     else current.refBase = mdItr.front;
521                 }
522             }
523 
524             AlignedPair!refSeq front(){
525                 return current;
526             }
527 
528             void popFront(){
529                 coords.popFront;
530                 if(coords.empty) return;
531                 current.qpos = coords.front.qpos;
532                 current.rpos = coords.front.rpos;
533                 current.cigar_op = coords.front.cigar_op;
534                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
535                 else current.queryBase = qseq[current.qpos];
536                 
537                 static if(refSeq){
538                     if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront;
539                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
540                     else if(mdItr.front == '=') current.refBase = current.queryBase;
541                     else current.refBase = mdItr.front;
542                 }
543 
544             }
545 
546             bool empty(){
547                 return coords.empty;
548             }
549 
550         }
551         return AlignedPairs!withRefSeq(this,start,end);
552     }
553     /// get a range of aligned read and reference positions
554     /// this is meant to recreate functionality from pysam:
555     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
556     auto getAlignedPairs(bool withRefSeq)(){
557         return getAlignedPairs!withRefSeq(this.pos, this.pos + this.cigar.ref_bases_covered);
558     }
559 }
560 
561 debug(dhtslib_unittest)
562 unittest{
563     writeln();
564     import dhtslib.sam;
565     import dhtslib.htslib.hts_log : hts_log_info;
566     import std.path:buildPath,dirName;
567     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
568     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
569     hts_log_info(__FUNCTION__, "Loading test file");
570     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
571     auto read=bam.all_records.front;
572 
573     //change queryname
574     hts_log_info(__FUNCTION__, "Testing queryname");
575     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
576     read.queryName="HS18_09653:4:1315:19857:6171";
577     assert(read.queryName=="HS18_09653:4:1315:19857:6171");
578     assert(read.cigar.toString() == "78M1D22M");
579     read.queryName="HS18_09653:4:1315:19857:617122";
580     assert(read.queryName=="HS18_09653:4:1315:19857:617122");
581     assert(read.cigar.toString() == "78M1D22M");
582     assert(read["RG"].check!string);
583     assert(read["RG"].to!string=="1");
584 
585     //change sequence
586     hts_log_info(__FUNCTION__, "Testing sequence");
587     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
588     read.sequence=read.sequence[0..10];
589     assert(read.sequence=="AGCTAGGGCA");
590     ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
591     assert(read.qscores!false==cast(char[])(q));
592     q=[1,14,20,40,30,10,14,20,40,30];
593     read.qscores(q);
594     assert(read.qscores!false==cast(char[])(q));
595     q[]+=33;
596     assert(read.qscores==cast(char[])(q));
597     assert(read.cigar.toString() == "78M1D22M");
598     assert(read["RG"].check!string);
599     assert(read["RG"].to!string=="1");
600 
601     //change cigar
602     hts_log_info(__FUNCTION__, "Testing cigar");
603     auto c=read.cigar;
604     c.ops[$-1].length=21;
605     read.cigar=c;
606     assert(read.cigar.toString() == "78M1D21M");
607     assert(read["RG"].check!string);
608     assert(read["RG"].to!string=="1");
609     
610     //change tag
611     hts_log_info(__FUNCTION__, "Testing tags");
612     read["X0"]=2;
613     assert(read["X0"].to!ubyte==2);
614     assert(read["RG"].check!string);
615     
616     read["RG"]="test";
617     assert(read["RG"].to!string=="test");
618     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
619 }
620 
621 debug(dhtslib_unittest)
622 unittest{
623     writeln();
624     import dhtslib.sam;
625     import dhtslib.htslib.hts_log : hts_log_info;
626     import std.path:buildPath,dirName;
627     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
628     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
629     hts_log_info(__FUNCTION__, "Loading test file");
630     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
631     auto read=bam.all_records.front;
632 
633     //change queryname
634     hts_log_info(__FUNCTION__, "Testing queryname");
635     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
636 
637     //we do this only to force realloc
638     read.b.m_data=read.b.l_data;
639 
640 
641     auto prev_m=read.b.m_data;
642     read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
643     assert(read.b.m_data >prev_m);
644     assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
645     assert(read.cigar.toString() == "78M1D22M");
646     assert(read["RG"].check!string);
647     assert(read["RG"].to!string=="1");
648 
649     //change sequence
650     hts_log_info(__FUNCTION__, "Testing sequence");
651     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
652     prev_m=read.b.m_data;
653     read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
654     assert(read.b.m_data >prev_m);
655     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
656     assert(read.cigar.toString() == "78M1D22M");
657     assert(read["RG"].check!string);
658     assert(read["RG"].to!string=="1");
659 
660     //change cigar
661     hts_log_info(__FUNCTION__, "Testing cigar");
662     auto c=read.cigar;
663     c.ops=c.ops~c.ops;
664     prev_m=read.b.m_data;
665     read.cigar=c;
666     assert(read.b.m_data >prev_m);
667     assert(read.cigar.toString() == "78M1D22M78M1D22M");
668     assert(read["RG"].check!string);
669     assert(read["RG"].to!string=="1");
670     
671     //change tag
672     hts_log_info(__FUNCTION__, "Testing tags");
673     read["X0"]=2;
674     assert(read["X0"].to!ubyte==2);
675     assert(read["RG"].check!string);
676     
677     read["RG"]="test";
678     assert(read["RG"].to!string=="test");
679     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
680 }
681 
682 unittest
683 {
684     import std.stdio;
685     import dhtslib.sam;
686     import dhtslib.md : MDItr;
687     import std.algorithm: map;
688     import std.array: array;
689     import std.path:buildPath,dirName;
690     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
691     auto read=bam.all_records.front;
692     auto pairs = read.getAlignedPairs!true(read.pos + 77, read.pos + 77 + 5);
693     assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
694     assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
695     assert(pairs.map!(x => x.refBase).array == "GAAAA");
696     assert(pairs.map!(x => x.queryBase).array == "G-AAA");
697 }
698 
699 alias SAMFile = SAMReader;
700 /**
701 Encapsulates a SAM/BAM file.
702 
703 Implements InputRange interface using htslib calls.
704 If indexed, Random-access query via multidimensional slicing.
705 */
706 struct SAMReader
707 {
708     /// filename; as usable from D
709     string filename;
710 
711     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
712     private immutable(char)* fn;
713 
714     /// htsFile
715     private htsFile* fp;
716 
717     /// hFILE if required
718     private hFILE* f;
719 
720     /// header struct
721     bam_hdr_t* header = null;
722 
723     /// SAM/BAM/CRAM index 
724     private hts_idx_t* idx;
725 
726     private kstring_t line;
727 
728     /// disallow copying
729     @disable this(this);
730 
731     /** Create a representation of SAM/BAM/CRAM file from given filename or File
732 
733     Params:
734         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
735         extra_threads = extra threads for compression/decompression
736                         -1 use all available cores (default)
737                         0  use no extra threads
738                         >1 add indicated number of threads (to a default of 1)
739     */
740     this(T)(T f, int extra_threads = -1)
741     if (is(T == string) || is(T == File))
742     {
743         import std.parallelism : totalCPUs;
744 
745         // open file
746         static if (is(T == string))
747         {
748             this.filename = f;
749             this.fn = toStringz(f);
750             this.fp = hts_open(this.fn, cast(immutable(char)*) "r");
751         }
752         else static if (is(T == File))
753         {
754             this.filename = f.name();
755             this.fn = toStringz(f.name);
756             this.f = hdopen(f.fileno, cast(immutable(char)*) "r");
757             this.fp = hts_hopen(this.f, this.fn, cast(immutable(char)*) "r");
758         }
759         else assert(0);
760 
761         if (extra_threads == -1)
762         {
763             if ( totalCPUs > 1)
764             {
765                 hts_log_info(__FUNCTION__,
766                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
767                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
768                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
769                 hts_set_threads(this.fp, totalCPUs);
770             }
771         }
772         else if (extra_threads > 0)
773         {
774             if ((extra_threads + 1) > totalCPUs)
775                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
776             hts_set_threads(this.fp, extra_threads);
777         }
778         else if (extra_threads == 0)
779         {
780             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
781         }
782         else
783         {
784             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
785         }
786 
787         // read header
788         this.header = sam_hdr_read(this.fp);
789         this.idx = sam_index_load(this.fp, this.fn);
790         if (this.idx == null)
791         {
792             hts_log_info(__FUNCTION__, "SAM index not found");
793             // TODO: attempt to build
794             // TODO: edit range to return empty immediately if no idx
795         }
796         hts_log_debug(__FUNCTION__, format("SAM index: %s", this.idx));
797     }
798 
799     ~this()
800     {
801         debug (dhtslib_debug)
802         {
803             writeln("SAMFile dtor");
804         }
805 
806         bam_hdr_destroy(this.header);
807 
808         if((this.fp !is null) && (this.f is null))
809         {
810             const auto ret = hts_close(fp);
811             if (ret < 0)
812                 writefln("There was an error closing %s", fromStringz(this.fn));
813         }
814     }
815 
816     /// number of reference sequences; from bam_hdr_t
817     @property int n_targets() const
818     {
819         return this.header.n_targets;
820     }
821 
822     /// length of specific reference sequence (by number)
823     uint target_len(int target) const
824     {
825         return this.header.target_len[target];
826     }
827 
828     /// lengths of the reference sequences
829     @property uint[] target_lens() const
830     {
831         return this.header.target_len[0 .. this.n_targets].dup;
832     }
833 
834     /// names of the reference sequences
835     @property string[] target_names() const
836     {
837         string[] names;
838         names.length = this.n_targets;
839         foreach (i; 0 .. this.n_targets)
840         {
841             names[i] = fromStringz(this.header.target_name[i]).idup;
842         }
843         return names;
844     }
845 
846     /// reference contig name to integer id
847     /// Calls int bam_name2id(bam_hdr_t *h, const char *_ref);
848     int target_id(string name)
849     {
850         return bam_name2id(this.header, toStringz(name));
851     }
852 
853     /** Query a region and return matching alignments as an InputRange */
854     /// Query by chr, start, end
855     auto query(string chrom, int start, int end)
856     {
857         string q = format("%s:%d-%d", chrom, start, end);
858         return query(q);
859     }
860 
861     /// Query by string chr:start-end
862     auto query(string q)
863     {
864         auto itr = sam_itr_querys(this.idx, this.header, toStringz(q));
865         return RecordRange(this.fp, itr);
866     }
867 
868     /// Query by contig id, start, end
869     auto query(int tid, int start, int end)
870     {
871         auto itr = sam_itr_queryi(this.idx, tid, start, end);
872         return RecordRange(this.fp, itr);
873     }
874 
875     /// Query by ["chr1:1-2","chr1:1000-1001"]
876     auto query(string[] regions)
877     {
878         return RecordRangeMulti(this.fp, this.idx, this.header, &(this), regions);
879     }
880 
881     /// bam["chr1:1-2"]
882     auto opIndex(string q)
883     {
884         return query(q);
885     }
886 
887     /// bam["chr1", 1..2]
888     auto opIndex(string tid, int[2] pos)
889     {
890         return query(tid, pos[0], pos[1]);
891     }
892 
893     /// bam["chr1", 1]
894     auto opIndex(string tid, int pos)
895     {
896         return query(tid, pos, pos + 1);
897     }
898 
899     /** Deprecated: use multidimensional slicing with second parameter as range (["chr1", 1 .. 2]) */
900     /// bam["chr1", 1, 2]
901     deprecated auto opIndex(string tid, int pos1, int pos2)
902     {
903         return query(tid, pos1, pos2);
904     }
905 
906     /// Integer-based chr below
907     /// bam[0, 1..2]
908     auto opIndex(int tid, int[2] pos)
909     {
910         return query(tid, pos[0], pos[1]);
911     }
912 
913     /// bam[0, 1]
914     auto opIndex(int tid, int pos)
915     {
916         return query(tid, pos, pos + 1);
917     }
918 
919     /// bam[0, 1, 2]
920     deprecated auto opIndex(int tid, int pos1, int pos2)
921     {
922         return query(tid, pos1, pos2);
923     }
924 
925     /// support bam["chr1", 1..2 ]
926     int[2] opSlice(size_t dim)(int start, int end) if (dim  == 1)
927     {
928         return [start, end];
929     }
930 
931     /// Return an InputRange representing all recods in the SAM/BAM/CRAM
932     AllRecordsRange all_records()
933     {
934         auto range = AllRecordsRange(this.fp, this.header, bam_init1());
935         range.popFront();
936         return range;
937     }
938     alias allRecords = all_records;
939 
940     /// Iterate through all records in the SAM/BAM/CRAM
941     struct AllRecordsRange
942     {
943         private htsFile*    fp;     // belongs to parent; shared
944         private bam_hdr_t*  header; // belongs to parent; shared
945         private bam1_t*     b;
946         private int success;
947         ~this()
948         {
949             //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor");
950         }
951 
952         /// InputRange interface
953         @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range)
954         {
955             //    int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);
956             if (success >= 0)
957                 return false;
958             else if (success == -1)
959                 return true;
960             else
961             {
962                 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1");
963                 return true;
964             }
965         }
966         /// ditto
967         void popFront()
968         {
969             success = sam_read1(this.fp, this.header, this.b);
970             // noop? 
971             // free this.b ?
972 
973             //bam_destroy1(this.b);
974         }
975         /// ditto
976         SAMRecord front()
977         {
978             return new SAMRecord(bam_dup1(this.b));
979         }
980 
981     }
982 
983     /// Iterate over records falling within a queried region (TODO: itr_multi_query)
984     struct RecordRange
985     {
986         private htsFile* fp;
987         private hts_itr_t* itr;
988         private bam1_t* b;
989 
990         private int r;
991 
992         ///
993         this(htsFile* fp, hts_itr_t* itr)
994         {
995             this.itr = itr;
996             this.fp = fp;
997             b = bam_init1();
998             //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); }
999             hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0));
1000             popFront();
1001         }
1002 
1003         /// InputRange interface
1004         @property bool empty()
1005         {
1006             return (r <= 0 && itr.finished) ? true : false;
1007         }
1008         /// ditto
1009         void popFront()
1010         {
1011             r = sam_itr_next(this.fp, this.itr, this.b);
1012         }
1013         /// ditto
1014         SAMRecord front()
1015         {
1016             return new SAMRecord(bam_dup1(b));
1017         }
1018     }
1019 
1020     /// Iterate over records falling within queried regions using a RegionList
1021     struct RecordRangeMulti
1022     {
1023         private htsFile* fp;
1024         private hts_itr_multi_t* itr;
1025         private bam1_t* b;
1026         private hts_reglist_t[] rlist;
1027         private int r;
1028 
1029         ///
1030         this(htsFile* fp, hts_idx_t* idx, bam_hdr_t* header, SAMFile* sam, string[] regions)
1031         {
1032             rlist = RegionList(sam, regions).getRegList();
1033             this.fp = fp;
1034             b = bam_init1();
1035             itr = sam_itr_regions(idx, header, rlist.ptr, cast(uint) rlist.length);
1036             //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); }
1037             hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0));
1038             popFront();
1039         }
1040 
1041         /// InputRange interface
1042         @property bool empty()
1043         {
1044             return (r <= 0 && itr.finished) ? true : false;
1045         }
1046         /// ditto
1047         void popFront()
1048         {
1049             r = sam_itr_multi_next(this.fp, this.itr, this.b);
1050         }
1051         /// ditto
1052         SAMRecord front()
1053         {
1054             return new SAMRecord(bam_dup1(b));
1055         }
1056     }
1057 
1058     /// List of regions based on sam/bam
1059     struct RegionList
1060     {
1061         import std.algorithm.iteration : splitter;
1062         import std.algorithm.sorting : sort;
1063         import std.range : drop, array;
1064         import std.conv : to;
1065 
1066         private hts_reglist_t[string] rlist;
1067         private SAMFile* sam;
1068 
1069         ///
1070         this(SAMFile* sam, string[] queries)
1071         {
1072             this.sam = sam;
1073             foreach (q; queries)
1074             {
1075                 addRegion(q);
1076             }
1077         }
1078 
1079         /// Add a region in standard format (chr1:2000-3000) to the RegionList
1080         void addRegion(string reg)
1081         {
1082             //chr:1-2
1083             //split into chr and 1-2
1084             auto split = reg.splitter(":");
1085             auto chr = split.front;
1086             //split 1-2 into 1 and 2
1087             split = split.drop(1).front.splitter("-");
1088             if (sam.target_id(chr) < 0)
1089             {
1090                 hts_log_error(__FUNCTION__, "tid not present in sam/bam");
1091             }
1092             addRegion(sam.target_id(chr), split.front.to!int, split.drop(1).front.to!int);
1093         }
1094 
1095         /// Add a region by {target/contig/chr id, start coord, end coord} to the RegionList
1096         void addRegion(int tid, int beg, int end)
1097         {
1098             if (tid > this.sam.n_targets || tid < 0)
1099                 hts_log_error(__FUNCTION__, "tid not present in sam/bam");
1100 
1101             auto val = (this.sam.target_names[tid] in this.rlist);
1102             hts_pair32_t p;
1103 
1104             if (beg < 0)
1105                 hts_log_error(__FUNCTION__, "first coordinate < 0");
1106 
1107             if (beg >= this.sam.target_len(tid))
1108                 hts_log_error(__FUNCTION__, "first coordinate larger than tid length");
1109 
1110             if (end < 0)
1111                 hts_log_error(__FUNCTION__, "second coordinate < 0");
1112 
1113             if (end >= this.sam.target_len(tid))
1114                 hts_log_error(__FUNCTION__, "second coordinate larger than tid length");
1115 
1116             p.beg = beg;
1117             p.end = end;
1118             hts_pair32_t[] plist;
1119             if (val is null)
1120             {
1121                 hts_reglist_t r;
1122 
1123                 //set tid
1124                 r.tid = tid;
1125 
1126                 //create intervals
1127                 plist = plist ~ p;
1128                 r.intervals = plist.ptr;
1129                 r.count = cast(uint) plist.length;
1130                 r.min_beg = p.beg;
1131                 r.max_end = p.end;
1132                 this.rlist[this.sam.target_names[tid]] = r;
1133             }
1134             else
1135             {
1136                 plist = (val.intervals[0 .. val.count] ~ p).sort!(cmpInterval).array;
1137                 val.intervals = plist.ptr;
1138                 val.count = cast(uint) plist.length;
1139                 val.min_beg = plist[0].beg;
1140                 val.max_end = plist[$ - 1].end;
1141             }
1142         }
1143 
1144         hts_reglist_t[] getRegList()
1145         {
1146             return rlist.byValue.array.sort!(cmpRegList).array;
1147         }
1148     }
1149 }
1150 
1151 /// Nucleotide complement table; from samtools/sam_view.c
1152 private const(char)[16] seq_comp_table = [0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15];
1153 
1154 /// Reverse a string in place; from samtools/sam_view.c
1155 //  TODO: Could be sped up, and potentially made safer? by passing strlen since already known
1156 private char* reverse(char* str)
1157 {
1158     import core.stdc.string : strlen;
1159 
1160     auto i = strlen(str) - 1, j = 0;
1161     char ch;
1162     while (i > j)
1163     {
1164         ch = str[i];
1165         str[i] = str[j];
1166         str[j] = ch;
1167         i--;
1168         j++;
1169     }
1170     return str;
1171 }
1172 
1173 /// SAM/BAM/CRAM disk format
1174 enum SAMWriterTypes{
1175     BAM,
1176     UBAM,
1177     SAM,
1178     CRAM,
1179     DEDUCE
1180 }
1181 
1182 /// Encapsulates a SAM/BAM/CRAM, but as write-only
1183 struct SAMWriter
1184 {
1185     /// filename; as usable from D
1186     string filename;
1187 
1188     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
1189     private immutable(char)* fn;
1190 
1191     /// htsFile
1192     private htsFile* fp;
1193 
1194     /// hFILE if required
1195     private hFILE* f;
1196 
1197     /// header struct
1198     bam_hdr_t* header = null;
1199 
1200     private kstring_t line;
1201 
1202     /// disallow copying
1203     @disable this(this);
1204 
1205     /** Create a representation of SAM/BAM/CRAM file from given filename or File
1206 
1207     Params:
1208         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
1209         extra_threads = extra threads for compression/decompression
1210                         -1 use all available cores (default)
1211                         0  use no extra threads
1212                         >1 add indicated number of threads (to a default of 1)
1213     */
1214     this(T)(T f,bam_hdr_t * header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1)
1215     if (is(T == string) || is(T == File))
1216     {
1217         import std.parallelism : totalCPUs;
1218         char[] mode;
1219         if(t == SAMWriterTypes.BAM) mode=['w','b','\0'];
1220         else if(t == SAMWriterTypes.UBAM) mode=['w','b','u','\0'];
1221         else if(t == SAMWriterTypes.SAM) mode=['w','\0'];
1222         else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0'];
1223         // open file
1224         static if (is(T == string))
1225         {
1226             if(t == SAMWriterTypes.DEDUCE){
1227                 import std.path:extension;
1228                 auto ext=extension(f);
1229                 if(ext==".bam") mode=['w','b','\0'];
1230                 else if(ext==".sam") mode=['w','\0'];
1231                 else if(ext==".cram") mode=['w','c','\0'];
1232                 else {
1233                     hts_log_error(__FUNCTION__,"extension "~ext~" not valid");
1234                     throw new Exception("DEDUCE SAMWriterType used with non-valid extension");
1235                 }
1236             }
1237             this.filename = f;
1238             this.fn = toStringz(f);
1239             this.fp = hts_open(this.fn, mode.ptr);
1240         }
1241         else static if (is(T == File))
1242         {
1243             assert(t!=SAMWriterTypes.DEDUCE);
1244             this.filename = f.name();
1245             this.fn = toStringz(f.name);
1246             this.f = hdopen(f.fileno, cast(immutable(char)*) "w");
1247             this.fp = hts_hopen(this.f, this.fn, mode.ptr);
1248         }
1249         else assert(0);
1250 
1251         if (extra_threads == -1)
1252         {
1253             if ( totalCPUs > 1)
1254             {
1255                 hts_log_info(__FUNCTION__,
1256                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
1257                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
1258                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
1259                 hts_set_threads(this.fp, totalCPUs);
1260             }
1261         }
1262         else if (extra_threads > 0)
1263         {
1264             if ((extra_threads + 1) > totalCPUs)
1265                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
1266             hts_set_threads(this.fp, extra_threads);
1267         }
1268         else if (extra_threads == 0)
1269         {
1270             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
1271         }
1272         else
1273         {
1274             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
1275         }
1276 
1277         // read header
1278         this.header = bam_hdr_dup(header);
1279         sam_hdr_write(this.fp,this.header);
1280     }
1281 
1282     ~this()
1283     {
1284         debug (dhtslib_debug)
1285         {
1286             writeln("SAMWriter dtor");
1287         }
1288 
1289         bam_hdr_destroy(this.header);
1290 
1291     }
1292 
1293     /// close file
1294     void close(){
1295         const auto ret = hts_close(this.fp);
1296         if (ret < 0)
1297             stderr.writefln("There was an error closing %s", fromStringz(this.fn));
1298     }
1299 
1300     /// write out to disk
1301     void write(SAMRecord * rec){
1302         const auto ret = sam_write1(this.fp,this.header,rec.b);
1303         assert(ret>=0);
1304     }
1305 }
1306 debug(dhtslib_unittest)
1307 unittest{
1308     writeln();
1309     import dhtslib.sam;
1310     import dhtslib.htslib.hts_log : hts_log_info;
1311     import std.path:buildPath,dirName;
1312     import std.string:fromStringz;
1313     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1314     hts_log_info(__FUNCTION__, "Testing SAMWriter");
1315     hts_log_info(__FUNCTION__, "Loading test file");
1316     auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0);
1317     auto readrange = sam.allRecords;
1318     auto sam2 = SAMWriter("test.bam",sam.header);
1319     hts_log_info(__FUNCTION__, "Getting read 1");
1320     auto read = readrange.front();
1321     sam2.write(&read);
1322     sam2.close;
1323     destroy(sam2);
1324     sam = SAMFile("test.bam");
1325     readrange = sam.allRecords;
1326     read = readrange.front();
1327     writeln(read.sequence);
1328     assert(read.sequence=="GCTAGCTCAG");
1329     // destroy(sam2);
1330 }
1331 
1332 /// Used in sorting
1333 bool cmpInterval(hts_pair32_t a, hts_pair32_t b)
1334 {
1335     if (a.beg < b.beg)
1336     {
1337         return true;
1338     }
1339     if (a.end < b.end)
1340     {
1341         return true;
1342     }
1343     return false;
1344 }
1345 
1346 /// Used in sorting
1347 bool cmpRegList(hts_reglist_t a, hts_reglist_t b)
1348 {
1349     if (a.tid < b.tid)
1350     {
1351         return true;
1352     }
1353     return false;
1354 }
1355 
1356 /// Parse text line of SAM; Used in unittest
1357 private int parseSam(string line, bam_hdr_t* header, bam1_t* b)
1358 {
1359     import dhtslib.htslib.kstring : kstring_t;
1360     import std.utf : toUTFz;
1361 
1362     kstring_t k;
1363     k.s = toUTFz!(char*)(line.dup);
1364     k.m = line.length + 1;
1365     k.l = line.length + 1;
1366     return sam_parse1(&k, header, b);
1367 }
1368 debug(dhtslib_unittest)
1369 unittest{
1370     writeln();
1371     import dhtslib.sam;
1372     import dhtslib.htslib.hts_log : hts_log_info;
1373     import std.path:buildPath,dirName;
1374     import std.string:fromStringz;
1375     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1376     hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord");
1377     hts_log_info(__FUNCTION__, "Loading test file");
1378     auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0);
1379     auto readrange = sam.allRecords;
1380     hts_log_info(__FUNCTION__, "Getting read 1");
1381     auto read = readrange.front();
1382     writeln(read.sequence);
1383     assert(read.sequence=="GCTAGCTCAG");
1384 }
1385 
1386 debug(dhtslib_unittest)
1387 unittest
1388 {
1389     import std.stdio : writeln;
1390     import std.range : drop;
1391     import std.utf : toUTFz;
1392     import dhtslib.htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
1393     import std.path:buildPath,dirName;
1394     import std.conv:to;
1395     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1396     hts_log_info(__FUNCTION__, "Loading sam file");
1397     auto range = File(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
1398     auto b = bam_init1();
1399     auto hdr = bam_hdr_init();
1400     string hdr_str;
1401     for (auto i = 0; i < 4; i++)
1402     {
1403         hdr_str ~= range.front ~ "\n";
1404         range.popFront;
1405     }
1406     hts_log_info(__FUNCTION__, "Header");
1407     writeln(hdr_str);
1408     hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
1409     hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
1410     auto r = new SAMRecord(b);
1411     hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
1412     assert(r.cigar.toString == "6M1D117M5D28M");
1413 }