1 /**
2 
3 SAMRecord and SAMFile are wrappers for htslib functions relating to SAM/BAM/CRAM* files
4 
5 SAMRecord is a structured representation of a SAM/BAM/CRAM* record,
6 backed internally by htslib's bam1_t, but with convenient getters and setters
7 for record attributes, including functions not included in vanilla htslib 
8 like returning the sequence or the qscore string (NB: actually char*)
9 
10 SAMFile is a structured representation of SAM/BAM/CRAM* file,
11 backed internally by htslib's htsFile and bam_hdr_t,
12 but with convenient getters and setters for file and header attributes,
13 as well as query functions accessible explicitly (`query("chr1:999-9999"`)
14 and by indexing (`samfile["chr1", 999 .. 9999]`).
15 The file object can be iterated as an InputRange to obtain every record in the file.
16 
17 Authors: James S Blachly, MD <james.blachly@gmail.com> ; Thomas Gregory <charles.gregory@osumc.edu>
18 
19 Bugs: SAMRecord and SAMFile function only as readers, rather than writers (i.e, cannot build SAMFile)
20 Bugs: (*CRAM functionality is limited and untested)
21 
22 Date: 2020-09-12
23 
24 License: Apache 2.0
25 
26 Standards: Sequence Alignment/Map Format Specification v1 14 Dec 2018 http://samtools.github.io/hts-specs/
27 
28 */
29 module dhtslib.sam;
30 
31 public import dhtslib.sam.header;
32 
33 import core.stdc.stdlib : calloc, free,realloc;
34 import core.stdc..string:memset;
35 import std.format;
36 import std.parallelism : totalCPUs;
37 import std.stdio : writeln, writefln, stderr, File;
38 import std..string : fromStringz, toStringz;
39 import std.traits : ReturnType;
40 import std.algorithm : filter;
41 
42 import htslib.hts : htsFile, hts_open, hts_close, hts_hopen;
43 import htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t;
44 import htslib.hts : seq_nt16_str,seq_nt16_table;
45 import htslib.hts : hts_set_threads;
46 import htslib.hfile : hdopen, hclose, hFILE;
47 
48 import htslib.hts_log;
49 import htslib.kstring;
50 import htslib.sam;
51 import dhtslib.cigar;
52 import dhtslib.tagvalue;
53 
54 /**
55 Encapsulates a SAM/BAM/CRAM record,
56 using the bam1_t type for memory efficiency,
57 and the htslib helper functions for speed.
58 **/
59 class SAMRecord
60 {
61     ///
62     bam1_t* b;
63 
64     /// Construct blank SAMRecord with empty backing bam1_t
65     this()
66     {
67         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor()"); /// This line triggers memory error when __FUNCTION__, but not when "Other string"
68         //test_log(__FUNCTION__, "ctor()");   /// This line will also trigger the memory error when __FUNCTION__, but not other strings
69         //writeln(__FUNCTION__);    // this will not trigger the memory error
70         this.b = bam_init1();
71         //assert(0);                // This will elide(?) the memory error
72         //assert(1 == 2);           // This will elide(?) the memory error
73     }
74     
75     /// Construct SAMRecord from supplied bam1_t
76     this(bam1_t* b)
77     {
78         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor(bam1_t *b)");
79         this.b = b;
80     }
81 
82     ~this()
83     {
84         //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor");
85         bam_destroy1(this.b); // we created our own in default ctor, or received copy via bam_dup1
86     }
87 
88 
89     /* bam1_core_t fields */
90 
91     /// chromosome ID, defined by bam_hdr_t
92     pragma(inline, true)
93     @nogc @safe nothrow
94     @property int tid() { return this.b.core.tid; }
95     /// ditto
96     pragma(inline, true)
97     @nogc @safe nothrow
98     @property void tid(int tid) { this.b.core.tid = tid; }
99 
100     /// 0-based leftmost coordinate
101     pragma(inline, true)
102     @nogc @safe nothrow
103     @property long pos() { return this.b.core.pos; }
104     /// ditto
105     pragma(inline, true)
106     @nogc @safe nothrow
107     @property void pos(long pos) { this.b.core.pos = pos; }
108 
109     // TODO: @field  bin     bin calculated by bam_reg2bin()
110 
111     /// mapping quality
112     pragma(inline, true)
113     @nogc @safe nothrow
114     @property ubyte qual() { return this.b.core.qual; }
115     /// ditto
116     pragma(inline, true)
117     @nogc @safe nothrow
118     @property void qual(ubyte q) { this.b.core.qual = q; }
119 
120     /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly.
121     pragma(inline, true)
122     @nogc @safe nothrow
123     @property int l_qname() { return this.b.core.l_qname; }
124 
125     /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly.
126     pragma(inline, true)
127     @nogc @safe nothrow
128     @property int l_extranul() { return this.b.core.l_extranul; }
129 
130     /// bitwise flag
131     pragma(inline, true)
132     @nogc @safe nothrow
133     @property ushort flag() { return this.b.core.flag; }
134     /// ditto
135     pragma(inline, true)
136     @nogc @safe nothrow
137     @property void flag(ushort fl) { this.b.core.flag = fl; }
138 
139     /// is read paired?
140     pragma(inline, true)
141     @property bool isPaired()
142     {
143         return (b.core.flag & BAM_FPAIRED);
144     }
145     
146     /// is read reversed?
147     /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
148     @property bool isReversed()
149     {
150         version(LDC) pragma(inline, true);
151         return bam_is_rev(this.b);
152     }
153 
154     /// is read mapped?
155     @property bool isMapped()
156     {
157         version(LDC){
158             pragma(inline, true);
159         }
160         return (b.core.flag & BAM_FUNMAP) == 0;
161     }
162 
163     /// is read mate mapped?
164     @property bool isMateMapped()
165     {
166         version(LDC){
167             pragma(inline, true);
168         }
169         return (b.core.flag & BAM_FMUNMAP) == 0;
170     }
171 
172     /// is mate reversed?
173     /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
174     pragma(inline, true)
175     @property bool mateReversed()
176     {
177         return bam_is_mrev(this.b);
178     }
179     
180     /// is read secondary?
181     @property bool isSecondary()
182     {
183         version(LDC){
184             pragma(inline, true);
185         }
186         return cast(bool)(b.core.flag & BAM_FSECONDARY);
187     }
188 
189     /// is read supplementary?
190     @property bool isSupplementary()
191     {
192         version(LDC){
193             pragma(inline, true);
194         }
195         return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY);
196     }
197 
198     /// Return read strand + or - (as char)
199     @property char strand(){
200         return ['+','-'][isReversed()];
201     }
202 
203     /// auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
204     pragma(inline, true)
205     @property char[] queryName()
206     {
207         return fromStringz(bam_get_qname(this.b));
208     }
209     
210     /// Set query name
211     pragma(inline, true)
212     @property void queryName(string qname)
213     {
214         assert(qname.length<252);
215 
216         //make cstring
217         auto qname_n=qname.dup~'\0';
218 
219         //append extra nulls to 32bit align cigar
220         auto extranul=0;
221         for (; qname_n.length % 4 != 0; extranul++) qname_n~='\0';
222         assert(extranul<=3);
223 
224         //get length of rest of data
225         auto l_rest=b.l_data-b.core.l_qname;
226 
227         //copy rest of data
228         ubyte[] rest=b.data[b.core.l_qname..b.l_data].dup;
229         
230         //if not enough space
231         if(qname_n.length+l_rest>b.m_data){
232             auto ptr=cast(ubyte*)realloc(b.data,qname_n.length+l_rest);
233             assert(ptr!=null);
234             b.data=ptr;
235             b.m_data=cast(uint)qname_n.length+l_rest;
236         }
237 
238         //reassign q_name and rest of data
239         b.data[0..qname_n.length]=(cast(ubyte[])qname_n);
240         b.data[qname_n.length..qname_n.length+l_rest]=rest;
241 
242         //reset data length, qname length, extra nul length
243         b.l_data=cast(uint)qname_n.length+l_rest;
244         b.core.l_qname=cast(ubyte)(qname_n.length);
245         b.core.l_extranul=cast(ubyte)extranul;
246     }
247 
248     /// query (and quality string) length
249     pragma(inline, true)
250     @property int length()
251     {
252         return this.b.core.l_qseq;
253     }
254 
255     /// Return char array of the sequence
256     /// see samtools/sam_view.c: get_read
257     @property const(char)[] sequence()
258     {
259         // calloc fills with \0; +1 len for Cstring
260         // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1);
261         char[] s;
262         s.length = this.b.core.l_qseq;
263 
264         // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
265         auto seqdata = bam_get_seq(this.b);
266 
267         for (int i; i < this.b.core.l_qseq; i++)
268         {
269             s[i] = seq_nt16_str[bam_seqi(seqdata, i)];
270         }
271         return s;
272     }
273 
274     /// Assigns sequence and resets quality score
275     pragma(inline, true)
276     @property void sequence(const(char)[] seq)
277     {
278         //nibble encode sequence
279         ubyte[] en_seq;
280         en_seq.length=(seq.length+1)>>1;
281         for(auto i=0;i<seq.length;i++){
282             en_seq[i>>1]|=seq_nt16_table[seq[i]]<< ((~i&1)<<2);
283         }
284 
285         //get length of data before seq
286         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof);
287 
288         //get starting point after seq
289         uint start_rest=l_prev+((b.core.l_qseq+1)>>1)+b.core.l_qseq;
290 
291         //copy previous data
292         ubyte[] prev=b.data[0..l_prev].dup;
293 
294         //copy rest of data
295         ubyte[] rest=b.data[start_rest..b.l_data].dup;
296         
297         //if not enough space
298         if(en_seq.length+seq.length+l_prev+(b.l_data-start_rest)>b.m_data){
299             auto ptr=cast(ubyte*)realloc(b.data,en_seq.length+seq.length+l_prev+(b.l_data-start_rest));
300             assert(ptr!=null);
301             b.data=ptr;
302             b.m_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest);
303         }
304 
305         //reassign q_name and rest of data
306         b.data[0..l_prev]=prev;
307         b.data[l_prev..en_seq.length+l_prev]=en_seq;
308         //set qscores to 255 
309         memset(b.data+en_seq.length+l_prev,255,seq.length);
310         b.data[l_prev+en_seq.length+seq.length..l_prev+en_seq.length+seq.length+(b.l_data-start_rest)]=rest;
311 
312         //reset data length, seq length
313         b.l_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest);
314         b.core.l_qseq=cast(int)(seq.length);
315     }
316 
317     /// Return char array of the qscores
318     /// see samtools/sam_view.c: get_quality
319     const(char)[] qscores(bool phredScale=true)()
320     {
321         // calloc fills with \0; +1 len for Cstring
322         // char* q = cast(char*) calloc(1, this.b.core.l_qseq + 1);
323         char[] q;
324         q.length = this.b.core.l_qseq;
325 
326         // auto bam_get_qual(bam1_t *b) { return (*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname + (((*b).core.l_qseq + 1)>>1); }
327         char* qualdata = cast(char*) bam_get_qual(this.b);
328 
329         for (int i; i < this.b.core.l_qseq; i++)
330             static if(phredScale) q[i] = cast(char)(qualdata[i] + 33);
331             else q[i] = cast(char)(qualdata[i]);
332         return q;
333     }
334 
335     /// Add qscore sequence given that it is the same length as the bam sequence
336     pragma(inline, true)
337     void q_scores(bool phredScale=true)(const(char)[] seq)
338     {
339         if(seq.length!=b.core.l_qseq){
340             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
341             return;
342         }
343         auto s= seq.dup;
344         for(auto i=0;i<seq.length;i++){
345             static if(phredScale) s[i]=seq[i]-33;
346         }
347         
348         //get length of data before seq
349         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1);
350         b.data[l_prev..s.length+l_prev]=cast(ubyte[])s;
351     }
352 
353     /// Add qscore sequence given that it is the same length as the bam sequence
354     pragma(inline, true)
355     void qscores(ubyte[] seq)
356     {
357         if(seq.length!=b.core.l_qseq){
358             hts_log_error(__FUNCTION__,"qscore length does not match sequence length");
359             return;
360         }
361         
362         //get length of data before seq
363         uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1);
364         b.data[l_prev..seq.length+l_prev]=seq;
365     }
366 
367     /// Create cigar from bam1_t record
368     @property Cigar cigar()
369     {
370         return Cigar(bam_get_cigar(b), (*b).core.n_cigar);
371     }
372 
373     /// Assign a cigar string
374     pragma(inline, true)
375     @property void cigar(Cigar cigar)
376     {
377         
378         //get length of data before seq
379         uint l_prev=b.core.l_qname;
380 
381         //get starting point after seq
382         uint start_rest=l_prev+cast(uint)(b.core.n_cigar*uint.sizeof);
383 
384         //copy previous data
385         ubyte[] prev=b.data[0..l_prev].dup;
386 
387         //copy rest of data
388         ubyte[] rest=b.data[start_rest..b.l_data].dup;
389         
390         //if not enough space
391         if(uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)>b.m_data){
392             auto ptr=cast(ubyte*)realloc(b.data,uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest));
393             assert(ptr!=null);
394             b.data=ptr;
395             b.m_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest);
396         }
397 
398         //reassign q_name and rest of data
399         b.data[0..l_prev]=prev;
400         //without cigar.ops.dup we get the overlapping array copy error
401         b.data[l_prev..uint.sizeof*cigar.ops.length+l_prev]=cast(ubyte[])(cast(uint[])cigar.ops.dup);
402         b.data[l_prev+uint.sizeof*cigar.ops.length..l_prev+uint.sizeof*cigar.ops.length+(b.l_data-start_rest)]=rest;
403 
404         //reset data length, seq length
405         b.l_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest);
406         b.core.n_cigar=cast(int)(cigar.ops.length);
407     }
408 
409     /// Get aux tag from bam1_t record and return a TagValue
410     TagValue opIndex(string val)
411     {
412         return TagValue(b, val[0..2]);
413     }
414 
415     /// Assign a tag key value pair
416     void opIndexAssign(T)(T value,string index)
417     {
418         import std.traits:isIntegral,isSomeString;
419         static if(isIntegral!T){
420             bam_aux_update_int(b,index[0..2],value);
421         }else static if(is(T==float)){
422             bam_aux_update_float(b,index[0..2],value);
423         }else static if(isSomeString!T){
424             bam_aux_update_str(b,index[0..2],cast(int)value.length+1,toStringz(value));
425         }
426     }
427 
428     /// Assign a tag key array pair
429     void opIndexAssign(T:T[])(T[] value,string index)
430     {
431         bam_aux_update_array(b,index[0..2],TypeChars[TypeIndex!T],value.length,value.ptr);
432     }
433 
434     /// chromosome ID of next read in template, defined by bam_hdr_t
435     pragma(inline, true)
436     @nogc @safe nothrow
437     @property int mateTID() { return this.b.core.mtid; }
438     /// ditto
439     pragma(inline, true)
440     @nogc @safe nothrow
441     @property void mateTID(int mtid) { this.b.core.mtid = mtid; }
442 
443     /// 0-based leftmost coordinate of next read in template
444     pragma(inline, true)
445     @nogc @safe nothrow
446     @property long matePos() { return this.b.core.mpos; }
447     /// ditto
448     pragma(inline, true)
449     @nogc @safe nothrow
450     @property void matePos(long mpos) { this.b.core.mpos = mpos; }
451 
452     /// Presumably Insert size, but is undocumented.
453     /// Per samtools source, is measured 5' to 5'
454     /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320
455     pragma(inline, true)
456     @nogc @safe nothrow
457     @property long insertSize() { return this.b.core.isize; }
458     /// ditto
459     pragma(inline, true)
460     @nogc @safe nothrow
461     @property void insertSize(long isize) { this.b.core.isize = isize; }
462 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/
463     /// get aligned coordinates per each cigar op
464     auto getAlignedCoordinates(){
465         return AlignedCoordinatesItr(this.cigar);
466     }
467 
468     /// get aligned coordinates per each cigar op within range
469     /// range is 0-based half open using chromosomal coordinates
470     auto getAlignedCoordinates(long start, long end){
471         assert(start >= this.pos);
472         assert(end <= this.pos + this.cigar.ref_bases_covered);
473         return this.getAlignedCoordinates.filter!(x=> this.pos + x.rpos >= start).filter!(x=> this.pos + x.rpos < end);
474     }
475 
476     struct AlignedPair(bool refSeq)
477     {
478         long qpos, rpos;
479         Ops cigar_op;
480         char queryBase;
481         static if(refSeq) char refBase;
482 
483         string toString(){
484             import std.conv : to;
485             static if(refSeq) return rpos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.to!string ~ ":" ~ queryBase;
486             else return rpos.to!string ~ ":" ~ qpos.to!string ~ ":" ~ queryBase;
487         }
488     }
489 
490     /// get a range of aligned read and reference positions
491     /// this is meant to recreate functionality from pysam:
492     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
493     /// range is 0-based half open using chromosomal coordinates
494     auto getAlignedPairs(bool withRefSeq)(long start, long end)
495     {
496         import dhtslib.md : MDItr;
497         import std.range : dropExactly;
498         struct AlignedPairs(bool refSeq)
499         {
500             ReturnType!getAlignedCoordinates coords;
501             static if(refSeq) MDItr mdItr;
502             AlignedPair!refSeq current;
503             const(char)[] qseq;
504 
505             auto getAlignedCoordinates(SAMRecord rec, long start, long end){
506                 return rec.getAlignedCoordinates(start,end);
507             }
508             
509             this(SAMRecord rec, long start, long end){
510                 assert(start < end);
511                 assert(start >= rec.pos);
512                 assert(end <= rec.pos + rec.cigar.ref_bases_covered);
513                 
514                 coords = getAlignedCoordinates(rec, start, end);
515                 qseq = rec.sequence;
516                 static if(refSeq){
517                     assert(rec["MD"].exists,"MD tag must exist for record");
518                     mdItr = MDItr(rec);
519                     mdItr = mdItr.dropExactly(start - rec.pos);
520                 } 
521                 current.qpos = coords.front.qpos;
522                 current.rpos = coords.front.rpos;
523                 current.cigar_op = coords.front.cigar_op;
524                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
525                 else current.queryBase = qseq[current.qpos];
526                 
527                 static if(refSeq){
528                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
529                     else if(mdItr.front == '=') current.refBase = current.queryBase;
530                     else current.refBase = mdItr.front;
531                 }
532             }
533 
534             AlignedPair!refSeq front(){
535                 return current;
536             }
537 
538             void popFront(){
539                 coords.popFront;
540                 if(coords.empty) return;
541                 current.qpos = coords.front.qpos;
542                 current.rpos = coords.front.rpos;
543                 current.cigar_op = coords.front.cigar_op;
544                 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-';
545                 else current.queryBase = qseq[current.qpos];
546                 
547                 static if(refSeq){
548                     if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront;
549                     if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-';
550                     else if(mdItr.front == '=') current.refBase = current.queryBase;
551                     else current.refBase = mdItr.front;
552                 }
553 
554             }
555 
556             bool empty(){
557                 return coords.empty;
558             }
559 
560         }
561         return AlignedPairs!withRefSeq(this,start,end);
562     }
563     /// get a range of aligned read and reference positions
564     /// this is meant to recreate functionality from pysam:
565     /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
566     auto getAlignedPairs(bool withRefSeq)(){
567         return getAlignedPairs!withRefSeq(this.pos, this.pos + this.cigar.ref_bases_covered);
568     }
569 /+ END CHERRY-PICK +/
570 }
571 
572 debug(dhtslib_unittest)
573 unittest{
574     writeln();
575     import dhtslib.sam;
576     import htslib.hts_log : hts_log_info;
577     import std.path:buildPath,dirName;
578     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
579     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation");
580     hts_log_info(__FUNCTION__, "Loading test file");
581     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
582     auto read=bam.all_records.front;
583 
584     //change queryname
585     hts_log_info(__FUNCTION__, "Testing queryname");
586     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
587     read.queryName="HS18_09653:4:1315:19857:6171";
588     assert(read.queryName=="HS18_09653:4:1315:19857:6171");
589     assert(read.cigar.toString() == "78M1D22M");
590     read.queryName="HS18_09653:4:1315:19857:617122";
591     assert(read.queryName=="HS18_09653:4:1315:19857:617122");
592     assert(read.cigar.toString() == "78M1D22M");
593     assert(read["RG"].check!string);
594     assert(read["RG"].to!string=="1");
595 
596     //change sequence
597     hts_log_info(__FUNCTION__, "Testing sequence");
598     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
599     read.sequence=read.sequence[0..10];
600     assert(read.sequence=="AGCTAGGGCA");
601     ubyte[] q=[255,255,255,255,255,255,255,255,255,255];
602     assert(read.qscores!false==cast(char[])(q));
603     q=[1,14,20,40,30,10,14,20,40,30];
604     read.qscores(q);
605     assert(read.qscores!false==cast(char[])(q));
606     q[]+=33;
607     assert(read.qscores==cast(char[])(q));
608     assert(read.cigar.toString() == "78M1D22M");
609     assert(read["RG"].check!string);
610     assert(read["RG"].to!string=="1");
611 
612     //change cigar
613     hts_log_info(__FUNCTION__, "Testing cigar");
614     auto c=read.cigar;
615     c.ops[$-1].length=21;
616     read.cigar=c;
617     assert(read.cigar.toString() == "78M1D21M");
618     assert(read["RG"].check!string);
619     assert(read["RG"].to!string=="1");
620     
621     //change tag
622     hts_log_info(__FUNCTION__, "Testing tags");
623     read["X0"]=2;
624     assert(read["X0"].to!ubyte==2);
625     assert(read["RG"].check!string);
626     
627     read["RG"]="test";
628     assert(read["RG"].to!string=="test");
629     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
630 }
631 
632 debug(dhtslib_unittest)
633 unittest{
634     writeln();
635     import dhtslib.sam;
636     import htslib.hts_log : hts_log_info;
637     import std.path:buildPath,dirName;
638     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
639     hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc");
640     hts_log_info(__FUNCTION__, "Loading test file");
641     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
642     auto read=bam.all_records.front;
643 
644     //change queryname
645     hts_log_info(__FUNCTION__, "Testing queryname");
646     assert(read.queryName=="HS18_09653:4:1315:19857:61712");
647 
648     //we do this only to force realloc
649     read.b.m_data=read.b.l_data;
650 
651 
652     auto prev_m=read.b.m_data;
653     read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712";
654     assert(read.b.m_data >prev_m);
655     assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712");
656     assert(read.cigar.toString() == "78M1D22M");
657     assert(read["RG"].check!string);
658     assert(read["RG"].to!string=="1");
659 
660     //change sequence
661     hts_log_info(__FUNCTION__, "Testing sequence");
662     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
663     prev_m=read.b.m_data;
664     read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT";
665     assert(read.b.m_data >prev_m);
666     assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT");
667     assert(read.cigar.toString() == "78M1D22M");
668     assert(read["RG"].check!string);
669     assert(read["RG"].to!string=="1");
670 
671     //change cigar
672     hts_log_info(__FUNCTION__, "Testing cigar");
673     auto c=read.cigar;
674     c.ops=c.ops~c.ops;
675     prev_m=read.b.m_data;
676     read.cigar=c;
677     assert(read.b.m_data >prev_m);
678     assert(read.cigar.toString() == "78M1D22M78M1D22M");
679     assert(read["RG"].check!string);
680     assert(read["RG"].to!string=="1");
681     
682     //change tag
683     hts_log_info(__FUNCTION__, "Testing tags");
684     read["X0"]=2;
685     assert(read["X0"].to!ubyte==2);
686     assert(read["RG"].check!string);
687     
688     read["RG"]="test";
689     assert(read["RG"].to!string=="test");
690     hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString());
691 }
692 
693 unittest
694 {
695     import std.stdio;
696     import dhtslib.sam;
697     import dhtslib.md : MDItr;
698     import std.algorithm: map;
699     import std.array: array;
700     import std.path:buildPath,dirName;
701     auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0);
702     auto read=bam.all_records.front;
703     auto pairs = read.getAlignedPairs!true(read.pos + 77, read.pos + 77 + 5);
704     assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]);
705     assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]);
706     assert(pairs.map!(x => x.refBase).array == "GAAAA");
707     assert(pairs.map!(x => x.queryBase).array == "G-AAA");
708 }
709 
710 alias SAMFile = SAMReader;
711 /**
712 Encapsulates a SAM/BAM file.
713 
714 Implements InputRange interface using htslib calls.
715 If indexed, Random-access query via multidimensional slicing.
716 */
717 struct SAMReader
718 {
719     /// filename; as usable from D
720     string filename;
721 
722     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
723     private immutable(char)* fn;
724 
725     /// htsFile
726     private htsFile* fp;
727 
728     /// hFILE if required
729     private hFILE* f;
730 
731     /// header struct
732     bam_hdr_t* header = null;
733 
734     /// SAM/BAM/CRAM index 
735     private hts_idx_t* idx;
736 
737     private kstring_t line;
738 
739     /// disallow copying
740     @disable this(this);
741 
742     /** Create a representation of SAM/BAM/CRAM file from given filename or File
743 
744     Params:
745         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
746         extra_threads = extra threads for compression/decompression
747                         -1 use all available cores (default)
748                         0  use no extra threads
749                         >1 add indicated number of threads (to a default of 1)
750     */
751     this(T)(T f, int extra_threads = -1)
752     if (is(T == string) || is(T == File))
753     {
754         import std.parallelism : totalCPUs;
755 
756         // open file
757         static if (is(T == string))
758         {
759             this.filename = f;
760             this.fn = toStringz(f);
761             this.fp = hts_open(this.fn, cast(immutable(char)*) "r");
762         }
763         else static if (is(T == File))
764         {
765             this.filename = f.name();
766             this.fn = toStringz(f.name);
767             this.f = hdopen(f.fileno, cast(immutable(char)*) "r");
768             this.fp = hts_hopen(this.f, this.fn, cast(immutable(char)*) "r");
769         }
770         else assert(0);
771 
772         if (extra_threads == -1)
773         {
774             if ( totalCPUs > 1)
775             {
776                 hts_log_info(__FUNCTION__,
777                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
778                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
779                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
780                 hts_set_threads(this.fp, totalCPUs);
781             }
782         }
783         else if (extra_threads > 0)
784         {
785             if ((extra_threads + 1) > totalCPUs)
786                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
787             hts_set_threads(this.fp, extra_threads);
788         }
789         else if (extra_threads == 0)
790         {
791             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
792         }
793         else
794         {
795             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
796         }
797 
798         // read header
799         this.header = sam_hdr_read(this.fp);
800         this.idx = sam_index_load(this.fp, this.fn);
801         if (this.idx == null)
802         {
803             hts_log_info(__FUNCTION__, "SAM index not found");
804             // TODO: attempt to build
805             // TODO: edit range to return empty immediately if no idx
806         }
807         hts_log_debug(__FUNCTION__, format("SAM index: %s", this.idx));
808     }
809 
810     ~this()
811     {
812         debug (dhtslib_debug)
813         {
814             writeln("SAMFile dtor");
815         }
816 
817         bam_hdr_destroy(this.header);
818 
819         if((this.fp !is null) && (this.f is null))
820         {
821             const auto ret = hts_close(fp);
822             if (ret < 0)
823                 writefln("There was an error closing %s", fromStringz(this.fn));
824         }
825     }
826 
827     /// number of reference sequences; from bam_hdr_t
828     @property int n_targets() const
829     {
830         return this.header.n_targets;
831     }
832 
833     /// length of specific reference sequence (by number)
834     uint target_len(int target) const
835     {
836         return this.header.target_len[target];
837     }
838 
839     /// lengths of the reference sequences
840     @property uint[] target_lens() const
841     {
842         return this.header.target_len[0 .. this.n_targets].dup;
843     }
844 
845     /// names of the reference sequences
846     @property string[] target_names() const
847     {
848         string[] names;
849         names.length = this.n_targets;
850         foreach (i; 0 .. this.n_targets)
851         {
852             names[i] = fromStringz(this.header.target_name[i]).idup;
853         }
854         return names;
855     }
856 
857     /// reference contig name to integer id
858     /// Calls int bam_name2id(bam_hdr_t *h, const char *_ref);
859     int target_id(string name)
860     {
861         return bam_name2id(this.header, toStringz(name));
862     }
863 
864     /** Query a region and return matching alignments as an InputRange */
865     /// Query by chr, start, end
866     auto query(string chrom, long start, long end)
867     {
868         string q = format("%s:%d-%d", chrom, start, end);
869         return query(q);
870     }
871 
872     /// Query by string chr:start-end
873     auto query(string q)
874     {
875         auto itr = sam_itr_querys(this.idx, this.header, toStringz(q));
876         return RecordRange(this.fp, itr);
877     }
878 
879     /// Query by contig id, start, end
880     auto query(int tid, long start, long end)
881     {
882         auto itr = sam_itr_queryi(this.idx, tid, start, end);
883         return RecordRange(this.fp, itr);
884     }
885 
886     /// Query by ["chr1:1-2","chr1:1000-1001"]
887     auto query(string[] regions)
888     {
889         return RecordRangeMulti(this.fp, this.idx, this.header, &(this), regions);
890     }
891 
892     /// bam["chr1:1-2"]
893     auto opIndex(string q)
894     {
895         return query(q);
896     }
897 
898     /// bam["chr1", 1..2]
899     auto opIndex(string tid, int[2] pos)
900     {
901         return query(tid, pos[0], pos[1]);
902     }
903 
904     /// bam["chr1", 1]
905     auto opIndex(string tid, int pos)
906     {
907         return query(tid, pos, pos + 1);
908     }
909 
910     /** Deprecated: use multidimensional slicing with second parameter as range (["chr1", 1 .. 2]) */
911     /// bam["chr1", 1, 2]
912     deprecated auto opIndex(string tid, int pos1, int pos2)
913     {
914         return query(tid, pos1, pos2);
915     }
916 
917     /// Integer-based chr below
918     /// bam[0, 1..2]
919     auto opIndex(int tid, int[2] pos)
920     {
921         return query(tid, pos[0], pos[1]);
922     }
923 
924     /// bam[0, 1]
925     auto opIndex(int tid, int pos)
926     {
927         return query(tid, pos, pos + 1);
928     }
929 
930     /// bam[0, 1, 2]
931     deprecated auto opIndex(int tid, int pos1, int pos2)
932     {
933         return query(tid, pos1, pos2);
934     }
935 
936     /// support bam["chr1", 1..2 ]
937     int[2] opSlice(size_t dim)(int start, int end) if (dim  == 1)
938     {
939         return [start, end];
940     }
941 
942     /// Return an InputRange representing all recods in the SAM/BAM/CRAM
943     AllRecordsRange all_records()
944     {
945         auto range = AllRecordsRange(this.fp, this.header, bam_init1());
946         range.popFront();
947         return range;
948     }
949     alias allRecords = all_records;
950 
951     /// Iterate through all records in the SAM/BAM/CRAM
952     struct AllRecordsRange
953     {
954         private htsFile*    fp;     // belongs to parent; shared
955         private bam_hdr_t*  header; // belongs to parent; shared
956         private bam1_t*     b;
957         private int success;
958         ~this()
959         {
960             //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor");
961         }
962 
963         /// InputRange interface
964         @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range)
965         {
966             //    int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);
967             if (success >= 0)
968                 return false;
969             else if (success == -1)
970                 return true;
971             else
972             {
973                 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1");
974                 return true;
975             }
976         }
977         /// ditto
978         void popFront()
979         {
980             success = sam_read1(this.fp, this.header, this.b);
981             // noop? 
982             // free this.b ?
983 
984             //bam_destroy1(this.b);
985         }
986         /// ditto
987         SAMRecord front()
988         {
989             return new SAMRecord(bam_dup1(this.b));
990         }
991 
992     }
993 
994     /// Iterate over records falling within a queried region (TODO: itr_multi_query)
995     struct RecordRange
996     {
997         private htsFile* fp;
998         private hts_itr_t* itr;
999         private bam1_t* b;
1000 
1001         private int r;
1002 
1003         ///
1004         this(htsFile* fp, hts_itr_t* itr)
1005         {
1006             this.itr = itr;
1007             this.fp = fp;
1008             b = bam_init1();
1009             //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); }
1010             hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0));
1011             popFront();
1012         }
1013 
1014         /// InputRange interface
1015         @property bool empty()
1016         {
1017             return (r <= 0 && itr.finished) ? true : false;
1018         }
1019         /// ditto
1020         void popFront()
1021         {
1022             r = sam_itr_next(this.fp, this.itr, this.b);
1023         }
1024         /// ditto
1025         SAMRecord front()
1026         {
1027             return new SAMRecord(bam_dup1(b));
1028         }
1029     }
1030 
1031     /// Iterate over records falling within queried regions using a RegionList
1032     struct RecordRangeMulti
1033     {
1034         private htsFile* fp;
1035         private hts_itr_multi_t* itr;
1036         private bam1_t* b;
1037         private hts_reglist_t[] rlist;
1038         private int r;
1039 
1040         ///
1041         this(htsFile* fp, hts_idx_t* idx, bam_hdr_t* header, SAMFile* sam, string[] regions)
1042         {
1043             rlist = RegionList(sam, regions).getRegList();
1044             this.fp = fp;
1045             b = bam_init1();
1046             itr = sam_itr_regions(idx, header, rlist.ptr, cast(uint) rlist.length);
1047             //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); }
1048             hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0));
1049             popFront();
1050         }
1051 
1052         /// InputRange interface
1053         @property bool empty()
1054         {
1055             return (r <= 0 && itr.finished) ? true : false;
1056         }
1057         /// ditto
1058         void popFront()
1059         {
1060             r = sam_itr_multi_next(this.fp, this.itr, this.b);
1061         }
1062         /// ditto
1063         SAMRecord front()
1064         {
1065             return new SAMRecord(bam_dup1(b));
1066         }
1067     }
1068 
1069     /// List of regions based on sam/bam
1070     struct RegionList
1071     {
1072         import std.algorithm.iteration : splitter;
1073         import std.algorithm.sorting : sort;
1074         import std.range : drop, array;
1075         import std.conv : to;
1076 
1077         private hts_reglist_t[string] rlist;
1078         private SAMFile* sam;
1079 
1080         ///
1081         this(SAMFile* sam, string[] queries)
1082         {
1083             this.sam = sam;
1084             foreach (q; queries)
1085             {
1086                 addRegion(q);
1087             }
1088         }
1089 
1090         /// Add a region in standard format (chr1:2000-3000) to the RegionList
1091         void addRegion(string reg)
1092         {
1093             //chr:1-2
1094             //split into chr and 1-2
1095             auto split = reg.splitter(":");
1096             auto chr = split.front;
1097             //split 1-2 into 1 and 2
1098             split = split.drop(1).front.splitter("-");
1099             if (sam.target_id(chr) < 0)
1100             {
1101                 hts_log_error(__FUNCTION__, "tid not present in sam/bam");
1102             }
1103             addRegion(sam.target_id(chr), split.front.to!int, split.drop(1).front.to!int);
1104         }
1105 
1106         /// Add a region by {target/contig/chr id, start coord, end coord} to the RegionList
1107         void addRegion(int tid, int beg, int end)
1108         {
1109             if (tid > this.sam.n_targets || tid < 0)
1110                 hts_log_error(__FUNCTION__, "tid not present in sam/bam");
1111 
1112             auto val = (this.sam.target_names[tid] in this.rlist);
1113             hts_pair32_t p;
1114 
1115             if (beg < 0)
1116                 hts_log_error(__FUNCTION__, "first coordinate < 0");
1117 
1118             if (beg >= this.sam.target_len(tid))
1119                 hts_log_error(__FUNCTION__, "first coordinate larger than tid length");
1120 
1121             if (end < 0)
1122                 hts_log_error(__FUNCTION__, "second coordinate < 0");
1123 
1124             if (end >= this.sam.target_len(tid))
1125                 hts_log_error(__FUNCTION__, "second coordinate larger than tid length");
1126 
1127             p.beg = beg;
1128             p.end = end;
1129             hts_pair32_t[] plist;
1130             if (val is null)
1131             {
1132                 hts_reglist_t r;
1133 
1134                 //set tid
1135                 r.tid = tid;
1136 
1137                 //create intervals
1138                 plist = plist ~ p;
1139                 r.intervals = plist.ptr;
1140                 r.count = cast(uint) plist.length;
1141                 r.min_beg = p.beg;
1142                 r.max_end = p.end;
1143                 this.rlist[this.sam.target_names[tid]] = r;
1144             }
1145             else
1146             {
1147                 plist = (val.intervals[0 .. val.count] ~ p).sort!(cmpInterval).array;
1148                 val.intervals = plist.ptr;
1149                 val.count = cast(uint) plist.length;
1150                 val.min_beg = plist[0].beg;
1151                 val.max_end = plist[$ - 1].end;
1152             }
1153         }
1154 
1155         hts_reglist_t[] getRegList()
1156         {
1157             return rlist.byValue.array.sort!(cmpRegList).array;
1158         }
1159     }
1160 }
1161 
1162 /// Nucleotide complement table; from samtools/sam_view.c
1163 private const(char)[16] seq_comp_table = [0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15];
1164 
1165 /// Reverse a string in place; from samtools/sam_view.c
1166 //  TODO: Could be sped up, and potentially made safer? by passing strlen since already known
1167 private char* reverse(char* str)
1168 {
1169     import core.stdc..string : strlen;
1170 
1171     auto i = strlen(str) - 1, j = 0;
1172     char ch;
1173     while (i > j)
1174     {
1175         ch = str[i];
1176         str[i] = str[j];
1177         str[j] = ch;
1178         i--;
1179         j++;
1180     }
1181     return str;
1182 }
1183 
1184 /// SAM/BAM/CRAM disk format
1185 enum SAMWriterTypes{
1186     BAM,
1187     UBAM,
1188     SAM,
1189     CRAM,
1190     DEDUCE
1191 }
1192 
1193 /// Encapsulates a SAM/BAM/CRAM, but as write-only
1194 struct SAMWriter
1195 {
1196     /// filename; as usable from D
1197     string filename;
1198 
1199     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
1200     private immutable(char)* fn;
1201 
1202     /// htsFile
1203     private htsFile* fp;
1204 
1205     /// hFILE if required
1206     private hFILE* f;
1207 
1208     /// header struct
1209     bam_hdr_t* header = null;
1210 
1211     private kstring_t line;
1212 
1213     /// disallow copying
1214     @disable this(this);
1215 
1216     /** Create a representation of SAM/BAM/CRAM file from given filename or File
1217 
1218     Params:
1219         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
1220         extra_threads = extra threads for compression/decompression
1221                         -1 use all available cores (default)
1222                         0  use no extra threads
1223                         >1 add indicated number of threads (to a default of 1)
1224     */
1225     this(T)(T f,bam_hdr_t * header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1)
1226     if (is(T == string) || is(T == File))
1227     {
1228         import std.parallelism : totalCPUs;
1229         char[] mode;
1230         if(t == SAMWriterTypes.BAM) mode=['w','b','\0'];
1231         else if(t == SAMWriterTypes.UBAM) mode=['w','b','u','\0'];
1232         else if(t == SAMWriterTypes.SAM) mode=['w','\0'];
1233         else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0'];
1234         // open file
1235         static if (is(T == string))
1236         {
1237             if(t == SAMWriterTypes.DEDUCE){
1238                 import std.path:extension;
1239                 auto ext=extension(f);
1240                 if(ext==".bam") mode=['w','b','\0'];
1241                 else if(ext==".sam") mode=['w','\0'];
1242                 else if(ext==".cram") mode=['w','c','\0'];
1243                 else {
1244                     hts_log_error(__FUNCTION__,"extension "~ext~" not valid");
1245                     throw new Exception("DEDUCE SAMWriterType used with non-valid extension");
1246                 }
1247             }
1248             this.filename = f;
1249             this.fn = toStringz(f);
1250             this.fp = hts_open(this.fn, mode.ptr);
1251         }
1252         else static if (is(T == File))
1253         {
1254             assert(t!=SAMWriterTypes.DEDUCE);
1255             this.filename = f.name();
1256             this.fn = toStringz(f.name);
1257             this.f = hdopen(f.fileno, cast(immutable(char)*) "w");
1258             this.fp = hts_hopen(this.f, this.fn, mode.ptr);
1259         }
1260         else assert(0);
1261 
1262         if (extra_threads == -1)
1263         {
1264             if ( totalCPUs > 1)
1265             {
1266                 hts_log_info(__FUNCTION__,
1267                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
1268                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
1269                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
1270                 hts_set_threads(this.fp, totalCPUs);
1271             }
1272         }
1273         else if (extra_threads > 0)
1274         {
1275             if ((extra_threads + 1) > totalCPUs)
1276                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
1277             hts_set_threads(this.fp, extra_threads);
1278         }
1279         else if (extra_threads == 0)
1280         {
1281             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
1282         }
1283         else
1284         {
1285             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
1286         }
1287 
1288         // read header
1289         this.header = bam_hdr_dup(header);
1290         sam_hdr_write(this.fp,this.header);
1291     }
1292 
1293     ~this()
1294     {
1295         debug (dhtslib_debug)
1296         {
1297             writeln("SAMWriter dtor");
1298         }
1299 
1300         bam_hdr_destroy(this.header);
1301 
1302     }
1303 
1304     /// close file
1305     void close(){
1306         const auto ret = hts_close(this.fp);
1307         if (ret < 0)
1308             stderr.writefln("There was an error closing %s", fromStringz(this.fn));
1309     }
1310 
1311     /// write out to disk
1312     void write(SAMRecord * rec){
1313         const auto ret = sam_write1(this.fp,this.header,rec.b);
1314         assert(ret>=0);
1315     }
1316 }
1317 debug(dhtslib_unittest)
1318 unittest{
1319     writeln();
1320     import dhtslib.sam;
1321     import htslib.hts_log : hts_log_info;
1322     import std.path:buildPath,dirName;
1323     import std..string:fromStringz;
1324     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1325     hts_log_info(__FUNCTION__, "Testing SAMWriter");
1326     hts_log_info(__FUNCTION__, "Loading test file");
1327     auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0);
1328     auto readrange = sam.allRecords;
1329     auto sam2 = SAMWriter("test.bam",sam.header);
1330     hts_log_info(__FUNCTION__, "Getting read 1");
1331     auto read = readrange.front();
1332     sam2.write(&read);
1333     sam2.close;
1334     destroy(sam2);
1335     sam = SAMFile("test.bam");
1336     readrange = sam.allRecords;
1337     read = readrange.front();
1338     writeln(read.sequence);
1339     assert(read.sequence=="GCTAGCTCAG");
1340     // destroy(sam2);
1341 }
1342 
1343 /// Used in sorting
1344 bool cmpInterval(hts_pair32_t a, hts_pair32_t b)
1345 {
1346     if (a.beg < b.beg)
1347     {
1348         return true;
1349     }
1350     if (a.end < b.end)
1351     {
1352         return true;
1353     }
1354     return false;
1355 }
1356 
1357 /// Used in sorting
1358 bool cmpRegList(hts_reglist_t a, hts_reglist_t b)
1359 {
1360     if (a.tid < b.tid)
1361     {
1362         return true;
1363     }
1364     return false;
1365 }
1366 
1367 /// Parse text line of SAM; Used in unittest
1368 private int parseSam(string line, bam_hdr_t* header, bam1_t* b)
1369 {
1370     import htslib.kstring : kstring_t;
1371     import std.utf : toUTFz;
1372 
1373     kstring_t k;
1374     k.s = toUTFz!(char*)(line.dup);
1375     k.m = line.length + 1;
1376     k.l = line.length + 1;
1377     return sam_parse1(&k, header, b);
1378 }
1379 debug(dhtslib_unittest)
1380 unittest{
1381     writeln();
1382     import dhtslib.sam;
1383     import htslib.hts_log : hts_log_info;
1384     import std.path:buildPath,dirName;
1385     import std..string:fromStringz;
1386     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1387     hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord");
1388     hts_log_info(__FUNCTION__, "Loading test file");
1389     auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0);
1390     auto readrange = sam.allRecords;
1391     hts_log_info(__FUNCTION__, "Getting read 1");
1392     auto read = readrange.front();
1393     writeln(read.sequence);
1394     assert(read.sequence=="GCTAGCTCAG");
1395 }
1396 
1397 debug(dhtslib_unittest)
1398 unittest
1399 {
1400     import std.stdio : writeln;
1401     import std.range : drop;
1402     import std.utf : toUTFz;
1403     import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports)
1404     import std.path:buildPath,dirName;
1405     import std.conv:to;
1406     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1407     hts_log_info(__FUNCTION__, "Loading sam file");
1408     auto range = File(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","realn01_exp-a.sam")).byLineCopy();
1409     auto b = bam_init1();
1410     auto hdr = bam_hdr_init();
1411     string hdr_str;
1412     for (auto i = 0; i < 4; i++)
1413     {
1414         hdr_str ~= range.front ~ "\n";
1415         range.popFront;
1416     }
1417     hts_log_info(__FUNCTION__, "Header");
1418     writeln(hdr_str);
1419     hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str));
1420     hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string);
1421     auto r = new SAMRecord(b);
1422     hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString);
1423     assert(r.cigar.toString == "6M1D117M5D28M");
1424 }