1 module dhtslib.sam.reader;
2 
3 import core.stdc.stdio : SEEK_SET;
4 import std.format;
5 import std.stdio : writeln, writefln, stderr, File;
6 import std.string : fromStringz, toStringz;
7 import std.typecons : Tuple;
8 import std.range.interfaces : InputRange, inputRangeObject;
9 import std.range.primitives : isInputRange, ElementType;
10 import std.algorithm : map;
11 import std.array : array;
12 import std.utf : toUTFz;
13 
14 import htslib.hts;
15 import htslib.hfile : hdopen, hclose, hFILE, off_t, htell, hseek;
16 import htslib.bgzf : bgzf_tell, bgzf_seek;
17 
18 import htslib.hts_log;
19 import htslib.kstring;
20 import htslib.sam;
21 import dhtslib.sam.record;
22 import dhtslib.coordinates;
23 import dhtslib.sam.header;
24 import dhtslib.sam : cmpInterval, cmpRegList;
25 import dhtslib.memory;
26 import dhtslib.util;
27 
28 alias SAMFile = SAMReader;
29 /**
30 Encapsulates a SAM/BAM file.
31 
32 Implements InputRange interface using htslib calls.
33 If indexed, Random-access query via multidimensional slicing.
34 */
35 struct SAMReader
36 {
37     /// filename; as usable from D
38     string filename;
39 
40     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
41     private const(char)* fn;
42 
43     /// htsFile
44     private HtsFile fp;
45 
46     /// hFILE if required
47     private hFILE* f;
48 
49     /// header struct
50     SAMHeader header;
51 
52     private off_t header_offset;
53 
54     /// SAM/BAM/CRAM index 
55     private HtsIdx idx;
56 
57     private kstring_t line;
58 
59     /** Create a representation of SAM/BAM/CRAM file from given filename or File
60 
61     Params:
62         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
63         extra_threads = extra threads for compression/decompression
64                         -1 use all available cores (default)
65                         0  use no extra threads
66                         >1 add indicated number of threads (to a default of 1)
67     */
68     this(T)(T f, int extra_threads = -1)
69     if (is(T == string) || is(T == File))
70     {
71         import std.parallelism : totalCPUs;
72 
73         // open file
74         static if (is(T == string))
75         {
76             this.filename = f;
77             this.fn = toStringz(f);
78             this.fp = HtsFile(hts_open(this.fn, cast(immutable(char)*) "r"));
79         }
80         else static if (is(T == File))
81         {
82             this.filename = f.name();
83             this.fn = toStringz(f.name);
84             this.f = hdopen(f.fileno, cast(immutable(char)*) "r");
85             this.fp = HtsFile(hts_hopen(this.f, this.fn, cast(immutable(char)*) "r"));
86         }
87         else assert(0);
88 
89         if (extra_threads == -1)
90         {
91             if ( totalCPUs > 1)
92             {
93                 hts_log_info(__FUNCTION__,
94                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
95                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
96                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
97                 hts_set_threads(this.fp, totalCPUs);
98             }
99         }
100         else if (extra_threads > 0)
101         {
102             if ((extra_threads + 1) > totalCPUs)
103                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
104             hts_set_threads(this.fp, extra_threads);
105         }
106         else if (extra_threads == 0)
107         {
108             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
109         }
110         else
111         {
112             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
113         }
114 
115         // read header
116         this.header = SAMHeader(sam_hdr_read(this.fp));
117 
118         // collect header offset just in case it is a sam file
119         // we would like to iterate
120         if(this.fp.is_bgzf) this.header_offset = bgzf_tell(this.fp.fp.bgzf);
121         else this.header_offset = htell(this.fp.fp.hfile);
122 
123         this.idx = HtsIdx(null);
124     }
125 
126     /// number of reference sequences; from bam_hdr_t
127     deprecated("use these properties from SAMHeader")
128     @property int nTargets() const
129     {
130         return this.header.nTargets;
131     }
132     alias n_targets = nTargets;
133 
134     /// length of specific reference sequence by number (`tid`)
135     deprecated("use these properties from SAMHeader")
136     uint targetLength(int target) const
137     in (target >= 0)
138     in (target < this.nTargets)
139     {
140         return this.header.targetLength(target);
141     }
142     alias targetLen = targetLength;
143     alias target_len = targetLength;
144 
145     /// lengths of the reference sequences
146     deprecated("use these properties from SAMHeader")
147     @property uint[] targetLengths() const
148     {
149         return this.header.targetLengths;
150     }
151     alias targetLens = targetLengths;
152     alias target_lens = targetLengths;
153 
154     /// names of the reference sequences
155     deprecated("use these properties from SAMHeader")
156     @property string[] targetNames() const
157     {
158         return this.header.targetNames;
159     }
160     alias target_names = targetNames;
161 
162     /// reference contig name to integer id
163     deprecated("use these properties from SAMHeader")
164     int targetId(string name)
165     {
166         return this.header.targetId(name);
167     }
168     alias target_id = targetId;
169 
170 
171     /// fetch is provided as a PySAM compatible synonym for `query`
172     alias fetch = query;
173 
174     /** Query a region and return matching alignments as InputRange
175     *
176     *   Query on (chr, start, end) may take several forms:
177     *
178     *   1. `query(region)` with a string-based "region" form (e.g. chr1:1000-2000)
179             - Variant: pass an array of query region strings: `query([reg1, reg, ...])`
180     *   2. `query(chr, start, end)` with a combination of function parameters for
181     *       contig, start, and end (where contig may be either a string or the numeric
182     *       `tid` from BAM header; it would be uncommon to use this directly)
183     *
184     *   NOTE THAT THERE IS AN OFF-BY-ONE DIFFERENCE IN THE TWO METHODS ABOVE!
185     *   Region string based coordinates assume the first base of the reference
186     *   is 1 (e.g., chrX:1-100 yields the first 100 bases), whereas with the
187     *   integer function parameter versions, the coordinates are zero-based, half-open
188     *   (e.g., <chrX, 0, 100> yields the first 100 bases).
189     *
190     *   We also support array indexing on object of type SAMReader directly
191     *   in one of two above styles:
192     *       1. `bamfile[region-string]`
193     *       2. `bamfile[contig, start .. end]` with contig like no. 2 above
194     *
195     *   The D convention `$` operator marking length of array is supported.
196     *
197     *   Finally, the region string is parsed by underlying htslib's `hts_parse_region`
198     *   and has special semantics available:
199     *
200     *   region          | Outputs
201     *   --------------- | -------------
202     *   REF             | All reads with RNAME REF
203     *   REF:            | All reads with RNAME REF
204     *   REF:START       | Reads with RNAME REF overlapping START to end of REF
205     *   REF:-END        | Reads with RNAME REF overlapping start of REF to END
206     *   REF:START-END   | Reads with RNAME REF overlapping START to END
207     *   .               | All reads from the start of the file
208     *   *               | Unmapped reads at the end of the file (RNAME '*' in SAM)
209     *
210     *
211     *   Examples:
212     *   ```
213     *   bamfile = SAMReader("whatever.bam");
214     *   auto reads1 = bamfile.query("chr1:1-500");
215     *   auto reads2 = bamfile.query("chr2", 0, 500);
216     *   auto reads3 = bamfile["chr3", 0 .. 500];
217     *
218     *   auto reads4 = bamfile["chrX", $-500 .. $];  // last 500 nt
219     *
220     *   auto reads5 = bamfile.query("chrY");    // entirety of chrY
221     *
222     *   // When colon present in reference name (e.g. HLA additions in GRCh38)
223     *   // wrap the ref name in { } (this is an htslib convention; see hts_parse_region)
224     *   auto reads6 = bamfile.query("{HLA-DRB1*12:17}:1-100");
225     *   ```
226     */ 
227     auto query(CoordSystem cs)(string chrom, Interval!cs coords)
228     in (!this.header.isNull)
229     {
230         auto tid = this.header.targetId(chrom);
231         return query(tid, coords);
232     }
233 
234     /// ditto
235     auto query(CoordSystem cs)(int tid, Interval!cs coords)
236     in (!this.header.isNull)
237     {
238         /// convert to zero-based half-open
239         auto newcoords = coords.to!(CoordSystem.zbho);
240 
241         /// load index
242         if(this.idx == null)
243             this.idx = HtsIdx(sam_index_load(this.fp, this.fn));
244         if (this.idx == null)
245         {
246             hts_log_error(__FUNCTION__, "SAM index not found");
247             throw new Exception("SAM index not found");
248         }
249         auto itr = HtsItr(sam_itr_queryi(this.idx, tid, newcoords.start, newcoords.end));
250         return RecordRange(this.fp, this.header, itr, this.header_offset);
251     }
252 
253 
254     /// ditto
255     auto query(string[] regions)
256     in (!this.header.isNull)
257     {
258         /// load index
259         if(this.idx == null)
260             this.idx = HtsIdx(sam_index_load(this.fp, this.fn));
261         if (this.idx == null)
262         {
263             hts_log_error(__FUNCTION__, "SAM index not found");
264             throw new Exception("SAM index not found");
265         }
266         return RecordRangeMulti(this.fp, this.idx, this.header, regions);
267     }
268 
269     /// opIndex with a list of string regions
270     /// bam[["chr1:1-3","chr2:4-50"]]
271     auto opIndex(string[] regions)
272     {
273         return query(regions);
274     }
275 
276     /// opIndex with a string contig and an Interval
277     /// bam["chr1", ZBHO(1,3)]
278     auto opIndex(CoordSystem cs)(string contig, Interval!cs coords)
279     {
280         return query(contig, coords);
281     }
282 
283     /// opIndex with an int tid and an Interval
284     /// bam[0, ZBHO(1,3)]
285     auto opIndex(CoordSystem cs)(int tid, Interval!cs coords)
286     {
287         return query(tid, coords);
288     }
289 
290     /// opIndex with a string contig and a Coordinate
291     /// bam["chr1", ZB(1)]
292     auto opIndex(Basis bs)(string contig, Coordinate!bs pos)
293     {
294         auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1);
295         return query(contig, coords);
296     }
297 
298     /// opIndex with an int tid and a Coordinate
299     /// bam[0, ZB(1)]
300     auto opIndex(Basis bs)(int tid, Coordinate!bs pos)
301     {
302         auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1);
303         return query(tid, coords);
304     }
305 
306     /// opIndex with a string contig and two Coordinates
307     /// bam["chr1", ZB(1), ZB(3)]
308     deprecated("use multidimensional slicing with second parameter as range ([\"chr1\", 1 .. 2])")
309     auto opIndex(Basis bs)(string tid, Coordinate!bs pos1, Coordinate!bs pos2)
310     {
311         auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2);
312         return query(tid, coords);
313     }
314 
315     /// opIndex with an int tid and two Coordinates
316     /// bam[0, ZB(1), ZB(3)]
317     deprecated("use multidimensional slicing with second parameter as range ([20, 1 .. 2])")
318     auto opIndex(Basis bs)(int tid, Coordinate!bs pos1, Coordinate!bs pos2)
319     {
320         auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2);
321         return query(tid, coords);
322     }
323 
324     /// opSlice with two Coordinates
325     /// [ZB(1) .. ZB(3)]
326     auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, Coordinate!bs end) if(dim == 1)
327     {
328         assert(end > start);
329         return Interval!(getCoordinateSystem!(bs, End.open))(start, end);
330     }
331 
332 
333     private struct OffsetType
334     {
335         ptrdiff_t offset;
336         alias offset this;
337 
338         // supports e.g. $ - x
339         OffsetType opBinary(string s, T)(T val)
340         {
341             mixin("return OffsetType(offset " ~ s ~ " val);");
342         }
343 
344         invariant
345         {
346             assert(this.offset <= 0, "Offset from end should be zero or negative");
347         }
348     }
349     /** Array-end `$` indexing hack courtesy of Steve Schveighoffer
350         https://forum.dlang.org/post/rl7a56$nad$1@digitalmars.com
351 
352         Requires in addition to opDollar returning a bespoke non-integral type
353         a series of overloads for opIndex and opSlice taking this type
354     */
355     OffsetType opDollar(size_t dim)() if(dim == 1)
356     {
357         return OffsetType.init;
358     }
359     /// opIndex with a string contig and an Offset
360     /// bam["chr1",$-2]
361     auto opIndex(string ctg, OffsetType endoff)
362     {
363         auto tid = this.header.targetId(ctg);
364         auto end = this.header.targetLength(tid) + endoff.offset;
365         // TODO review: is targetLength the last nt, or targetLength - 1 the last nt?
366         auto coords = Interval!(CoordSystem.zbho)(end, end + 1);
367         return query(tid, coords);
368     }
369     /// opIndex with an int tid and an Offset
370     /// bam[0,$-2]
371     auto opIndex(int tid, OffsetType endoff)
372     {
373         auto end = this.header.targetLength(tid) + endoff.offset;
374         // TODO review: is targetLength the last nt, or targetLength - 1 the last nt?
375         auto coords = Interval!(CoordSystem.zbho)(end, end + 1);
376         return query(tid, coords);
377     }
378 
379     /// opSlice as Coordinate and an offset
380     /// i.e [ZB(2) .. $]
381     auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, OffsetType off) if(dim == 1)
382     {
383         return Tuple!(Coordinate!bs, OffsetType)(start, off);
384     }
385 
386     /// opIndex with a string contig and a Coordinate and Offset
387     /// bam["chr1",ZB(1) .. $]
388     auto opIndex(Basis bs)(string ctg, Tuple!(Coordinate!bs, OffsetType) coords)
389     {
390         auto tid = this.header.targetId(ctg);
391         auto end = this.header.targetLength(tid) + coords[1];
392         auto endCoord = ZB(end);
393         auto newEndCoord = endCoord.to!bs;
394         auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord);
395         return query(tid, c);
396     }
397 
398     /// opIndex with an int tid and a Coordinate and Offset
399     /// bam[0,ZB(1) .. $]
400     auto opIndex(Basis bs)(int tid, Tuple!(Coordinate!bs, OffsetType) coords)
401     {
402         auto end = this.header.targetLength(tid) + coords[1];
403         auto endCoord = ZB(end);
404         auto newEndCoord = endCoord.to!bs;
405         auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord);
406         return query(tid, c);
407     }
408 
409     /// opSlice as two offset
410     /// i.e [$-2 .. $]
411     auto opSlice(size_t dim)(OffsetType start, OffsetType end) if(dim == 1)
412     {
413         return Tuple!(OffsetType, OffsetType)(start, end);
414     }
415 
416     /// opIndex two Offsets
417     /// i.e fai["chrom1", $-2 .. $]
418     auto opIndex(string ctg, Tuple!(OffsetType, OffsetType) coords)
419     {
420         auto tid = this.header.targetId(ctg);
421         auto start = this.header.targetLength(tid) + coords[0];
422         auto end = this.header.targetLength(tid) + coords[1];
423         auto c = ZBHO(start, end);
424         return query(tid, c);
425     }
426 
427     /// opIndex with an int tid and a Coordinate and Offset
428     /// bam[0,ZB(1) .. $]
429     auto opIndex(int tid, Tuple!(OffsetType, OffsetType) coords)
430     {
431         auto start = this.header.targetLength(tid) + coords[0];
432         auto end = this.header.targetLength(tid) + coords[1];
433         auto c = ZBHO(start, end);
434         return query(tid, c);
435     }
436 
437 
438     /// Return an InputRange representing all records in the SAM/BAM/CRAM
439     auto allRecords()
440     {
441         return AllRecordsRange(this.fp, this.header, this.header_offset);
442     }
443 
444     deprecated("Avoid snake_case names")
445     alias all_records = allRecords;
446 
447 
448     /// Iterate through all records in the SAM
449     struct AllRecordsRange
450     {
451         private HtsFile    fp;     // belongs to parent; shared
452         private SAMHeader  header; // belongs to parent; shared
453         private Bam1     b;
454         private bool initialized;   // Needed to support both foreach and immediate .front()
455         private int success;        // sam_read1 return code
456 
457         this(HtsFile fp, SAMHeader header, off_t offset)
458         {
459             this.fp = copyHtsFile(fp, offset);
460             assert(this.fp.format.format == htsExactFormat.sam || this.fp.format.format == htsExactFormat.bam);
461             this.header = header;
462             this.b = Bam1(bam_init1());
463             this.empty;
464         }
465 
466         /// InputRange interface
467         @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range)
468         {
469             if (!this.initialized) {
470                 this.popFront();
471                 this.initialized = true;
472             }
473             if (success >= 0)
474                 return false;
475             else if (success == -1)
476                 return true;
477             else
478             {
479                 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1");
480                 return true;
481             }
482         }
483         /// ditto
484         void popFront()
485         {
486             success = sam_read1(this.fp, this.header.h, this.b);
487             //bam_destroy1(this.b);
488         }
489         /// ditto
490         SAMRecord front()
491         {
492             assert(this.initialized, "front called before empty");
493             return SAMRecord(bam_dup1(this.b), this.header);
494         }
495 
496         AllRecordsRange save()
497         {
498             AllRecordsRange newRange;
499             newRange.fp = HtsFile(copyHtsFile(fp));
500             newRange.header = header;
501             newRange.b = Bam1(bam_dup1(this.b));
502             newRange.initialized = this.initialized;
503             newRange.success = this.success;
504 
505             return newRange;
506         }
507 
508     }
509 
510     /// Iterate over records falling within a queried region
511     struct RecordRange
512     {
513         private HtsFile fp;
514         private SAMHeader h;
515         private HtsItr itr;
516         private Bam1 b;
517 
518         private int r;  // sam_itr_next: >= 0 on success; -1 when there is no more data; < -1 on error
519         
520         private bool initialized;   // Needed to support both foreach and immediate .front()
521         private int success;        // sam_read1 return code
522 
523         /// Constructor relieves caller of calling bam_init1 and simplifies first-record flow 
524         this(HtsFile fp, SAMHeader header, HtsItr itr, off_t offset)
525         {
526             this.fp = HtsFile(copyHtsFile(fp, offset));
527             this.itr = HtsItr(copyHtsItr(itr));
528             this.h = header;
529             this.b = Bam1(bam_init1);
530             this.empty;
531             assert(itr !is null, "itr was null"); 
532         }
533 
534         /// InputRange interface
535         @property bool empty()
536         {
537             // TODO, itr.finished shouldn't be used
538             if (!this.initialized) {
539                 this.popFront();
540                 this.initialized = true;
541             }
542             assert(this.itr !is null);
543             return (r < 0 || itr.finished) ? true : false;
544         }
545         /// ditto
546         void popFront()
547         {
548             this.r = sam_itr_next(this.fp, this.itr, this.b);
549         }
550         /// ditto
551         SAMRecord front()
552         {
553             return SAMRecord(bam_dup1(b), this.h);
554         }
555 
556         /// make a ForwardRange
557         RecordRange save()
558         {
559             RecordRange newRange;
560             newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off));
561             newRange.itr = HtsItr(copyHtsItr(itr));
562             newRange.h = h;
563             newRange.b = Bam1(bam_dup1(this.b));
564             newRange.initialized = this.initialized;
565             newRange.success = this.success;
566             newRange.r = this.r;
567             return newRange;
568         }
569     }
570 
571     static assert(isInputRange!RecordRange);
572     static assert(is(ElementType!RecordRange == SAMRecord));
573 
574     /// Iterate over records falling within queried regions using a RegionList
575     struct RecordRangeMulti
576     {
577 
578         private HtsFile fp;
579         private SAMHeader h;
580         private HtsItrMulti itr;
581         private Bam1 b;
582 
583         private int r;
584         private bool initialized;
585 
586         ///
587         this(HtsFile fp, HtsIdx idx, SAMHeader header, string[] regions)
588         {
589             /// get reglist
590             auto cQueries = regions.map!(toUTFz!(char *)).array;
591             int count;
592             auto fnPtr = cast(hts_name2id_f) &sam_hdr_name2tid;
593 
594             /// rlistPtr ends up being free'd by the hts_itr
595             auto rlistPtr = hts_reglist_create(cQueries.ptr, cast(int) cQueries.length, &count, cast(void *)header.h.getRef, fnPtr);
596 
597 
598             /// set header and fp
599             this.fp = HtsFile(copyHtsFile(fp));
600             this.h = header;
601 
602             /// init bam1_t
603             b = Bam1(bam_init1());
604 
605             /// get the itr
606             itr = sam_itr_regions(idx, this.h.h, rlistPtr, cast(uint) count);
607 
608             empty();
609         }
610 
611         /// InputRange interface
612         @property bool empty()
613         {
614             if(!initialized){
615                 this.initialized = true;
616                 popFront;
617             }
618             return (r <= 0 && itr.finished) ? true : false;
619         }
620         /// ditto
621         void popFront()
622         {
623             r = sam_itr_multi_next(this.fp, this.itr, this.b);
624         }
625         /// ditto
626         SAMRecord front()
627         {
628             return SAMRecord(bam_dup1(b), this.h);
629         }
630         /// make a ForwardRange
631         RecordRangeMulti save()
632         {
633             RecordRangeMulti newRange;
634             newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off));
635             newRange.itr = HtsItrMulti(copyHtsItr(itr));
636             newRange.h = h;
637             newRange.b = Bam1(bam_dup1(this.b));
638             newRange.initialized = this.initialized;
639             newRange.r = this.r;
640             return newRange;
641         }
642     }
643     static assert(isInputRange!RecordRangeMulti);
644     static assert(is(ElementType!RecordRangeMulti == SAMRecord));
645 }
646 
647 ///
648 debug(dhtslib_unittest) unittest
649 {
650     import dhtslib.sam;
651     import htslib.hts_log : hts_log_info;
652     import std.path : buildPath, dirName;
653     import std.string : fromStringz;
654     import std.array : array; 
655 
656     hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
657     hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord");
658     hts_log_info(__FUNCTION__, "Loading test file");
659     auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0);
660     auto sam2 = SAMWriter("/tmp/test.bam", sam.header);
661     auto readrange = sam.allRecords;
662     hts_log_info(__FUNCTION__, "Getting read 1");
663     assert(readrange.empty == false);
664     auto read = readrange.front();
665     
666     // writeln(read.sequence);
667     assert(read.sequence=="GCTAGCTCAG");
668     assert(sam.allRecords.array.length == 2);
669     sam2.write(read);
670     destroy(sam2);
671 
672 
673     // testing with multiple specified threads
674     sam = SAMFile("/tmp/test.bam", 2);
675     readrange = sam.allRecords;
676     assert(readrange.empty == false);
677     read = readrange.front();
678     assert(read.sequence=="GCTAGCTCAG");
679     assert(sam.allRecords.array.length == 1);
680 
681     // testing with no additional threads
682     sam = SAMFile("/tmp/test.bam", 0);
683     readrange = sam.allRecords;
684     assert(readrange.empty == false);
685     read = readrange.front();
686     assert(read.sequence=="GCTAGCTCAG");
687     assert(sam.allRecords.array.length == 1);
688 
689     // testing SAMReader targets/tid functions
690     assert(sam.header.nTargets == 1);
691     assert(sam.header.targetId("Sheila") == 0);
692     assert(sam.header.targetLength(0) == 20);
693     assert(sam.header.targetLengths == [20]);
694     assert(sam.header.targetNames == ["Sheila"]);
695 
696 }
697 
698 ///
699 debug(dhtslib_unittest) unittest
700 {
701     import std.stdio;
702     import dhtslib.sam;
703     import dhtslib.sam.md : MDItr;
704     import std.algorithm : map;
705     import std.array : array;
706     import std.path : buildPath,dirName;
707     import std.range : drop;
708     hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
709     hts_log_info(__FUNCTION__, "Testing SAMFile query");
710     hts_log_info(__FUNCTION__, "Loading test file");
711 
712     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
713     assert(bam.allRecords.array.length == 112);
714     // assert(bam["CHROMOSOME_I"].array.length == 18);
715     // assert(bam["CHROMOSOME_II"].array.length == 34);
716     // assert(bam["CHROMOSOME_III"].array.length == 41);
717     // assert(bam["CHROMOSOME_IV"].array.length == 19);
718     // assert(bam["CHROMOSOME_V"].array.length == 0);
719     assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)) .array.length == 14);
720     assert(bam["CHROMOSOME_I",ZB(900) .. ZB(2000)].array.length == 14);
721     assert(bam[0, ZB(900) .. ZB(2000)].array.length == 14);
722 
723     assert(bam["CHROMOSOME_I",ZB(940)].array.length == 2);
724     assert(bam[0, ZB(940)].array.length == 2);
725 
726 
727     assert(bam["CHROMOSOME_I",ZB(900) .. $].array.length == 18);
728     assert(bam[0, ZB(900) .. $].array.length == 18);
729     assert(bam["CHROMOSOME_I",$].array.length == 0);
730     assert(bam[0, $].array.length == 0);
731     assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33);
732 
733     assert(bam.query("CHROMOSOME_I", OBHO(901, 2000)) .array.length == 14);
734     assert(bam["CHROMOSOME_I",OB(901) .. OB(2001)].array.length == 14);
735     assert(bam[0, OB(901) .. OB(2001)].array.length == 14);
736 
737     assert(bam["CHROMOSOME_I",OB(941)].array.length == 2);
738     assert(bam[0, OB(941)].array.length == 2);
739 
740 
741     assert(bam["CHROMOSOME_I",OB(901) .. $].array.length == 18);
742     assert(bam[0, OB(901) .. $].array.length == 18);
743     assert(bam["CHROMOSOME_I",$].array.length == 0);
744     assert(bam[0, $].array.length == 0);
745     assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33);
746 
747     assert(bam["CHROMOSOME_II",$-1918 .. $].array.length == 0);
748     assert(bam["CHROMOSOME_II", ZB(3082) .. $].array.length == 0);
749     assert(bam["CHROMOSOME_II",$-1919 .. $].array.length == 1);
750     assert(bam["CHROMOSOME_II", ZB(3081) .. $].array.length == 1);
751     assert(bam["CHROMOSOME_II",$-2018 .. $].array.length == 2);
752 
753     auto range = bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]];
754     auto range1 = range.save;
755     range = range.drop(5);
756     auto range2 = range.save;
757     range = range.drop(5);
758     auto range3 = range.save;
759     range = range.drop(10);
760     auto range4 = range.save;
761     assert(range1.array.length == 33);
762     assert(range2.array.length == 28);
763     assert(range3.array.length == 23);
764     assert(range4.array.length == 13);
765 }
766 debug(dhtslib_unittest) unittest
767 {
768     import std.stdio;
769     import dhtslib.sam;
770     import std.array : array;
771     import std.path : buildPath,dirName;
772     import std.range : drop;
773     hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
774     hts_log_info(__FUNCTION__, "Testing AllRecordsRange save()");
775     hts_log_info(__FUNCTION__, "Loading test file");
776 
777     auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0);
778     assert(bam.allRecords.array.length == 112);
779     assert(bam.allRecords.array.length == 112);
780 
781     auto range = bam.allRecords;
782 
783     auto range1 = range.save();
784     range = range.drop(5);
785 
786     auto range2 = range.save();
787     range = range.drop(5);
788 
789     auto range3 = range.save();
790     range = range.drop(5);
791 
792     auto range4 = range.save();
793     range = range.drop(50);
794 
795     auto range5 = range.save();
796     range.popFront;
797 
798     auto range6 = range.save();
799 
800     assert(range1.array.length == 112);
801     assert(range2.array.length == 107);
802     assert(range3.array.length == 102);
803     assert(range4.array.length == 97);
804     assert(range5.array.length == 47);
805     assert(range6.array.length == 46);
806 }
807 ///
808 debug(dhtslib_unittest) unittest
809 {
810     import std.stdio;
811     import dhtslib.sam;
812     import std.array : array;
813     import std.path : buildPath,dirName;
814     import std.range : drop;
815     hts_set_log_level(htsLogLevel.HTS_LOG_WARNING);
816     hts_log_info(__FUNCTION__, "Testing RecordsRange save()");
817     hts_log_info(__FUNCTION__, "Loading test file");
818 
819     auto bam = SAMReader(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 4);
820     
821     auto range = bam.query("CHROMOSOME_I", ZBHO(900, 2000));
822     assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)).array.length == 14);
823     
824     auto range1 =  range.save;
825     range = range.drop(1);
826     
827     auto range2 =  range.save;
828     range = range.drop(2);
829     
830     auto range3 =  range.save;
831     range = range.drop(3);
832     
833     auto range4 =  range.save;
834     range = range.drop(5);
835     
836     auto range5 =  range.save;
837     range.popFront;
838     
839     auto range6 = range.save;
840 
841     assert(range1.array.length == 14);
842     assert(range2.array.length == 13);
843     assert(range3.array.length == 11);
844     assert(range4.array.length == 8);
845     assert(range5.array.length == 3);
846     assert(range6.array.length == 2);
847 }