1 module dhtslib.sam.reader; 2 3 import core.stdc.stdio : SEEK_SET; 4 import std.format; 5 import std.stdio : writeln, writefln, stderr, File; 6 import std.string : fromStringz, toStringz; 7 import std.typecons : Tuple; 8 import std.range.interfaces : InputRange, inputRangeObject; 9 import std.range.primitives : isInputRange, ElementType; 10 import std.algorithm : map; 11 import std.array : array; 12 import std.utf : toUTFz; 13 14 import htslib.hts; 15 import htslib.hfile : hdopen, hclose, hFILE, off_t, htell, hseek; 16 import htslib.bgzf : bgzf_tell, bgzf_seek; 17 18 import htslib.hts_log; 19 import htslib.kstring; 20 import htslib.sam; 21 import dhtslib.sam.record; 22 import dhtslib.coordinates; 23 import dhtslib.sam.header; 24 import dhtslib.sam : cmpInterval, cmpRegList; 25 import dhtslib.memory; 26 import dhtslib.util; 27 28 alias SAMFile = SAMReader; 29 /** 30 Encapsulates a SAM/BAM file. 31 32 Implements InputRange interface using htslib calls. 33 If indexed, Random-access query via multidimensional slicing. 34 */ 35 struct SAMReader 36 { 37 /// filename; as usable from D 38 string filename; 39 40 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 41 private const(char)* fn; 42 43 /// htsFile 44 private HtsFile fp; 45 46 /// hFILE if required 47 private hFILE* f; 48 49 /// header struct 50 SAMHeader header; 51 52 private off_t header_offset; 53 54 /// SAM/BAM/CRAM index 55 private HtsIdx idx; 56 57 private kstring_t line; 58 59 /** Create a representation of SAM/BAM/CRAM file from given filename or File 60 61 Params: 62 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 63 extra_threads = extra threads for compression/decompression 64 -1 use all available cores (default) 65 0 use no extra threads 66 >1 add indicated number of threads (to a default of 1) 67 */ 68 this(T)(T f, int extra_threads = -1) 69 if (is(T == string) || is(T == File)) 70 { 71 import std.parallelism : totalCPUs; 72 73 // open file 74 static if (is(T == string)) 75 { 76 this.filename = f; 77 this.fn = toStringz(f); 78 this.fp = HtsFile(hts_open(this.fn, cast(immutable(char)*) "r")); 79 } 80 else static if (is(T == File)) 81 { 82 this.filename = f.name(); 83 this.fn = toStringz(f.name); 84 this.f = hdopen(f.fileno, cast(immutable(char)*) "r"); 85 this.fp = HtsFile(hts_hopen(this.f, this.fn, cast(immutable(char)*) "r")); 86 } 87 else assert(0); 88 89 if (extra_threads == -1) 90 { 91 if ( totalCPUs > 1) 92 { 93 hts_log_info(__FUNCTION__, 94 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 95 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 96 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 97 hts_set_threads(this.fp, totalCPUs); 98 } 99 } 100 else if (extra_threads > 0) 101 { 102 if ((extra_threads + 1) > totalCPUs) 103 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 104 hts_set_threads(this.fp, extra_threads); 105 } 106 else if (extra_threads == 0) 107 { 108 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 109 } 110 else 111 { 112 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 113 } 114 115 // read header 116 this.header = SAMHeader(sam_hdr_read(this.fp)); 117 118 // collect header offset just in case it is a sam file 119 // we would like to iterate 120 if(this.fp.is_bgzf) this.header_offset = bgzf_tell(this.fp.fp.bgzf); 121 else this.header_offset = htell(this.fp.fp.hfile); 122 123 this.idx = HtsIdx(null); 124 } 125 126 /// Explicit postblit to avoid 127 /// https://github.com/blachlylab/dhtslib/issues/122 128 this(this) 129 { 130 this.filename = filename; 131 this.fn = fn; 132 this.f = f; 133 this.header = header; 134 this.header_offset = header_offset; 135 this.idx = idx; 136 this.line = line; 137 } 138 139 /// number of reference sequences; from bam_hdr_t 140 deprecated("use these properties from SAMHeader") 141 @property int nTargets() const 142 { 143 return this.header.nTargets; 144 } 145 alias n_targets = nTargets; 146 147 /// length of specific reference sequence by number (`tid`) 148 deprecated("use these properties from SAMHeader") 149 uint targetLength(int target) const 150 in (target >= 0) 151 in (target < this.nTargets) 152 { 153 return this.header.targetLength(target); 154 } 155 alias targetLen = targetLength; 156 alias target_len = targetLength; 157 158 /// lengths of the reference sequences 159 deprecated("use these properties from SAMHeader") 160 @property uint[] targetLengths() const 161 { 162 return this.header.targetLengths; 163 } 164 alias targetLens = targetLengths; 165 alias target_lens = targetLengths; 166 167 /// names of the reference sequences 168 deprecated("use these properties from SAMHeader") 169 @property string[] targetNames() const 170 { 171 return this.header.targetNames; 172 } 173 alias target_names = targetNames; 174 175 /// reference contig name to integer id 176 deprecated("use these properties from SAMHeader") 177 int targetId(string name) 178 { 179 return this.header.targetId(name); 180 } 181 alias target_id = targetId; 182 183 184 /// fetch is provided as a PySAM compatible synonym for `query` 185 alias fetch = query; 186 187 /** Query a region and return matching alignments as InputRange 188 * 189 * Query on (chr, start, end) may take several forms: 190 * 191 * 1. `query(region)` with a string-based "region" form (e.g. chr1:1000-2000) 192 - Variant: pass an array of query region strings: `query([reg1, reg, ...])` 193 * 2. `query(chr, start, end)` with a combination of function parameters for 194 * contig, start, and end (where contig may be either a string or the numeric 195 * `tid` from BAM header; it would be uncommon to use this directly) 196 * 197 * NOTE THAT THERE IS AN OFF-BY-ONE DIFFERENCE IN THE TWO METHODS ABOVE! 198 * Region string based coordinates assume the first base of the reference 199 * is 1 (e.g., chrX:1-100 yields the first 100 bases), whereas with the 200 * integer function parameter versions, the coordinates are zero-based, half-open 201 * (e.g., <chrX, 0, 100> yields the first 100 bases). 202 * 203 * We also support array indexing on object of type SAMReader directly 204 * in one of two above styles: 205 * 1. `bamfile[region-string]` 206 * 2. `bamfile[contig, start .. end]` with contig like no. 2 above 207 * 208 * The D convention `$` operator marking length of array is supported. 209 * 210 * Finally, the region string is parsed by underlying htslib's `hts_parse_region` 211 * and has special semantics available: 212 * 213 * region | Outputs 214 * --------------- | ------------- 215 * REF | All reads with RNAME REF 216 * REF: | All reads with RNAME REF 217 * REF:START | Reads with RNAME REF overlapping START to end of REF 218 * REF:-END | Reads with RNAME REF overlapping start of REF to END 219 * REF:START-END | Reads with RNAME REF overlapping START to END 220 * . | All reads from the start of the file 221 * * | Unmapped reads at the end of the file (RNAME '*' in SAM) 222 * 223 * 224 * Examples: 225 * ``` 226 * bamfile = SAMReader("whatever.bam"); 227 * auto reads1 = bamfile.query("chr1:1-500"); 228 * auto reads2 = bamfile.query("chr2", 0, 500); 229 * auto reads3 = bamfile["chr3", 0 .. 500]; 230 * 231 * auto reads4 = bamfile["chrX", $-500 .. $]; // last 500 nt 232 * 233 * auto reads5 = bamfile.query("chrY"); // entirety of chrY 234 * 235 * // When colon present in reference name (e.g. HLA additions in GRCh38) 236 * // wrap the ref name in { } (this is an htslib convention; see hts_parse_region) 237 * auto reads6 = bamfile.query("{HLA-DRB1*12:17}:1-100"); 238 * ``` 239 */ 240 auto query(CoordSystem cs)(string chrom, Interval!cs coords) 241 in (!this.header.isNull) 242 { 243 auto tid = this.header.targetId(chrom); 244 return query(tid, coords); 245 } 246 247 /// ditto 248 auto query(CoordSystem cs)(int tid, Interval!cs coords) 249 in (!this.header.isNull) 250 { 251 /// convert to zero-based half-open 252 auto newcoords = coords.to!(CoordSystem.zbho); 253 254 /// load index 255 if(this.idx == null) 256 this.idx = HtsIdx(sam_index_load(this.fp, this.fn)); 257 if (this.idx == null) 258 { 259 hts_log_error(__FUNCTION__, "SAM index not found"); 260 throw new Exception("SAM index not found"); 261 } 262 auto itr = HtsItr(sam_itr_queryi(this.idx, tid, newcoords.start, newcoords.end)); 263 return RecordRange(this.fp, this.header, itr, this.header_offset); 264 } 265 266 267 /// ditto 268 auto query(string[] regions) 269 in (!this.header.isNull) 270 { 271 /// load index 272 if(this.idx == null) 273 this.idx = HtsIdx(sam_index_load(this.fp, this.fn)); 274 if (this.idx == null) 275 { 276 hts_log_error(__FUNCTION__, "SAM index not found"); 277 throw new Exception("SAM index not found"); 278 } 279 return RecordRangeMulti(this.fp, this.idx, this.header, regions); 280 } 281 282 /// opIndex with a list of string regions 283 /// bam[["chr1:1-3","chr2:4-50"]] 284 auto opIndex(string[] regions) 285 { 286 return query(regions); 287 } 288 289 /// opIndex with a string contig and an Interval 290 /// bam["chr1", ZBHO(1,3)] 291 auto opIndex(CoordSystem cs)(string contig, Interval!cs coords) 292 { 293 return query(contig, coords); 294 } 295 296 /// opIndex with an int tid and an Interval 297 /// bam[0, ZBHO(1,3)] 298 auto opIndex(CoordSystem cs)(int tid, Interval!cs coords) 299 { 300 return query(tid, coords); 301 } 302 303 /// opIndex with a string contig and a Coordinate 304 /// bam["chr1", ZB(1)] 305 auto opIndex(Basis bs)(string contig, Coordinate!bs pos) 306 { 307 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1); 308 return query(contig, coords); 309 } 310 311 /// opIndex with an int tid and a Coordinate 312 /// bam[0, ZB(1)] 313 auto opIndex(Basis bs)(int tid, Coordinate!bs pos) 314 { 315 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1); 316 return query(tid, coords); 317 } 318 319 /// opIndex with a string contig and two Coordinates 320 /// bam["chr1", ZB(1), ZB(3)] 321 deprecated("use multidimensional slicing with second parameter as range ([\"chr1\", 1 .. 2])") 322 auto opIndex(Basis bs)(string tid, Coordinate!bs pos1, Coordinate!bs pos2) 323 { 324 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2); 325 return query(tid, coords); 326 } 327 328 /// opIndex with an int tid and two Coordinates 329 /// bam[0, ZB(1), ZB(3)] 330 deprecated("use multidimensional slicing with second parameter as range ([20, 1 .. 2])") 331 auto opIndex(Basis bs)(int tid, Coordinate!bs pos1, Coordinate!bs pos2) 332 { 333 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2); 334 return query(tid, coords); 335 } 336 337 /// opSlice with two Coordinates 338 /// [ZB(1) .. ZB(3)] 339 auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, Coordinate!bs end) if(dim == 1) 340 { 341 assert(end > start); 342 return Interval!(getCoordinateSystem!(bs, End.open))(start, end); 343 } 344 345 346 private struct OffsetType 347 { 348 ptrdiff_t offset; 349 alias offset this; 350 351 // supports e.g. $ - x 352 OffsetType opBinary(string s, T)(T val) 353 { 354 mixin("return OffsetType(offset " ~ s ~ " val);"); 355 } 356 357 invariant 358 { 359 assert(this.offset <= 0, "Offset from end should be zero or negative"); 360 } 361 } 362 /** Array-end `$` indexing hack courtesy of Steve Schveighoffer 363 https://forum.dlang.org/post/rl7a56$nad$1@digitalmars.com 364 365 Requires in addition to opDollar returning a bespoke non-integral type 366 a series of overloads for opIndex and opSlice taking this type 367 */ 368 OffsetType opDollar(size_t dim)() if(dim == 1) 369 { 370 return OffsetType.init; 371 } 372 /// opIndex with a string contig and an Offset 373 /// bam["chr1",$-2] 374 auto opIndex(string ctg, OffsetType endoff) 375 { 376 auto tid = this.header.targetId(ctg); 377 auto end = this.header.targetLength(tid) + endoff.offset; 378 // TODO review: is targetLength the last nt, or targetLength - 1 the last nt? 379 auto coords = Interval!(CoordSystem.zbho)(end, end + 1); 380 return query(tid, coords); 381 } 382 /// opIndex with an int tid and an Offset 383 /// bam[0,$-2] 384 auto opIndex(int tid, OffsetType endoff) 385 { 386 auto end = this.header.targetLength(tid) + endoff.offset; 387 // TODO review: is targetLength the last nt, or targetLength - 1 the last nt? 388 auto coords = Interval!(CoordSystem.zbho)(end, end + 1); 389 return query(tid, coords); 390 } 391 392 /// opSlice as Coordinate and an offset 393 /// i.e [ZB(2) .. $] 394 auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, OffsetType off) if(dim == 1) 395 { 396 return Tuple!(Coordinate!bs, OffsetType)(start, off); 397 } 398 399 /// opIndex with a string contig and a Coordinate and Offset 400 /// bam["chr1",ZB(1) .. $] 401 auto opIndex(Basis bs)(string ctg, Tuple!(Coordinate!bs, OffsetType) coords) 402 { 403 auto tid = this.header.targetId(ctg); 404 auto end = this.header.targetLength(tid) + coords[1]; 405 auto endCoord = ZB(end); 406 auto newEndCoord = endCoord.to!bs; 407 auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord); 408 return query(tid, c); 409 } 410 411 /// opIndex with an int tid and a Coordinate and Offset 412 /// bam[0,ZB(1) .. $] 413 auto opIndex(Basis bs)(int tid, Tuple!(Coordinate!bs, OffsetType) coords) 414 { 415 auto end = this.header.targetLength(tid) + coords[1]; 416 auto endCoord = ZB(end); 417 auto newEndCoord = endCoord.to!bs; 418 auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord); 419 return query(tid, c); 420 } 421 422 /// opSlice as two offset 423 /// i.e [$-2 .. $] 424 auto opSlice(size_t dim)(OffsetType start, OffsetType end) if(dim == 1) 425 { 426 return Tuple!(OffsetType, OffsetType)(start, end); 427 } 428 429 /// opIndex two Offsets 430 /// i.e fai["chrom1", $-2 .. $] 431 auto opIndex(string ctg, Tuple!(OffsetType, OffsetType) coords) 432 { 433 auto tid = this.header.targetId(ctg); 434 auto start = this.header.targetLength(tid) + coords[0]; 435 auto end = this.header.targetLength(tid) + coords[1]; 436 auto c = ZBHO(start, end); 437 return query(tid, c); 438 } 439 440 /// opIndex with an int tid and a Coordinate and Offset 441 /// bam[0,ZB(1) .. $] 442 auto opIndex(int tid, Tuple!(OffsetType, OffsetType) coords) 443 { 444 auto start = this.header.targetLength(tid) + coords[0]; 445 auto end = this.header.targetLength(tid) + coords[1]; 446 auto c = ZBHO(start, end); 447 return query(tid, c); 448 } 449 450 451 /// Return an InputRange representing all records in the SAM/BAM/CRAM 452 auto allRecords() 453 { 454 return AllRecordsRange(this.fp, this.header, this.header_offset); 455 } 456 457 deprecated("Avoid snake_case names") 458 alias all_records = allRecords; 459 460 461 /// Iterate through all records in the SAM 462 struct AllRecordsRange 463 { 464 private HtsFile fp; // belongs to parent; shared 465 private SAMHeader header; // belongs to parent; shared 466 private Bam1 b; 467 private bool initialized; // Needed to support both foreach and immediate .front() 468 private int success; // sam_read1 return code 469 470 this(HtsFile fp, SAMHeader header, off_t offset) 471 { 472 this.fp = copyHtsFile(fp, offset); 473 assert(this.fp.format.format == htsExactFormat.sam || this.fp.format.format == htsExactFormat.bam); 474 this.header = header; 475 this.b = Bam1(bam_init1()); 476 this.empty; 477 } 478 479 /// InputRange interface 480 @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range) 481 { 482 if (!this.initialized) { 483 this.popFront(); 484 this.initialized = true; 485 } 486 if (success >= 0) 487 return false; 488 else if (success == -1) 489 return true; 490 else 491 { 492 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1"); 493 return true; 494 } 495 } 496 /// ditto 497 void popFront() 498 { 499 success = sam_read1(this.fp, this.header.h, this.b); 500 //bam_destroy1(this.b); 501 } 502 /// ditto 503 SAMRecord front() 504 { 505 assert(this.initialized, "front called before empty"); 506 return SAMRecord(bam_dup1(this.b), this.header); 507 } 508 509 AllRecordsRange save() 510 { 511 AllRecordsRange newRange; 512 newRange.fp = HtsFile(copyHtsFile(fp)); 513 newRange.header = header; 514 newRange.b = Bam1(bam_dup1(this.b)); 515 newRange.initialized = this.initialized; 516 newRange.success = this.success; 517 518 return newRange; 519 } 520 521 } 522 523 /// Iterate over records falling within a queried region 524 struct RecordRange 525 { 526 private HtsFile fp; 527 private SAMHeader h; 528 private HtsItr itr; 529 private Bam1 b; 530 531 private int r; // sam_itr_next: >= 0 on success; -1 when there is no more data; < -1 on error 532 533 private bool initialized; // Needed to support both foreach and immediate .front() 534 private int success; // sam_read1 return code 535 536 /// Constructor relieves caller of calling bam_init1 and simplifies first-record flow 537 this(HtsFile fp, SAMHeader header, HtsItr itr, off_t offset) 538 { 539 this.fp = HtsFile(copyHtsFile(fp, offset)); 540 this.itr = HtsItr(copyHtsItr(itr)); 541 this.h = header; 542 this.b = Bam1(bam_init1); 543 this.empty; 544 assert(itr !is null, "itr was null"); 545 } 546 547 /// InputRange interface 548 @property bool empty() 549 { 550 // TODO, itr.finished shouldn't be used 551 if (!this.initialized) { 552 this.popFront(); 553 this.initialized = true; 554 } 555 assert(this.itr !is null); 556 return (r < 0 || itr.finished) ? true : false; 557 } 558 /// ditto 559 void popFront() 560 { 561 this.r = sam_itr_next(this.fp, this.itr, this.b); 562 } 563 /// ditto 564 SAMRecord front() 565 { 566 return SAMRecord(bam_dup1(b), this.h); 567 } 568 569 /// make a ForwardRange 570 RecordRange save() 571 { 572 RecordRange newRange; 573 newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off)); 574 newRange.itr = HtsItr(copyHtsItr(itr)); 575 newRange.h = h; 576 newRange.b = Bam1(bam_dup1(this.b)); 577 newRange.initialized = this.initialized; 578 newRange.success = this.success; 579 newRange.r = this.r; 580 return newRange; 581 } 582 } 583 584 static assert(isInputRange!RecordRange); 585 static assert(is(ElementType!RecordRange == SAMRecord)); 586 587 /// Iterate over records falling within queried regions using a RegionList 588 struct RecordRangeMulti 589 { 590 591 private HtsFile fp; 592 private SAMHeader h; 593 private HtsItrMulti itr; 594 private Bam1 b; 595 596 private int r; 597 private bool initialized; 598 599 /// 600 this(HtsFile fp, HtsIdx idx, SAMHeader header, string[] regions) 601 { 602 /// get reglist 603 auto cQueries = regions.map!(toUTFz!(char *)).array; 604 int count; 605 auto fnPtr = cast(hts_name2id_f) &sam_hdr_name2tid; 606 607 /// rlistPtr ends up being free'd by the hts_itr 608 auto rlistPtr = hts_reglist_create(cQueries.ptr, cast(int) cQueries.length, &count, cast(void *)header.h.getRef, fnPtr); 609 610 611 /// set header and fp 612 this.fp = HtsFile(copyHtsFile(fp)); 613 this.h = header; 614 615 /// init bam1_t 616 b = Bam1(bam_init1()); 617 618 /// get the itr 619 itr = sam_itr_regions(idx, this.h.h, rlistPtr, cast(uint) count); 620 621 empty(); 622 } 623 624 /// InputRange interface 625 @property bool empty() 626 { 627 if(!initialized){ 628 this.initialized = true; 629 popFront; 630 } 631 return (r <= 0 && itr.finished) ? true : false; 632 } 633 /// ditto 634 void popFront() 635 { 636 r = sam_itr_multi_next(this.fp, this.itr, this.b); 637 } 638 /// ditto 639 SAMRecord front() 640 { 641 return SAMRecord(bam_dup1(b), this.h); 642 } 643 /// make a ForwardRange 644 RecordRangeMulti save() 645 { 646 RecordRangeMulti newRange; 647 newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off)); 648 newRange.itr = HtsItrMulti(copyHtsItr(itr)); 649 newRange.h = h; 650 newRange.b = Bam1(bam_dup1(this.b)); 651 newRange.initialized = this.initialized; 652 newRange.r = this.r; 653 return newRange; 654 } 655 } 656 static assert(isInputRange!RecordRangeMulti); 657 static assert(is(ElementType!RecordRangeMulti == SAMRecord)); 658 } 659 660 /// 661 debug(dhtslib_unittest) unittest 662 { 663 import dhtslib.sam; 664 import htslib.hts_log : hts_log_info; 665 import std.path : buildPath, dirName; 666 import std.string : fromStringz; 667 import std.array : array; 668 669 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 670 hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord"); 671 hts_log_info(__FUNCTION__, "Loading test file"); 672 auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0); 673 auto sam2 = SAMWriter("/tmp/test.bam", sam.header); 674 auto readrange = sam.allRecords; 675 hts_log_info(__FUNCTION__, "Getting read 1"); 676 assert(readrange.empty == false); 677 auto read = readrange.front(); 678 679 // writeln(read.sequence); 680 assert(read.sequence=="GCTAGCTCAG"); 681 assert(sam.allRecords.array.length == 2); 682 sam2.write(read); 683 destroy(sam2); 684 685 686 // testing with multiple specified threads 687 sam = SAMFile("/tmp/test.bam", 2); 688 readrange = sam.allRecords; 689 assert(readrange.empty == false); 690 read = readrange.front(); 691 assert(read.sequence=="GCTAGCTCAG"); 692 assert(sam.allRecords.array.length == 1); 693 694 // testing with no additional threads 695 sam = SAMFile("/tmp/test.bam", 0); 696 readrange = sam.allRecords; 697 assert(readrange.empty == false); 698 read = readrange.front(); 699 assert(read.sequence=="GCTAGCTCAG"); 700 assert(sam.allRecords.array.length == 1); 701 702 // testing SAMReader targets/tid functions 703 assert(sam.header.nTargets == 1); 704 assert(sam.header.targetId("Sheila") == 0); 705 assert(sam.header.targetLength(0) == 20); 706 assert(sam.header.targetLengths == [20]); 707 assert(sam.header.targetNames == ["Sheila"]); 708 709 } 710 711 /// 712 debug(dhtslib_unittest) unittest 713 { 714 import std.stdio; 715 import dhtslib.sam; 716 import dhtslib.sam.md : MDItr; 717 import std.algorithm : map; 718 import std.array : array; 719 import std.path : buildPath,dirName; 720 import std.range : drop; 721 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 722 hts_log_info(__FUNCTION__, "Testing SAMFile query"); 723 hts_log_info(__FUNCTION__, "Loading test file"); 724 725 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 726 assert(bam.allRecords.array.length == 112); 727 // assert(bam["CHROMOSOME_I"].array.length == 18); 728 // assert(bam["CHROMOSOME_II"].array.length == 34); 729 // assert(bam["CHROMOSOME_III"].array.length == 41); 730 // assert(bam["CHROMOSOME_IV"].array.length == 19); 731 // assert(bam["CHROMOSOME_V"].array.length == 0); 732 assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)) .array.length == 14); 733 assert(bam["CHROMOSOME_I",ZB(900) .. ZB(2000)].array.length == 14); 734 assert(bam[0, ZB(900) .. ZB(2000)].array.length == 14); 735 736 assert(bam["CHROMOSOME_I",ZB(940)].array.length == 2); 737 assert(bam[0, ZB(940)].array.length == 2); 738 739 740 assert(bam["CHROMOSOME_I",ZB(900) .. $].array.length == 18); 741 assert(bam[0, ZB(900) .. $].array.length == 18); 742 assert(bam["CHROMOSOME_I",$].array.length == 0); 743 assert(bam[0, $].array.length == 0); 744 assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33); 745 746 assert(bam.query("CHROMOSOME_I", OBHO(901, 2000)) .array.length == 14); 747 assert(bam["CHROMOSOME_I",OB(901) .. OB(2001)].array.length == 14); 748 assert(bam[0, OB(901) .. OB(2001)].array.length == 14); 749 750 assert(bam["CHROMOSOME_I",OB(941)].array.length == 2); 751 assert(bam[0, OB(941)].array.length == 2); 752 753 754 assert(bam["CHROMOSOME_I",OB(901) .. $].array.length == 18); 755 assert(bam[0, OB(901) .. $].array.length == 18); 756 assert(bam["CHROMOSOME_I",$].array.length == 0); 757 assert(bam[0, $].array.length == 0); 758 assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33); 759 760 assert(bam["CHROMOSOME_II",$-1918 .. $].array.length == 0); 761 assert(bam["CHROMOSOME_II", ZB(3082) .. $].array.length == 0); 762 assert(bam["CHROMOSOME_II",$-1919 .. $].array.length == 1); 763 assert(bam["CHROMOSOME_II", ZB(3081) .. $].array.length == 1); 764 assert(bam["CHROMOSOME_II",$-2018 .. $].array.length == 2); 765 766 auto range = bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]]; 767 auto range1 = range.save; 768 range = range.drop(5); 769 auto range2 = range.save; 770 range = range.drop(5); 771 auto range3 = range.save; 772 range = range.drop(10); 773 auto range4 = range.save; 774 assert(range1.array.length == 33); 775 assert(range2.array.length == 28); 776 assert(range3.array.length == 23); 777 assert(range4.array.length == 13); 778 } 779 debug(dhtslib_unittest) unittest 780 { 781 import std.stdio; 782 import dhtslib.sam; 783 import std.array : array; 784 import std.path : buildPath,dirName; 785 import std.range : drop; 786 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 787 hts_log_info(__FUNCTION__, "Testing AllRecordsRange save()"); 788 hts_log_info(__FUNCTION__, "Loading test file"); 789 790 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 791 assert(bam.allRecords.array.length == 112); 792 assert(bam.allRecords.array.length == 112); 793 794 auto range = bam.allRecords; 795 796 auto range1 = range.save(); 797 range = range.drop(5); 798 799 auto range2 = range.save(); 800 range = range.drop(5); 801 802 auto range3 = range.save(); 803 range = range.drop(5); 804 805 auto range4 = range.save(); 806 range = range.drop(50); 807 808 auto range5 = range.save(); 809 range.popFront; 810 811 auto range6 = range.save(); 812 813 assert(range1.array.length == 112); 814 assert(range2.array.length == 107); 815 assert(range3.array.length == 102); 816 assert(range4.array.length == 97); 817 assert(range5.array.length == 47); 818 assert(range6.array.length == 46); 819 } 820 /// 821 debug(dhtslib_unittest) unittest 822 { 823 import std.stdio; 824 import dhtslib.sam; 825 import std.array : array; 826 import std.path : buildPath,dirName; 827 import std.range : drop; 828 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 829 hts_log_info(__FUNCTION__, "Testing RecordsRange save()"); 830 hts_log_info(__FUNCTION__, "Loading test file"); 831 832 auto bam = SAMReader(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 4); 833 834 auto range = bam.query("CHROMOSOME_I", ZBHO(900, 2000)); 835 assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)).array.length == 14); 836 837 auto range1 = range.save; 838 range = range.drop(1); 839 840 auto range2 = range.save; 841 range = range.drop(2); 842 843 auto range3 = range.save; 844 range = range.drop(3); 845 846 auto range4 = range.save; 847 range = range.drop(5); 848 849 auto range5 = range.save; 850 range.popFront; 851 852 auto range6 = range.save; 853 854 assert(range1.array.length == 14); 855 assert(range2.array.length == 13); 856 assert(range3.array.length == 11); 857 assert(range4.array.length == 8); 858 assert(range5.array.length == 3); 859 assert(range6.array.length == 2); 860 }