1 module dhtslib.sam.reader; 2 3 import core.stdc.stdio : SEEK_SET; 4 import std.format; 5 import std.stdio : writeln, writefln, stderr, File; 6 import std.string : fromStringz, toStringz; 7 import std.typecons : Tuple; 8 import std.range.interfaces : InputRange, inputRangeObject; 9 import std.range.primitives : isInputRange, ElementType; 10 import std.algorithm : map; 11 import std.array : array; 12 import std.utf : toUTFz; 13 14 import htslib.hts; 15 import htslib.hfile : hdopen, hclose, hFILE, off_t, htell, hseek; 16 import htslib.bgzf : bgzf_tell, bgzf_seek; 17 18 import htslib.hts_log; 19 import htslib.kstring; 20 import htslib.sam; 21 import dhtslib.sam.record; 22 import dhtslib.coordinates; 23 import dhtslib.sam.header; 24 import dhtslib.sam : cmpInterval, cmpRegList; 25 import dhtslib.memory; 26 import dhtslib.util; 27 28 alias SAMFile = SAMReader; 29 /** 30 Encapsulates a SAM/BAM file. 31 32 Implements InputRange interface using htslib calls. 33 If indexed, Random-access query via multidimensional slicing. 34 */ 35 struct SAMReader 36 { 37 /// filename; as usable from D 38 string filename; 39 40 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 41 private const(char)* fn; 42 43 /// htsFile 44 private HtsFile fp; 45 46 /// hFILE if required 47 private hFILE* f; 48 49 /// header struct 50 SAMHeader header; 51 52 private off_t header_offset; 53 54 /// SAM/BAM/CRAM index 55 private HtsIdx idx; 56 57 private kstring_t line; 58 59 /** Create a representation of SAM/BAM/CRAM file from given filename or File 60 61 Params: 62 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 63 extra_threads = extra threads for compression/decompression 64 -1 use all available cores (default) 65 0 use no extra threads 66 >1 add indicated number of threads (to a default of 1) 67 */ 68 this(T)(T f, int extra_threads = -1) 69 if (is(T == string) || is(T == File)) 70 { 71 import std.parallelism : totalCPUs; 72 73 // open file 74 static if (is(T == string)) 75 { 76 this.filename = f; 77 this.fn = toStringz(f); 78 this.fp = HtsFile(hts_open(this.fn, cast(immutable(char)*) "r")); 79 } 80 else static if (is(T == File)) 81 { 82 this.filename = f.name(); 83 this.fn = toStringz(f.name); 84 this.f = hdopen(f.fileno, cast(immutable(char)*) "r"); 85 this.fp = HtsFile(hts_hopen(this.f, this.fn, cast(immutable(char)*) "r")); 86 } 87 else assert(0); 88 89 if (extra_threads == -1) 90 { 91 if ( totalCPUs > 1) 92 { 93 hts_log_info(__FUNCTION__, 94 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 95 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 96 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 97 hts_set_threads(this.fp, totalCPUs); 98 } 99 } 100 else if (extra_threads > 0) 101 { 102 if ((extra_threads + 1) > totalCPUs) 103 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 104 hts_set_threads(this.fp, extra_threads); 105 } 106 else if (extra_threads == 0) 107 { 108 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 109 } 110 else 111 { 112 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 113 } 114 115 // read header 116 this.header = SAMHeader(sam_hdr_read(this.fp)); 117 118 // collect header offset just in case it is a sam file 119 // we would like to iterate 120 if(this.fp.is_bgzf) this.header_offset = bgzf_tell(this.fp.fp.bgzf); 121 else this.header_offset = htell(this.fp.fp.hfile); 122 123 this.idx = HtsIdx(null); 124 } 125 126 /// number of reference sequences; from bam_hdr_t 127 deprecated("use these properties from SAMHeader") 128 @property int nTargets() const 129 { 130 return this.header.nTargets; 131 } 132 alias n_targets = nTargets; 133 134 /// length of specific reference sequence by number (`tid`) 135 deprecated("use these properties from SAMHeader") 136 uint targetLength(int target) const 137 in (target >= 0) 138 in (target < this.nTargets) 139 { 140 return this.header.targetLength(target); 141 } 142 alias targetLen = targetLength; 143 alias target_len = targetLength; 144 145 /// lengths of the reference sequences 146 deprecated("use these properties from SAMHeader") 147 @property uint[] targetLengths() const 148 { 149 return this.header.targetLengths; 150 } 151 alias targetLens = targetLengths; 152 alias target_lens = targetLengths; 153 154 /// names of the reference sequences 155 deprecated("use these properties from SAMHeader") 156 @property string[] targetNames() const 157 { 158 return this.header.targetNames; 159 } 160 alias target_names = targetNames; 161 162 /// reference contig name to integer id 163 deprecated("use these properties from SAMHeader") 164 int targetId(string name) 165 { 166 return this.header.targetId(name); 167 } 168 alias target_id = targetId; 169 170 171 /// fetch is provided as a PySAM compatible synonym for `query` 172 alias fetch = query; 173 174 /** Query a region and return matching alignments as InputRange 175 * 176 * Query on (chr, start, end) may take several forms: 177 * 178 * 1. `query(region)` with a string-based "region" form (e.g. chr1:1000-2000) 179 - Variant: pass an array of query region strings: `query([reg1, reg, ...])` 180 * 2. `query(chr, start, end)` with a combination of function parameters for 181 * contig, start, and end (where contig may be either a string or the numeric 182 * `tid` from BAM header; it would be uncommon to use this directly) 183 * 184 * NOTE THAT THERE IS AN OFF-BY-ONE DIFFERENCE IN THE TWO METHODS ABOVE! 185 * Region string based coordinates assume the first base of the reference 186 * is 1 (e.g., chrX:1-100 yields the first 100 bases), whereas with the 187 * integer function parameter versions, the coordinates are zero-based, half-open 188 * (e.g., <chrX, 0, 100> yields the first 100 bases). 189 * 190 * We also support array indexing on object of type SAMReader directly 191 * in one of two above styles: 192 * 1. `bamfile[region-string]` 193 * 2. `bamfile[contig, start .. end]` with contig like no. 2 above 194 * 195 * The D convention `$` operator marking length of array is supported. 196 * 197 * Finally, the region string is parsed by underlying htslib's `hts_parse_region` 198 * and has special semantics available: 199 * 200 * region | Outputs 201 * --------------- | ------------- 202 * REF | All reads with RNAME REF 203 * REF: | All reads with RNAME REF 204 * REF:START | Reads with RNAME REF overlapping START to end of REF 205 * REF:-END | Reads with RNAME REF overlapping start of REF to END 206 * REF:START-END | Reads with RNAME REF overlapping START to END 207 * . | All reads from the start of the file 208 * * | Unmapped reads at the end of the file (RNAME '*' in SAM) 209 * 210 * 211 * Examples: 212 * ``` 213 * bamfile = SAMReader("whatever.bam"); 214 * auto reads1 = bamfile.query("chr1:1-500"); 215 * auto reads2 = bamfile.query("chr2", 0, 500); 216 * auto reads3 = bamfile["chr3", 0 .. 500]; 217 * 218 * auto reads4 = bamfile["chrX", $-500 .. $]; // last 500 nt 219 * 220 * auto reads5 = bamfile.query("chrY"); // entirety of chrY 221 * 222 * // When colon present in reference name (e.g. HLA additions in GRCh38) 223 * // wrap the ref name in { } (this is an htslib convention; see hts_parse_region) 224 * auto reads6 = bamfile.query("{HLA-DRB1*12:17}:1-100"); 225 * ``` 226 */ 227 auto query(CoordSystem cs)(string chrom, Interval!cs coords) 228 in (!this.header.isNull) 229 { 230 auto tid = this.header.targetId(chrom); 231 return query(tid, coords); 232 } 233 234 /// ditto 235 auto query(CoordSystem cs)(int tid, Interval!cs coords) 236 in (!this.header.isNull) 237 { 238 /// convert to zero-based half-open 239 auto newcoords = coords.to!(CoordSystem.zbho); 240 241 /// load index 242 if(this.idx == null) 243 this.idx = HtsIdx(sam_index_load(this.fp, this.fn)); 244 if (this.idx == null) 245 { 246 hts_log_error(__FUNCTION__, "SAM index not found"); 247 throw new Exception("SAM index not found"); 248 } 249 auto itr = HtsItr(sam_itr_queryi(this.idx, tid, newcoords.start, newcoords.end)); 250 return RecordRange(this.fp, this.header, itr, this.header_offset); 251 } 252 253 254 /// ditto 255 auto query(string[] regions) 256 in (!this.header.isNull) 257 { 258 /// load index 259 if(this.idx == null) 260 this.idx = HtsIdx(sam_index_load(this.fp, this.fn)); 261 if (this.idx == null) 262 { 263 hts_log_error(__FUNCTION__, "SAM index not found"); 264 throw new Exception("SAM index not found"); 265 } 266 return RecordRangeMulti(this.fp, this.idx, this.header, regions); 267 } 268 269 /// opIndex with a list of string regions 270 /// bam[["chr1:1-3","chr2:4-50"]] 271 auto opIndex(string[] regions) 272 { 273 return query(regions); 274 } 275 276 /// opIndex with a string contig and an Interval 277 /// bam["chr1", ZBHO(1,3)] 278 auto opIndex(CoordSystem cs)(string contig, Interval!cs coords) 279 { 280 return query(contig, coords); 281 } 282 283 /// opIndex with an int tid and an Interval 284 /// bam[0, ZBHO(1,3)] 285 auto opIndex(CoordSystem cs)(int tid, Interval!cs coords) 286 { 287 return query(tid, coords); 288 } 289 290 /// opIndex with a string contig and a Coordinate 291 /// bam["chr1", ZB(1)] 292 auto opIndex(Basis bs)(string contig, Coordinate!bs pos) 293 { 294 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1); 295 return query(contig, coords); 296 } 297 298 /// opIndex with an int tid and a Coordinate 299 /// bam[0, ZB(1)] 300 auto opIndex(Basis bs)(int tid, Coordinate!bs pos) 301 { 302 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos, pos + 1); 303 return query(tid, coords); 304 } 305 306 /// opIndex with a string contig and two Coordinates 307 /// bam["chr1", ZB(1), ZB(3)] 308 deprecated("use multidimensional slicing with second parameter as range ([\"chr1\", 1 .. 2])") 309 auto opIndex(Basis bs)(string tid, Coordinate!bs pos1, Coordinate!bs pos2) 310 { 311 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2); 312 return query(tid, coords); 313 } 314 315 /// opIndex with an int tid and two Coordinates 316 /// bam[0, ZB(1), ZB(3)] 317 deprecated("use multidimensional slicing with second parameter as range ([20, 1 .. 2])") 318 auto opIndex(Basis bs)(int tid, Coordinate!bs pos1, Coordinate!bs pos2) 319 { 320 auto coords = Interval!(getCoordinateSystem!(bs,End.open))(pos1, pos2); 321 return query(tid, coords); 322 } 323 324 /// opSlice with two Coordinates 325 /// [ZB(1) .. ZB(3)] 326 auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, Coordinate!bs end) if(dim == 1) 327 { 328 assert(end > start); 329 return Interval!(getCoordinateSystem!(bs, End.open))(start, end); 330 } 331 332 333 private struct OffsetType 334 { 335 ptrdiff_t offset; 336 alias offset this; 337 338 // supports e.g. $ - x 339 OffsetType opBinary(string s, T)(T val) 340 { 341 mixin("return OffsetType(offset " ~ s ~ " val);"); 342 } 343 344 invariant 345 { 346 assert(this.offset <= 0, "Offset from end should be zero or negative"); 347 } 348 } 349 /** Array-end `$` indexing hack courtesy of Steve Schveighoffer 350 https://forum.dlang.org/post/rl7a56$nad$1@digitalmars.com 351 352 Requires in addition to opDollar returning a bespoke non-integral type 353 a series of overloads for opIndex and opSlice taking this type 354 */ 355 OffsetType opDollar(size_t dim)() if(dim == 1) 356 { 357 return OffsetType.init; 358 } 359 /// opIndex with a string contig and an Offset 360 /// bam["chr1",$-2] 361 auto opIndex(string ctg, OffsetType endoff) 362 { 363 auto tid = this.header.targetId(ctg); 364 auto end = this.header.targetLength(tid) + endoff.offset; 365 // TODO review: is targetLength the last nt, or targetLength - 1 the last nt? 366 auto coords = Interval!(CoordSystem.zbho)(end, end + 1); 367 return query(tid, coords); 368 } 369 /// opIndex with an int tid and an Offset 370 /// bam[0,$-2] 371 auto opIndex(int tid, OffsetType endoff) 372 { 373 auto end = this.header.targetLength(tid) + endoff.offset; 374 // TODO review: is targetLength the last nt, or targetLength - 1 the last nt? 375 auto coords = Interval!(CoordSystem.zbho)(end, end + 1); 376 return query(tid, coords); 377 } 378 379 /// opSlice as Coordinate and an offset 380 /// i.e [ZB(2) .. $] 381 auto opSlice(size_t dim, Basis bs)(Coordinate!bs start, OffsetType off) if(dim == 1) 382 { 383 return Tuple!(Coordinate!bs, OffsetType)(start, off); 384 } 385 386 /// opIndex with a string contig and a Coordinate and Offset 387 /// bam["chr1",ZB(1) .. $] 388 auto opIndex(Basis bs)(string ctg, Tuple!(Coordinate!bs, OffsetType) coords) 389 { 390 auto tid = this.header.targetId(ctg); 391 auto end = this.header.targetLength(tid) + coords[1]; 392 auto endCoord = ZB(end); 393 auto newEndCoord = endCoord.to!bs; 394 auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord); 395 return query(tid, c); 396 } 397 398 /// opIndex with an int tid and a Coordinate and Offset 399 /// bam[0,ZB(1) .. $] 400 auto opIndex(Basis bs)(int tid, Tuple!(Coordinate!bs, OffsetType) coords) 401 { 402 auto end = this.header.targetLength(tid) + coords[1]; 403 auto endCoord = ZB(end); 404 auto newEndCoord = endCoord.to!bs; 405 auto c = Interval!(getCoordinateSystem!(bs,End.open))(coords[0], newEndCoord); 406 return query(tid, c); 407 } 408 409 /// opSlice as two offset 410 /// i.e [$-2 .. $] 411 auto opSlice(size_t dim)(OffsetType start, OffsetType end) if(dim == 1) 412 { 413 return Tuple!(OffsetType, OffsetType)(start, end); 414 } 415 416 /// opIndex two Offsets 417 /// i.e fai["chrom1", $-2 .. $] 418 auto opIndex(string ctg, Tuple!(OffsetType, OffsetType) coords) 419 { 420 auto tid = this.header.targetId(ctg); 421 auto start = this.header.targetLength(tid) + coords[0]; 422 auto end = this.header.targetLength(tid) + coords[1]; 423 auto c = ZBHO(start, end); 424 return query(tid, c); 425 } 426 427 /// opIndex with an int tid and a Coordinate and Offset 428 /// bam[0,ZB(1) .. $] 429 auto opIndex(int tid, Tuple!(OffsetType, OffsetType) coords) 430 { 431 auto start = this.header.targetLength(tid) + coords[0]; 432 auto end = this.header.targetLength(tid) + coords[1]; 433 auto c = ZBHO(start, end); 434 return query(tid, c); 435 } 436 437 438 /// Return an InputRange representing all records in the SAM/BAM/CRAM 439 auto allRecords() 440 { 441 return AllRecordsRange(this.fp, this.header, this.header_offset); 442 } 443 444 deprecated("Avoid snake_case names") 445 alias all_records = allRecords; 446 447 448 /// Iterate through all records in the SAM 449 struct AllRecordsRange 450 { 451 private HtsFile fp; // belongs to parent; shared 452 private SAMHeader header; // belongs to parent; shared 453 private Bam1 b; 454 private bool initialized; // Needed to support both foreach and immediate .front() 455 private int success; // sam_read1 return code 456 457 this(HtsFile fp, SAMHeader header, off_t offset) 458 { 459 this.fp = copyHtsFile(fp, offset); 460 assert(this.fp.format.format == htsExactFormat.sam || this.fp.format.format == htsExactFormat.bam); 461 this.header = header; 462 this.b = Bam1(bam_init1()); 463 this.empty; 464 } 465 466 /// InputRange interface 467 @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range) 468 { 469 if (!this.initialized) { 470 this.popFront(); 471 this.initialized = true; 472 } 473 if (success >= 0) 474 return false; 475 else if (success == -1) 476 return true; 477 else 478 { 479 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1"); 480 return true; 481 } 482 } 483 /// ditto 484 void popFront() 485 { 486 success = sam_read1(this.fp, this.header.h, this.b); 487 //bam_destroy1(this.b); 488 } 489 /// ditto 490 SAMRecord front() 491 { 492 assert(this.initialized, "front called before empty"); 493 return SAMRecord(bam_dup1(this.b), this.header); 494 } 495 496 AllRecordsRange save() 497 { 498 AllRecordsRange newRange; 499 newRange.fp = HtsFile(copyHtsFile(fp)); 500 newRange.header = header; 501 newRange.b = Bam1(bam_dup1(this.b)); 502 newRange.initialized = this.initialized; 503 newRange.success = this.success; 504 505 return newRange; 506 } 507 508 } 509 510 /// Iterate over records falling within a queried region 511 struct RecordRange 512 { 513 private HtsFile fp; 514 private SAMHeader h; 515 private HtsItr itr; 516 private Bam1 b; 517 518 private int r; // sam_itr_next: >= 0 on success; -1 when there is no more data; < -1 on error 519 520 private bool initialized; // Needed to support both foreach and immediate .front() 521 private int success; // sam_read1 return code 522 523 /// Constructor relieves caller of calling bam_init1 and simplifies first-record flow 524 this(HtsFile fp, SAMHeader header, HtsItr itr, off_t offset) 525 { 526 this.fp = HtsFile(copyHtsFile(fp, offset)); 527 this.itr = HtsItr(copyHtsItr(itr)); 528 this.h = header; 529 this.b = Bam1(bam_init1); 530 this.empty; 531 assert(itr !is null, "itr was null"); 532 } 533 534 /// InputRange interface 535 @property bool empty() 536 { 537 // TODO, itr.finished shouldn't be used 538 if (!this.initialized) { 539 this.popFront(); 540 this.initialized = true; 541 } 542 assert(this.itr !is null); 543 return (r < 0 || itr.finished) ? true : false; 544 } 545 /// ditto 546 void popFront() 547 { 548 this.r = sam_itr_next(this.fp, this.itr, this.b); 549 } 550 /// ditto 551 SAMRecord front() 552 { 553 return SAMRecord(bam_dup1(b), this.h); 554 } 555 556 /// make a ForwardRange 557 RecordRange save() 558 { 559 RecordRange newRange; 560 newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off)); 561 newRange.itr = HtsItr(copyHtsItr(itr)); 562 newRange.h = h; 563 newRange.b = Bam1(bam_dup1(this.b)); 564 newRange.initialized = this.initialized; 565 newRange.success = this.success; 566 newRange.r = this.r; 567 return newRange; 568 } 569 } 570 571 static assert(isInputRange!RecordRange); 572 static assert(is(ElementType!RecordRange == SAMRecord)); 573 574 /// Iterate over records falling within queried regions using a RegionList 575 struct RecordRangeMulti 576 { 577 578 private HtsFile fp; 579 private SAMHeader h; 580 private HtsItrMulti itr; 581 private Bam1 b; 582 583 private int r; 584 private bool initialized; 585 586 /// 587 this(HtsFile fp, HtsIdx idx, SAMHeader header, string[] regions) 588 { 589 /// get reglist 590 auto cQueries = regions.map!(toUTFz!(char *)).array; 591 int count; 592 auto fnPtr = cast(hts_name2id_f) &sam_hdr_name2tid; 593 594 /// rlistPtr ends up being free'd by the hts_itr 595 auto rlistPtr = hts_reglist_create(cQueries.ptr, cast(int) cQueries.length, &count, cast(void *)header.h.getRef, fnPtr); 596 597 598 /// set header and fp 599 this.fp = HtsFile(copyHtsFile(fp)); 600 this.h = header; 601 602 /// init bam1_t 603 b = Bam1(bam_init1()); 604 605 /// get the itr 606 itr = sam_itr_regions(idx, this.h.h, rlistPtr, cast(uint) count); 607 608 empty(); 609 } 610 611 /// InputRange interface 612 @property bool empty() 613 { 614 if(!initialized){ 615 this.initialized = true; 616 popFront; 617 } 618 return (r <= 0 && itr.finished) ? true : false; 619 } 620 /// ditto 621 void popFront() 622 { 623 r = sam_itr_multi_next(this.fp, this.itr, this.b); 624 } 625 /// ditto 626 SAMRecord front() 627 { 628 return SAMRecord(bam_dup1(b), this.h); 629 } 630 /// make a ForwardRange 631 RecordRangeMulti save() 632 { 633 RecordRangeMulti newRange; 634 newRange.fp = HtsFile(copyHtsFile(fp, itr.curr_off)); 635 newRange.itr = HtsItrMulti(copyHtsItr(itr)); 636 newRange.h = h; 637 newRange.b = Bam1(bam_dup1(this.b)); 638 newRange.initialized = this.initialized; 639 newRange.r = this.r; 640 return newRange; 641 } 642 } 643 static assert(isInputRange!RecordRangeMulti); 644 static assert(is(ElementType!RecordRangeMulti == SAMRecord)); 645 } 646 647 /// 648 debug(dhtslib_unittest) unittest 649 { 650 import dhtslib.sam; 651 import htslib.hts_log : hts_log_info; 652 import std.path : buildPath, dirName; 653 import std.string : fromStringz; 654 import std.array : array; 655 656 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 657 hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord"); 658 hts_log_info(__FUNCTION__, "Loading test file"); 659 auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0); 660 auto sam2 = SAMWriter("/tmp/test.bam", sam.header); 661 auto readrange = sam.allRecords; 662 hts_log_info(__FUNCTION__, "Getting read 1"); 663 assert(readrange.empty == false); 664 auto read = readrange.front(); 665 666 // writeln(read.sequence); 667 assert(read.sequence=="GCTAGCTCAG"); 668 assert(sam.allRecords.array.length == 2); 669 sam2.write(read); 670 destroy(sam2); 671 672 673 // testing with multiple specified threads 674 sam = SAMFile("/tmp/test.bam", 2); 675 readrange = sam.allRecords; 676 assert(readrange.empty == false); 677 read = readrange.front(); 678 assert(read.sequence=="GCTAGCTCAG"); 679 assert(sam.allRecords.array.length == 1); 680 681 // testing with no additional threads 682 sam = SAMFile("/tmp/test.bam", 0); 683 readrange = sam.allRecords; 684 assert(readrange.empty == false); 685 read = readrange.front(); 686 assert(read.sequence=="GCTAGCTCAG"); 687 assert(sam.allRecords.array.length == 1); 688 689 // testing SAMReader targets/tid functions 690 assert(sam.header.nTargets == 1); 691 assert(sam.header.targetId("Sheila") == 0); 692 assert(sam.header.targetLength(0) == 20); 693 assert(sam.header.targetLengths == [20]); 694 assert(sam.header.targetNames == ["Sheila"]); 695 696 } 697 698 /// 699 debug(dhtslib_unittest) unittest 700 { 701 import std.stdio; 702 import dhtslib.sam; 703 import dhtslib.sam.md : MDItr; 704 import std.algorithm : map; 705 import std.array : array; 706 import std.path : buildPath,dirName; 707 import std.range : drop; 708 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 709 hts_log_info(__FUNCTION__, "Testing SAMFile query"); 710 hts_log_info(__FUNCTION__, "Loading test file"); 711 712 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 713 assert(bam.allRecords.array.length == 112); 714 // assert(bam["CHROMOSOME_I"].array.length == 18); 715 // assert(bam["CHROMOSOME_II"].array.length == 34); 716 // assert(bam["CHROMOSOME_III"].array.length == 41); 717 // assert(bam["CHROMOSOME_IV"].array.length == 19); 718 // assert(bam["CHROMOSOME_V"].array.length == 0); 719 assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)) .array.length == 14); 720 assert(bam["CHROMOSOME_I",ZB(900) .. ZB(2000)].array.length == 14); 721 assert(bam[0, ZB(900) .. ZB(2000)].array.length == 14); 722 723 assert(bam["CHROMOSOME_I",ZB(940)].array.length == 2); 724 assert(bam[0, ZB(940)].array.length == 2); 725 726 727 assert(bam["CHROMOSOME_I",ZB(900) .. $].array.length == 18); 728 assert(bam[0, ZB(900) .. $].array.length == 18); 729 assert(bam["CHROMOSOME_I",$].array.length == 0); 730 assert(bam[0, $].array.length == 0); 731 assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33); 732 733 assert(bam.query("CHROMOSOME_I", OBHO(901, 2000)) .array.length == 14); 734 assert(bam["CHROMOSOME_I",OB(901) .. OB(2001)].array.length == 14); 735 assert(bam[0, OB(901) .. OB(2001)].array.length == 14); 736 737 assert(bam["CHROMOSOME_I",OB(941)].array.length == 2); 738 assert(bam[0, OB(941)].array.length == 2); 739 740 741 assert(bam["CHROMOSOME_I",OB(901) .. $].array.length == 18); 742 assert(bam[0, OB(901) .. $].array.length == 18); 743 assert(bam["CHROMOSOME_I",$].array.length == 0); 744 assert(bam[0, $].array.length == 0); 745 assert(bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]].array.length == 33); 746 747 assert(bam["CHROMOSOME_II",$-1918 .. $].array.length == 0); 748 assert(bam["CHROMOSOME_II", ZB(3082) .. $].array.length == 0); 749 assert(bam["CHROMOSOME_II",$-1919 .. $].array.length == 1); 750 assert(bam["CHROMOSOME_II", ZB(3081) .. $].array.length == 1); 751 assert(bam["CHROMOSOME_II",$-2018 .. $].array.length == 2); 752 753 auto range = bam[["CHROMOSOME_I:900-2000","CHROMOSOME_II:900-2000"]]; 754 auto range1 = range.save; 755 range = range.drop(5); 756 auto range2 = range.save; 757 range = range.drop(5); 758 auto range3 = range.save; 759 range = range.drop(10); 760 auto range4 = range.save; 761 assert(range1.array.length == 33); 762 assert(range2.array.length == 28); 763 assert(range3.array.length == 23); 764 assert(range4.array.length == 13); 765 } 766 debug(dhtslib_unittest) unittest 767 { 768 import std.stdio; 769 import dhtslib.sam; 770 import std.array : array; 771 import std.path : buildPath,dirName; 772 import std.range : drop; 773 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 774 hts_log_info(__FUNCTION__, "Testing AllRecordsRange save()"); 775 hts_log_info(__FUNCTION__, "Loading test file"); 776 777 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 778 assert(bam.allRecords.array.length == 112); 779 assert(bam.allRecords.array.length == 112); 780 781 auto range = bam.allRecords; 782 783 auto range1 = range.save(); 784 range = range.drop(5); 785 786 auto range2 = range.save(); 787 range = range.drop(5); 788 789 auto range3 = range.save(); 790 range = range.drop(5); 791 792 auto range4 = range.save(); 793 range = range.drop(50); 794 795 auto range5 = range.save(); 796 range.popFront; 797 798 auto range6 = range.save(); 799 800 assert(range1.array.length == 112); 801 assert(range2.array.length == 107); 802 assert(range3.array.length == 102); 803 assert(range4.array.length == 97); 804 assert(range5.array.length == 47); 805 assert(range6.array.length == 46); 806 } 807 /// 808 debug(dhtslib_unittest) unittest 809 { 810 import std.stdio; 811 import dhtslib.sam; 812 import std.array : array; 813 import std.path : buildPath,dirName; 814 import std.range : drop; 815 hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 816 hts_log_info(__FUNCTION__, "Testing RecordsRange save()"); 817 hts_log_info(__FUNCTION__, "Loading test file"); 818 819 auto bam = SAMReader(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 4); 820 821 auto range = bam.query("CHROMOSOME_I", ZBHO(900, 2000)); 822 assert(bam.query("CHROMOSOME_I", ZBHO(900, 2000)).array.length == 14); 823 824 auto range1 = range.save; 825 range = range.drop(1); 826 827 auto range2 = range.save; 828 range = range.drop(2); 829 830 auto range3 = range.save; 831 range = range.drop(3); 832 833 auto range4 = range.save; 834 range = range.drop(5); 835 836 auto range5 = range.save; 837 range.popFront; 838 839 auto range6 = range.save; 840 841 assert(range1.array.length == 14); 842 assert(range2.array.length == 13); 843 assert(range3.array.length == 11); 844 assert(range4.array.length == 8); 845 assert(range5.array.length == 3); 846 assert(range6.array.length == 2); 847 }