1 module dhtslib.sam.record; 2 3 import core.stdc.stdlib : calloc, free, realloc; 4 import core.stdc.string : memset; 5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno; 6 import std.string : fromStringz, toStringz; 7 import std.algorithm : filter; 8 import std.range.interfaces : InputRange, inputRangeObject; 9 import std.traits : isArray, isIntegral, isSomeString; 10 11 import htslib.hts : seq_nt16_str, seq_nt16_table; 12 13 import htslib.hts_log; 14 import htslib.sam; 15 import dhtslib.sam.cigar; 16 import dhtslib.sam.tagvalue; 17 import dhtslib.sam.header; 18 import dhtslib.coordinates; 19 import dhtslib.memory; 20 21 /** 22 Encapsulates a SAM/BAM/CRAM record, 23 using the bam1_t type for memory efficiency, 24 and the htslib helper functions for speed. 25 **/ 26 struct SAMRecord 27 { 28 /// Backing SAM/BAM row record 29 Bam1 b; 30 31 /// Corresponding SAM/BAM header data 32 SAMHeader h; 33 34 /// ditto 35 this(SAMHeader h) 36 { 37 this.b = Bam1(bam_init1); 38 this.h = h; 39 } 40 41 /// Construct SAMRecord from supplied bam1_t 42 deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support") 43 this(bam1_t* b) 44 { 45 this.b = Bam1(b); 46 } 47 48 /// Construct SAMRecord from supplied bam1_t and sam_hdr_type 49 this(bam1_t* b, SAMHeader h) 50 { 51 this.b = Bam1(b); 52 this.h = h; 53 } 54 55 /// Explicit postblit to avoid 56 /// https://github.com/blachlylab/dhtslib/issues/122 57 this(this) 58 { 59 this.b = b; 60 this.h = h; 61 } 62 63 /* bam1_core_t fields */ 64 65 /// chromosome ID, defined by sam_hdr_t 66 pragma(inline, true) 67 @nogc @safe nothrow 68 @property int tid() { return this.b.core.tid; } 69 /// ditto 70 pragma(inline, true) 71 @nogc @safe nothrow 72 @property void tid(int tid) { this.b.core.tid = tid; } 73 74 /// mapped contig (tid) name, defined by sam_hdr_t* h 75 pragma(inline, true) 76 @property const(char)[] referenceName() 77 { 78 assert(!this.h.isNull); 79 return this.h.targetName(this.b.core.tid); 80 } 81 82 /// 0-based leftmost coordinate 83 pragma(inline, true) 84 @nogc @safe nothrow 85 @property ZB pos() { return ZB(this.b.core.pos); } 86 /// ditto 87 pragma(inline, true) 88 @nogc @safe nothrow 89 @property void pos(ZB pos) { this.b.core.pos = pos.pos; } 90 91 /// 0-based, half-open coordinates that represent 92 /// the mapped length of the alignment 93 pragma(inline, true) 94 @property ZBHO coordinates() 95 { 96 return ZBHO(this.pos, this.pos + this.cigar.alignedLength); 97 } 98 99 // TODO: @field bin bin calculated by bam_reg2bin() 100 101 /// mapping quality 102 pragma(inline, true) 103 @nogc @safe nothrow 104 @property ubyte qual() { return this.b.core.qual; } 105 /// ditto 106 pragma(inline, true) 107 @nogc @safe nothrow 108 @property void qual(ubyte q) { this.b.core.qual = q; } 109 110 /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly. 111 pragma(inline, true) 112 @nogc @safe nothrow 113 @property int l_qname() { return this.b.core.l_qname; } 114 115 /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly. 116 pragma(inline, true) 117 @nogc @safe nothrow 118 @property int l_extranul() { return this.b.core.l_extranul; } 119 120 /// bitwise flag 121 pragma(inline, true) 122 @nogc @safe nothrow 123 @property ushort flag() { return this.b.core.flag; } 124 /// ditto 125 pragma(inline, true) 126 @nogc @safe nothrow 127 @property void flag(ushort fl) { this.b.core.flag = fl; } 128 129 /// is read paired? 130 pragma(inline, true) 131 @nogc @safe nothrow 132 @property bool isPaired() 133 { 134 return cast(bool)(b.core.flag & BAM_FPAIRED); 135 } 136 137 /// is read paired? 138 pragma(inline, true) 139 @nogc @safe nothrow 140 @property bool isProperPair() 141 { 142 return cast(bool)(b.core.flag & BAM_FPROPER_PAIR); 143 } 144 145 /// is read mapped? 146 pragma(inline, true) 147 @nogc @safe nothrow 148 @property bool isMapped() 149 { 150 return (b.core.flag & BAM_FUNMAP) == 0; 151 } 152 153 /// is read mate mapped? 154 pragma(inline, true) 155 @nogc @safe nothrow 156 @property bool isMateMapped() 157 { 158 return (b.core.flag & BAM_FMUNMAP) == 0; 159 } 160 161 /// is read reversed? 162 /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); } 163 pragma(inline, true) 164 @nogc @trusted nothrow 165 @property bool isReversed() 166 { 167 return bam_is_rev(this.b); 168 } 169 170 /// is mate reversed? 171 /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); } 172 pragma(inline, true) 173 @nogc @trusted nothrow 174 @property bool mateReversed() 175 { 176 return bam_is_mrev(this.b); 177 } 178 179 /// is this read 1? 180 pragma(inline, true) 181 @nogc @safe nothrow 182 @property bool isRead1() 183 { 184 return cast(bool)(b.core.flag & BAM_FREAD1); 185 } 186 187 /// is this read 2? 188 pragma(inline, true) 189 @nogc @safe nothrow 190 @property bool isRead2() 191 { 192 return cast(bool)(b.core.flag & BAM_FREAD2); 193 } 194 195 /// is read secondary? 196 @property bool isSecondary() 197 { 198 version(LDC){ 199 pragma(inline, true); 200 } 201 return cast(bool)(b.core.flag & BAM_FSECONDARY); 202 } 203 204 /// Does this read fail qc? 205 pragma(inline, true) 206 @nogc @safe nothrow 207 @property bool isQCFail() 208 { 209 return cast(bool)(b.core.flag & BAM_FQCFAIL); 210 } 211 212 /// Is this read marked as a PCR or Optical duplicate? 213 pragma(inline, true) 214 @nogc @safe nothrow 215 @property bool isDuplicate() 216 { 217 return cast(bool)(b.core.flag & BAM_FDUP); 218 } 219 220 /// is read supplementary? 221 pragma(inline, true) 222 @nogc @safe nothrow 223 @property bool isSupplementary() 224 { 225 return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY); 226 } 227 228 /// Return read strand + or - (as char) 229 @property char strand(){ 230 return ['+','-'][isReversed()]; 231 } 232 233 /// Get a slice of query name (read name) 234 /// if you keep this around you should copy it 235 /// -- wraps bam_get_qname(bam1_t *b) 236 pragma(inline, true) 237 @nogc @trusted nothrow 238 @property const(char)[] queryName() 239 { 240 // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); } 241 return fromStringz(bam_get_qname(this.b)); 242 } 243 244 /// Set query name (read name) 245 pragma(inline, true) 246 @trusted nothrow 247 @property void queryName(string qname) 248 { 249 auto ret = bam_set_qname(this.b, toStringz(qname)); 250 if(ret != 0) 251 hts_log_error(__FUNCTION__, "Could not set queryname"); 252 } 253 254 /// query (and quality string) length 255 pragma(inline, true) 256 @nogc @safe nothrow 257 @property int length() 258 { 259 return this.b.core.l_qseq; 260 } 261 262 /// Return char array of the sequence 263 /// see samtools/sam_view.c: get_read 264 pragma(inline, true) 265 @trusted nothrow 266 @property const(char)[] sequence() 267 { 268 // calloc fills with \0; +1 len for Cstring 269 // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1); 270 char[] s; 271 s.length = this.b.core.l_qseq; 272 273 // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); } 274 auto seqdata = bam_get_seq(this.b); 275 276 for (int i; i < this.b.core.l_qseq; i++) 277 { 278 s[i] = seq_nt16_str[bam_seqi(seqdata, i)]; 279 } 280 return s; 281 } 282 283 /// Assigns sequence and resets quality score 284 pragma(inline, true) 285 @trusted nothrow 286 @property void sequence(const(char)[] seq) 287 { 288 /// There is no bam_seq_seq 289 290 //nibble encode sequence 291 ubyte[] en_seq; 292 en_seq.length = (seq.length + 1) >> 1; 293 294 for(auto i=0;i < seq.length;i++){ 295 bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]); 296 } 297 298 //get length of data before seq 299 uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof); 300 301 //get starting point after seq 302 uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq; 303 304 //copy previous data 305 ubyte[] prev=b.data[0 .. l_prev].dup; 306 307 //copy rest of data 308 ubyte[] rest=b.data[start_rest .. b.l_data].dup; 309 310 //if not enough space 311 if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){ 312 313 // realloc 314 auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest)); 315 assert(ptr != null); 316 b.data = ptr; 317 318 // increase capacity 319 b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest); 320 } 321 322 //reassign q_name and rest of data 323 b.data[0 .. l_prev] = prev; 324 b.data[l_prev .. en_seq.length + l_prev] = en_seq; 325 //set qscores to 255 326 memset(b.data + en_seq.length + l_prev, 255, seq.length); 327 b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest; 328 329 //reset data length, seq length 330 b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest); 331 b.core.l_qseq = cast(int) (seq.length); 332 } 333 334 /// Return array of the quality scores 335 /// see samtools/sam_view.c: get_quality 336 /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead? 337 pragma(inline, true) 338 @nogc @trusted nothrow 339 @property const(ubyte)[] qscores() 340 { 341 auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq]; 342 return slice; 343 } 344 345 /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence 346 pragma(inline, true) 347 @trusted nothrow 348 @property void qscores(const(ubyte)[] seq) 349 { 350 /// There is no bam_seq_qual 351 352 if(seq.length != b.core.l_qseq){ 353 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 354 return; 355 } 356 357 //get length of data before seq 358 uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1); 359 b.data[l_prev .. seq.length + l_prev] = seq; 360 } 361 362 /// get phred-scaled base qualities as char array 363 pragma(inline, true) 364 @trusted nothrow 365 const(char)[] qscoresPhredScaled() 366 { 367 auto bqs = this.qscores.dup; 368 bqs[] += 33; 369 return cast(const(char)[]) bqs; 370 } 371 372 /// Create cigar from bam1_t record 373 pragma(inline, true) 374 @property Cigar cigar() 375 { 376 return Cigar(this.b); 377 } 378 379 /// Assign a cigar string 380 pragma(inline, true) 381 @trusted 382 @property void cigar(Cigar cigar) 383 { 384 // no bam_set_cigar 385 386 //get length of data before seq 387 uint l_prev = b.core.l_qname; 388 389 //get starting point after seq 390 uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof); 391 392 //copy previous data 393 ubyte[] prev = b.data[0..l_prev].dup; 394 395 //copy rest of data 396 ubyte[] rest = b.data[start_rest..b.l_data].dup; 397 398 //if not enough space 399 if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){ 400 // realloc 401 auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest)); 402 assert(ptr != null); 403 b.data = ptr; 404 405 // increase capacity 406 b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 407 } 408 409 //reassign q_name and rest of data 410 b.data[0 .. l_prev]=prev; 411 //without cigar.ops.dup we get the overlapping array copy error 412 b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]); 413 b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest; 414 415 //reset data length, seq length 416 b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 417 b.core.n_cigar = cast(uint) (cigar.length); 418 } 419 420 /// Get aux tag from bam1_t record and return a TagValue 421 TagValue opIndex(string val) 422 { 423 return TagValue(this.b, val[0..2]); 424 } 425 426 /// Assign a tag key value pair 427 void opIndexAssign(T)(T value, string index) 428 if(!isArray!T || isSomeString!T) 429 { 430 static if(isIntegral!T){ 431 auto err = bam_aux_update_int(b, index[0..2], value); 432 }else static if(is(T==float)){ 433 auto err = bam_aux_update_float(b, index[0..2], value); 434 }else static if(isSomeString!T){ 435 auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr); 436 } 437 if(err == 0) return; 438 switch(errno){ 439 case EINVAL: 440 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 441 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 442 case ENOMEM: 443 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 444 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 445 " than the maximum size allowed in a bam record (2Gbytes)."); 446 case ERANGE: 447 case EOVERFLOW: 448 throw new Exception("val is outside the range that can be stored in an integer" ~ 449 " bam tag (-2147483647 to 4294967295)."); 450 case -1: 451 throw new Exception("Something went wrong adding the bam tag."); 452 case 0: 453 return; 454 default: 455 throw new Exception("Something went wrong adding the bam tag."); 456 } 457 } 458 459 /// Assign a tag key array pair 460 void opIndexAssign(T)(T[] value, string index) 461 if(!isSomeString!(T[])) 462 { 463 auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr); 464 if(err == 0) return; 465 switch(errno){ 466 case EINVAL: 467 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 468 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 469 case ENOMEM: 470 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 471 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 472 " than the maximum size allowed in a bam record (2Gbytes)."); 473 case ERANGE: 474 case EOVERFLOW: 475 throw new Exception("val is outside the range that can be stored in an integer" ~ 476 " bam tag (-2147483647 to 4294967295)."); 477 case -1: 478 throw new Exception("Something went wrong adding the bam tag."); 479 case 0: 480 return; 481 default: 482 throw new Exception("Something went wrong adding the bam tag."); 483 } 484 } 485 486 /// chromosome ID of next read in template, defined by bam_hdr_t 487 pragma(inline, true) 488 @nogc @safe nothrow 489 @property int mateTID() { return this.b.core.mtid; } 490 /// ditto 491 pragma(inline, true) 492 @nogc @safe nothrow 493 @property void mateTID(int mtid) { this.b.core.mtid = mtid; } 494 495 /// 0-based leftmost coordinate of next read in template 496 pragma(inline, true) 497 @nogc @safe nothrow 498 @property long matePos() { return this.b.core.mpos; } 499 /// ditto 500 pragma(inline, true) 501 @nogc @safe nothrow 502 @property void matePos(long mpos) { this.b.core.mpos = mpos; } 503 504 /// Presumably Insert size, but is undocumented. 505 /// Per samtools source, is measured 5' to 5' 506 /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320 507 pragma(inline, true) 508 @nogc @safe nothrow 509 @property long insertSize() { return this.b.core.isize; } 510 /// ditto 511 pragma(inline, true) 512 @nogc @safe nothrow 513 @property void insertSize(long isize) { this.b.core.isize = isize; } 514 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/ 515 /// get aligned coordinates per each cigar op 516 InputRange!AlignedCoordinate getAlignedCoordinates(){ 517 return AlignedCoordinatesItr(this.cigar).inputRangeObject; 518 } 519 520 /// get aligned coordinates per each cigar op within range 521 /// range is 0-based half open using chromosomal coordinates 522 InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){ 523 assert(start >= 0); 524 assert(end <= this.cigar.ref_bases_covered); 525 return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject; 526 } 527 528 struct AlignedPair(bool refSeq) 529 { 530 Coordinate!(Basis.zero) qpos= ZB(-1); 531 532 Coordinate!(Basis.zero) rpos= ZB(-1); 533 534 Ops cigar_op = Ops.MATCH; 535 536 char queryBase = '-'; 537 538 static if(refSeq) 539 char refBase = '-'; 540 541 string toString(){ 542 import std.conv : to; 543 static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase; 544 else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase; 545 } 546 } 547 struct AlignedPairs(bool refSeq) 548 { 549 import dhtslib.sam.md : MDItr; 550 import std.range : dropExactly; 551 552 InputRange!AlignedCoordinate coords; 553 static if(refSeq) MDItr mdItr; 554 AlignedPair!refSeq current; 555 Bam1 b; 556 ubyte * seq; 557 558 this(SAMRecord rec){ 559 this.b = rec.b; 560 this.seq = bam_get_seq(this.b); 561 this.coords = rec.getAlignedCoordinates; 562 static if(refSeq){ 563 assert(rec["MD"].exists,"MD tag must exist for record"); 564 mdItr = MDItr(rec); 565 } 566 current.qpos = coords.front.qpos; 567 current.rpos = coords.front.rpos; 568 current.cigar_op = coords.front.cigar_op; 569 if(current.cigar_op.isQueryConsuming) 570 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 571 572 static if(refSeq){ 573 if(current.cigar_op.isReferenceConsuming){ 574 if(mdItr.front == '=') 575 current.refBase = current.queryBase; 576 else 577 current.refBase = mdItr.front; 578 mdItr.popFront; 579 } 580 581 } 582 } 583 584 this(SAMRecord rec, long start, long end){ 585 assert(start < end); 586 assert(start >= 0); 587 assert(end <= rec.cigar.ref_bases_covered); 588 this.b = rec.b; 589 this.seq = bam_get_seq(this.b); 590 this.coords = rec.getAlignedCoordinates(start, end); 591 static if(refSeq){ 592 assert(rec["MD"].exists,"MD tag must exist for record"); 593 mdItr = MDItr(rec); 594 mdItr = mdItr.dropExactly(start); 595 } 596 current.qpos = coords.front.qpos; 597 current.rpos = coords.front.rpos; 598 current.cigar_op = coords.front.cigar_op; 599 if(current.cigar_op.isQueryConsuming) 600 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 601 602 static if(refSeq){ 603 if(current.cigar_op.isReferenceConsuming){ 604 if(mdItr.front == '=') 605 current.refBase = current.queryBase; 606 else 607 current.refBase = mdItr.front; 608 mdItr.popFront; 609 } 610 611 } 612 } 613 614 AlignedPair!refSeq front(){ 615 return current; 616 } 617 618 void popFront(){ 619 coords.popFront; 620 if(coords.empty) return; 621 622 current = current.init; 623 current.qpos = coords.front.qpos; 624 current.rpos = coords.front.rpos; 625 current.cigar_op = coords.front.cigar_op; 626 627 if(current.cigar_op.isQueryConsuming) 628 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 629 630 static if(refSeq){ 631 if(current.cigar_op.isReferenceConsuming){ 632 if(mdItr.front == '=') 633 current.refBase = current.queryBase; 634 else 635 current.refBase = mdItr.front; 636 mdItr.popFront; 637 } 638 } 639 640 } 641 642 bool empty(){ 643 return coords.empty; 644 } 645 646 } 647 /// get a range of aligned read and reference positions 648 /// this is meant to recreate functionality from pysam: 649 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 650 /// range is 0-based half open using chromosomal coordinates 651 auto getAlignedPairs(bool withRefSeq)(long start, long end) 652 { 653 return AlignedPairs!withRefSeq(this,start,end).inputRangeObject; 654 } 655 /// get a range of aligned read and reference positions 656 /// this is meant to recreate functionality from pysam: 657 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 658 auto getAlignedPairs(bool withRefSeq)(){ 659 return AlignedPairs!withRefSeq(this); 660 } 661 /+ END CHERRY-PICK +/ 662 } 663 664 /// 665 debug(dhtslib_unittest) unittest 666 { 667 import dhtslib.sam; 668 import htslib.hts_log : hts_log_info; 669 import std.path:buildPath,dirName; 670 671 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 672 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation"); 673 hts_log_info(__FUNCTION__, "Loading test file"); 674 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 675 auto ar = bam.allRecords; 676 assert(ar.empty == false); 677 auto read = ar.front; 678 679 //change queryname 680 hts_log_info(__FUNCTION__, "Testing queryname"); 681 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 682 read.queryName="HS18_09653:4:1315:19857:6171"; 683 assert(read.queryName=="HS18_09653:4:1315:19857:6171"); 684 assert(read.cigar.toString() == "78M1D22M"); 685 read.queryName="HS18_09653:4:1315:19857:617122"; 686 assert(read.queryName=="HS18_09653:4:1315:19857:617122"); 687 assert(read.cigar.toString() == "78M1D22M"); 688 assert(read["RG"].check!string); 689 assert(read["RG"].to!string=="1"); 690 691 //change sequence 692 hts_log_info(__FUNCTION__, "Testing sequence"); 693 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 694 read.sequence=read.sequence[0..10]; 695 assert(read.sequence=="AGCTAGGGCA"); 696 697 ubyte[] q=[255,255,255,255,255,255,255,255,255,255]; 698 assert(read.qscores == q); 699 q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30]; 700 read.qscores(q); 701 assert(read.qscores == q); 702 q[] += 33; 703 704 assert(read.qscoresPhredScaled == cast(char[])(q)); 705 706 assert(read.cigar.toString() == "78M1D22M"); 707 assert(read["RG"].check!string); 708 assert(read["RG"].to!string=="1"); 709 710 //change cigar 711 hts_log_info(__FUNCTION__, "Testing cigar"); 712 auto c=read.cigar; 713 c[$-1].length=21; 714 read.cigar=c; 715 assert(read.cigar.toString() == "78M1D21M"); 716 assert(read["RG"].check!string); 717 assert(read["RG"].to!string=="1"); 718 719 //change tag 720 hts_log_info(__FUNCTION__, "Testing tags"); 721 read["X0"]=2; 722 assert(read["X0"].to!ubyte==2); 723 assert(read["RG"].check!string); 724 725 read["RG"]="test"; 726 assert(read["RG"].to!string=="test"); 727 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 728 } 729 730 /// 731 debug(dhtslib_unittest) unittest 732 { 733 import dhtslib.sam; 734 import htslib.hts_log : hts_log_info; 735 import std.path:buildPath,dirName; 736 737 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 738 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc"); 739 hts_log_info(__FUNCTION__, "Loading test file"); 740 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 741 auto ar = bam.allRecords; 742 assert(ar.empty == false); 743 auto read = ar.front; 744 745 //change queryname 746 hts_log_info(__FUNCTION__, "Testing queryname"); 747 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 748 749 //we do this only to force realloc 750 read.b.m_data=read.b.l_data; 751 752 753 auto prev_m=read.b.m_data; 754 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"; 755 assert(read.b.m_data >prev_m); 756 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"); 757 assert(read.cigar.toString() == "78M1D22M"); 758 assert(read["RG"].check!string); 759 assert(read["RG"].to!string=="1"); 760 761 //we do this only to force realloc 762 read.b.m_data=read.b.l_data; 763 764 //change sequence 765 hts_log_info(__FUNCTION__, "Testing sequence"); 766 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 767 prev_m=read.b.m_data; 768 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"; 769 assert(read.b.m_data >prev_m); 770 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 771 assert(read.cigar.toString() == "78M1D22M"); 772 assert(read["RG"].check!string); 773 assert(read["RG"].to!string=="1"); 774 775 //change cigar 776 hts_log_info(__FUNCTION__, "Testing cigar"); 777 auto c=read.cigar; 778 c=c~c; 779 prev_m=read.b.m_data; 780 read.cigar=c; 781 assert(read.b.m_data >prev_m); 782 assert(read.cigar.toString() == "78M1D22M78M1D22M"); 783 assert(read["RG"].check!string); 784 assert(read["RG"].to!string=="1"); 785 786 //change tag 787 hts_log_info(__FUNCTION__, "Testing tags"); 788 read["X0"]=2; 789 assert(read["X0"].to!ubyte==2); 790 assert(read["RG"].check!string); 791 792 read["RG"]="test"; 793 assert(read["RG"].to!string=="test"); 794 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 795 } 796 797 /// 798 debug(dhtslib_unittest) unittest 799 { 800 import std.stdio; 801 import dhtslib.sam; 802 import dhtslib.sam.md : MDItr; 803 import std.algorithm: map; 804 import std.array: array; 805 import std.path:buildPath,dirName; 806 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 807 808 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 809 auto ar = bam.allRecords; 810 assert(ar.empty == false); 811 SAMRecord read = ar.front; 812 813 auto pairs = read.getAlignedPairs!true(77, 77 + 5); 814 815 assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]); 816 pairs = read.getAlignedPairs!true(77, 77 + 5); 817 assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]); 818 pairs = read.getAlignedPairs!true(77, 77 + 5); 819 // assert(pairs.map!(x => x.refBase).array == "GAAAA"); 820 pairs = read.getAlignedPairs!true(77, 77 + 5); 821 assert(pairs.map!(x => x.queryBase).array == "G-AAA"); 822 } 823 824 825 debug(dhtslib_unittest) unittest 826 { 827 import std.stdio; 828 import dhtslib.sam; 829 import dhtslib.sam.md : MDItr; 830 import std.algorithm: map; 831 import std.range : iota; 832 import std.array: array; 833 import std.path:buildPath,dirName; 834 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 835 SAMHeader h; 836 SAMRecord read = SAMRecord(h); 837 read.queryName = "test"; 838 read.sequence = 839 //6H [4S][ 77M][4I]MD[ 22M]SSS 5H 840 "NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCGAAAAAACATTTTTTGGGTTTTTNNN"; 841 read.cigar = cigarFromString("6H4S77M4I1M1D22M3S5H"); 842 read["MD"] = "48G29^A13C8"; 843 844 long[] rposArr = new long[read.getAlignedCoordinates.array.length]; 845 long[] qposArr = new long[read.getAlignedCoordinates.array.length]; 846 Ops[] opsArr = new Ops[read.getAlignedCoordinates.array.length]; 847 848 rposArr[0..6] = -1; // 6H 849 qposArr[0..6] = -1; // 6H 850 opsArr[0..6] = Ops.HARD_CLIP; 851 852 rposArr[6..10] = -1; // 4S 853 qposArr[6..10] = iota!long(4).array; // 4S 854 opsArr[6..10] = Ops.SOFT_CLIP; 855 856 rposArr[10..87] = iota!long(77).array; // 77M 857 qposArr[10..87] = iota!long(4, 81).array; // 77M 858 opsArr[10..87] = Ops.MATCH; 859 860 rposArr[87..91] = 76; // I4 861 qposArr[87..91] = iota!long(81, 85).array; // I4 862 opsArr[87..91] = Ops.INS; 863 864 rposArr[91] = 77; // 1M 865 qposArr[91] = 85; // 1M 866 opsArr[91] = Ops.MATCH; 867 868 rposArr[92] = 78; // 1D 869 qposArr[92] = 85; // 1D 870 opsArr[92] = Ops.DEL; 871 872 rposArr[93..115] = iota!long(79,101).array; // 22M 873 qposArr[93..115] = iota!long(86,108).array; // 22M 874 opsArr[93..115] = Ops.MATCH; 875 876 rposArr[115..118] = 100; // 3S 877 qposArr[115..118] = iota!long(108, 111).array; // 3S 878 opsArr[115..118] = Ops.SOFT_CLIP; 879 880 rposArr[118..123] = 100; // 5H 881 qposArr[118..123] = 110; // 5H 882 opsArr[118..123] = Ops.HARD_CLIP; 883 884 auto pairs = read.getAlignedPairs!true.array; 885 assert(pairs.map!(x=> x.rpos.pos).array == rposArr); 886 assert(pairs.map!(x=> x.qpos.pos).array == qposArr); 887 assert(pairs.map!(x=> x.cigar_op).array == opsArr); 888 writeln(pairs.map!(x=> x.refBase).array); 889 writeln(pairs.map!(x=> x.queryBase).array); 890 assert(pairs.map!(x=> x.refBase).array == 891 // ================================================G============================ =A=============C======== 892 // =A=============C======== 893 "----------AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATT----GAAAAAAACATTTTTCGGGTTTTT--------"); 894 assert(pairs.map!(x=> x.queryBase).array == 895 "------NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG-AAAAAACATTTTTTGGGTTTTTNNN-----"); 896 // AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG AAAAAACATTTTTTGGGTTTTT 897 auto pairs2 = read.getAlignedPairs!true(77, 77 + 5).array; 898 assert(pairs2.map!(x => x.rpos).array == rposArr[91..96]); 899 assert(pairs2.map!(x => x.qpos).array == qposArr[91..96]); 900 901 import dhtslib.sam.md; 902 import std.range : dropExactly; 903 auto mdItr = MDItr(read); 904 905 mdItr = MDItr(read); 906 mdItr = mdItr.dropExactly(77); 907 908 assert(pairs2.map!(x => x.refBase).array == "GAAAA"); 909 assert(pairs2.map!(x => x.queryBase).array == "G-AAA"); 910 } 911 912 /// 913 debug(dhtslib_unittest) unittest 914 { 915 import std.stdio : writeln, File; 916 import std.range : drop; 917 import std.utf : toUTFz; 918 import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports) 919 import std.path:buildPath,dirName; 920 import std.conv:to; 921 import dhtslib.sam : parseSam; 922 923 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 924 hts_log_info(__FUNCTION__, "Loading sam file"); 925 auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy(); 926 auto b = bam_init1(); 927 auto hdr = bam_hdr_init(); 928 string hdr_str; 929 assert(range.empty == false); 930 for (auto i = 0; i < 4; i++) 931 { 932 hdr_str ~= range.front ~ "\n"; 933 range.popFront; 934 } 935 hts_log_info(__FUNCTION__, "Header"); 936 writeln(hdr_str); 937 hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str)); 938 hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string); 939 auto r = new SAMRecord(b); 940 hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString); 941 assert(r.cigar.toString == "6M1D117M5D28M"); 942 } 943 944 /// 945 debug(dhtslib_unittest) unittest 946 { 947 import std.stdio; 948 import dhtslib.sam; 949 import dhtslib.sam.md : MDItr; 950 import std.algorithm: map; 951 import std.array: array; 952 import std.parallelism : parallel; 953 import std.path:buildPath,dirName; 954 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 955 956 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 957 foreach(rec;parallel(bam.allRecords)){ 958 rec.queryName = "multithreading test"; 959 } 960 961 }