1 module dhtslib.sam.record; 2 3 import core.stdc.stdlib : calloc, free, realloc; 4 import core.stdc.string : memset; 5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno; 6 import std.string : fromStringz, toStringz; 7 import std.algorithm : filter; 8 import std.range.interfaces : InputRange, inputRangeObject; 9 import std.traits : isArray, isIntegral, isSomeString; 10 11 import htslib.hts : seq_nt16_str, seq_nt16_table; 12 13 import htslib.hts_log; 14 import htslib.sam; 15 import dhtslib.sam.cigar; 16 import dhtslib.sam.tagvalue; 17 import dhtslib.sam.header; 18 import dhtslib.coordinates; 19 import dhtslib.memory; 20 21 /** 22 Encapsulates a SAM/BAM/CRAM record, 23 using the bam1_t type for memory efficiency, 24 and the htslib helper functions for speed. 25 **/ 26 struct SAMRecord 27 { 28 /// Backing SAM/BAM row record 29 Bam1 b; 30 31 /// Corresponding SAM/BAM header data 32 SAMHeader h; 33 34 /// ditto 35 this(SAMHeader h) 36 { 37 this.b = Bam1(bam_init1); 38 this.h = h; 39 } 40 41 /// Construct SAMRecord from supplied bam1_t 42 deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support") 43 this(bam1_t* b) 44 { 45 this.b = Bam1(b); 46 } 47 48 /// Construct SAMRecord from supplied bam1_t and sam_hdr_type 49 this(bam1_t* b, SAMHeader h) 50 { 51 this.b = Bam1(b); 52 this.h = h; 53 } 54 55 /* bam1_core_t fields */ 56 57 /// chromosome ID, defined by sam_hdr_t 58 pragma(inline, true) 59 @nogc @safe nothrow 60 @property int tid() { return this.b.core.tid; } 61 /// ditto 62 pragma(inline, true) 63 @nogc @safe nothrow 64 @property void tid(int tid) { this.b.core.tid = tid; } 65 66 /// mapped contig (tid) name, defined by sam_hdr_t* h 67 pragma(inline, true) 68 @property const(char)[] referenceName() 69 { 70 assert(!this.h.isNull); 71 return this.h.targetName(this.b.core.tid); 72 } 73 74 /// 0-based leftmost coordinate 75 pragma(inline, true) 76 @nogc @safe nothrow 77 @property ZB pos() { return ZB(this.b.core.pos); } 78 /// ditto 79 pragma(inline, true) 80 @nogc @safe nothrow 81 @property void pos(ZB pos) { this.b.core.pos = pos.pos; } 82 83 /// 0-based, half-open coordinates that represent 84 /// the mapped length of the alignment 85 pragma(inline, true) 86 @property ZBHO coordinates() 87 { 88 return ZBHO(this.pos, this.pos + this.cigar.alignedLength); 89 } 90 91 // TODO: @field bin bin calculated by bam_reg2bin() 92 93 /// mapping quality 94 pragma(inline, true) 95 @nogc @safe nothrow 96 @property ubyte qual() { return this.b.core.qual; } 97 /// ditto 98 pragma(inline, true) 99 @nogc @safe nothrow 100 @property void qual(ubyte q) { this.b.core.qual = q; } 101 102 /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly. 103 pragma(inline, true) 104 @nogc @safe nothrow 105 @property int l_qname() { return this.b.core.l_qname; } 106 107 /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly. 108 pragma(inline, true) 109 @nogc @safe nothrow 110 @property int l_extranul() { return this.b.core.l_extranul; } 111 112 /// bitwise flag 113 pragma(inline, true) 114 @nogc @safe nothrow 115 @property ushort flag() { return this.b.core.flag; } 116 /// ditto 117 pragma(inline, true) 118 @nogc @safe nothrow 119 @property void flag(ushort fl) { this.b.core.flag = fl; } 120 121 /// is read paired? 122 pragma(inline, true) 123 @nogc @safe nothrow 124 @property bool isPaired() 125 { 126 return cast(bool)(b.core.flag & BAM_FPAIRED); 127 } 128 129 /// is read paired? 130 pragma(inline, true) 131 @nogc @safe nothrow 132 @property bool isProperPair() 133 { 134 return cast(bool)(b.core.flag & BAM_FPROPER_PAIR); 135 } 136 137 /// is read mapped? 138 pragma(inline, true) 139 @nogc @safe nothrow 140 @property bool isMapped() 141 { 142 return (b.core.flag & BAM_FUNMAP) == 0; 143 } 144 145 /// is read mate mapped? 146 pragma(inline, true) 147 @nogc @safe nothrow 148 @property bool isMateMapped() 149 { 150 return (b.core.flag & BAM_FMUNMAP) == 0; 151 } 152 153 /// is read reversed? 154 /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); } 155 pragma(inline, true) 156 @nogc @trusted nothrow 157 @property bool isReversed() 158 { 159 return bam_is_rev(this.b); 160 } 161 162 /// is mate reversed? 163 /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); } 164 pragma(inline, true) 165 @nogc @trusted nothrow 166 @property bool mateReversed() 167 { 168 return bam_is_mrev(this.b); 169 } 170 171 /// is this read 1? 172 pragma(inline, true) 173 @nogc @safe nothrow 174 @property bool isRead1() 175 { 176 return cast(bool)(b.core.flag & BAM_FREAD1); 177 } 178 179 /// is this read 2? 180 pragma(inline, true) 181 @nogc @safe nothrow 182 @property bool isRead2() 183 { 184 return cast(bool)(b.core.flag & BAM_FREAD2); 185 } 186 187 /// is read secondary? 188 @property bool isSecondary() 189 { 190 version(LDC){ 191 pragma(inline, true); 192 } 193 return cast(bool)(b.core.flag & BAM_FSECONDARY); 194 } 195 196 /// Does this read fail qc? 197 pragma(inline, true) 198 @nogc @safe nothrow 199 @property bool isQCFail() 200 { 201 return cast(bool)(b.core.flag & BAM_FQCFAIL); 202 } 203 204 /// Is this read marked as a PCR or Optical duplicate? 205 pragma(inline, true) 206 @nogc @safe nothrow 207 @property bool isDuplicate() 208 { 209 return cast(bool)(b.core.flag & BAM_FDUP); 210 } 211 212 /// is read supplementary? 213 pragma(inline, true) 214 @nogc @safe nothrow 215 @property bool isSupplementary() 216 { 217 return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY); 218 } 219 220 /// Return read strand + or - (as char) 221 @property char strand(){ 222 return ['+','-'][isReversed()]; 223 } 224 225 /// Get a slice of query name (read name) 226 /// if you keep this around you should copy it 227 /// -- wraps bam_get_qname(bam1_t *b) 228 pragma(inline, true) 229 @nogc @trusted nothrow 230 @property const(char)[] queryName() 231 { 232 // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); } 233 return fromStringz(bam_get_qname(this.b)); 234 } 235 236 /// Set query name (read name) 237 pragma(inline, true) 238 @trusted nothrow 239 @property void queryName(string qname) 240 { 241 auto ret = bam_set_qname(this.b, toStringz(qname)); 242 if(ret != 0) 243 hts_log_error(__FUNCTION__, "Could not set queryname"); 244 } 245 246 /// query (and quality string) length 247 pragma(inline, true) 248 @nogc @safe nothrow 249 @property int length() 250 { 251 return this.b.core.l_qseq; 252 } 253 254 /// Return char array of the sequence 255 /// see samtools/sam_view.c: get_read 256 pragma(inline, true) 257 @trusted nothrow 258 @property const(char)[] sequence() 259 { 260 // calloc fills with \0; +1 len for Cstring 261 // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1); 262 char[] s; 263 s.length = this.b.core.l_qseq; 264 265 // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); } 266 auto seqdata = bam_get_seq(this.b); 267 268 for (int i; i < this.b.core.l_qseq; i++) 269 { 270 s[i] = seq_nt16_str[bam_seqi(seqdata, i)]; 271 } 272 return s; 273 } 274 275 /// Assigns sequence and resets quality score 276 pragma(inline, true) 277 @trusted nothrow 278 @property void sequence(const(char)[] seq) 279 { 280 /// There is no bam_seq_seq 281 282 //nibble encode sequence 283 ubyte[] en_seq; 284 en_seq.length = (seq.length + 1) >> 1; 285 286 for(auto i=0;i < seq.length;i++){ 287 bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]); 288 } 289 290 //get length of data before seq 291 uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof); 292 293 //get starting point after seq 294 uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq; 295 296 //copy previous data 297 ubyte[] prev=b.data[0 .. l_prev].dup; 298 299 //copy rest of data 300 ubyte[] rest=b.data[start_rest .. b.l_data].dup; 301 302 //if not enough space 303 if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){ 304 305 // realloc 306 auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest)); 307 assert(ptr != null); 308 b.data = ptr; 309 310 // increase capacity 311 b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest); 312 } 313 314 //reassign q_name and rest of data 315 b.data[0 .. l_prev] = prev; 316 b.data[l_prev .. en_seq.length + l_prev] = en_seq; 317 //set qscores to 255 318 memset(b.data + en_seq.length + l_prev, 255, seq.length); 319 b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest; 320 321 //reset data length, seq length 322 b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest); 323 b.core.l_qseq = cast(int) (seq.length); 324 } 325 326 /// Return array of the quality scores 327 /// see samtools/sam_view.c: get_quality 328 /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead? 329 pragma(inline, true) 330 @nogc @trusted nothrow 331 @property const(ubyte)[] qscores() 332 { 333 auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq]; 334 return slice; 335 } 336 337 /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence 338 pragma(inline, true) 339 @trusted nothrow 340 @property void qscores(const(ubyte)[] seq) 341 { 342 /// There is no bam_seq_qual 343 344 if(seq.length != b.core.l_qseq){ 345 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 346 return; 347 } 348 349 //get length of data before seq 350 uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1); 351 b.data[l_prev .. seq.length + l_prev] = seq; 352 } 353 354 /// get phred-scaled base qualities as char array 355 pragma(inline, true) 356 @trusted nothrow 357 const(char)[] qscoresPhredScaled() 358 { 359 auto bqs = this.qscores.dup; 360 bqs[] += 33; 361 return cast(const(char)[]) bqs; 362 } 363 364 /// Create cigar from bam1_t record 365 pragma(inline, true) 366 @property Cigar cigar() 367 { 368 return Cigar(this.b); 369 } 370 371 /// Assign a cigar string 372 pragma(inline, true) 373 @trusted 374 @property void cigar(Cigar cigar) 375 { 376 // no bam_set_cigar 377 378 //get length of data before seq 379 uint l_prev = b.core.l_qname; 380 381 //get starting point after seq 382 uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof); 383 384 //copy previous data 385 ubyte[] prev = b.data[0..l_prev].dup; 386 387 //copy rest of data 388 ubyte[] rest = b.data[start_rest..b.l_data].dup; 389 390 //if not enough space 391 if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){ 392 // realloc 393 auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest)); 394 assert(ptr != null); 395 b.data = ptr; 396 397 // increase capacity 398 b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 399 } 400 401 //reassign q_name and rest of data 402 b.data[0 .. l_prev]=prev; 403 //without cigar.ops.dup we get the overlapping array copy error 404 b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]); 405 b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest; 406 407 //reset data length, seq length 408 b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 409 b.core.n_cigar = cast(uint) (cigar.length); 410 } 411 412 /// Get aux tag from bam1_t record and return a TagValue 413 TagValue opIndex(string val) 414 { 415 return TagValue(this.b, val[0..2]); 416 } 417 418 /// Assign a tag key value pair 419 void opIndexAssign(T)(T value, string index) 420 if(!isArray!T || isSomeString!T) 421 { 422 static if(isIntegral!T){ 423 auto err = bam_aux_update_int(b, index[0..2], value); 424 }else static if(is(T==float)){ 425 auto err = bam_aux_update_float(b, index[0..2], value); 426 }else static if(isSomeString!T){ 427 auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr); 428 } 429 if(err == 0) return; 430 switch(errno){ 431 case EINVAL: 432 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 433 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 434 case ENOMEM: 435 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 436 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 437 " than the maximum size allowed in a bam record (2Gbytes)."); 438 case ERANGE: 439 case EOVERFLOW: 440 throw new Exception("val is outside the range that can be stored in an integer" ~ 441 " bam tag (-2147483647 to 4294967295)."); 442 case -1: 443 throw new Exception("Something went wrong adding the bam tag."); 444 case 0: 445 return; 446 default: 447 throw new Exception("Something went wrong adding the bam tag."); 448 } 449 } 450 451 /// Assign a tag key array pair 452 void opIndexAssign(T)(T[] value, string index) 453 if(!isSomeString!(T[])) 454 { 455 auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr); 456 if(err == 0) return; 457 switch(errno){ 458 case EINVAL: 459 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 460 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 461 case ENOMEM: 462 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 463 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 464 " than the maximum size allowed in a bam record (2Gbytes)."); 465 case ERANGE: 466 case EOVERFLOW: 467 throw new Exception("val is outside the range that can be stored in an integer" ~ 468 " bam tag (-2147483647 to 4294967295)."); 469 case -1: 470 throw new Exception("Something went wrong adding the bam tag."); 471 case 0: 472 return; 473 default: 474 throw new Exception("Something went wrong adding the bam tag."); 475 } 476 } 477 478 /// chromosome ID of next read in template, defined by bam_hdr_t 479 pragma(inline, true) 480 @nogc @safe nothrow 481 @property int mateTID() { return this.b.core.mtid; } 482 /// ditto 483 pragma(inline, true) 484 @nogc @safe nothrow 485 @property void mateTID(int mtid) { this.b.core.mtid = mtid; } 486 487 /// 0-based leftmost coordinate of next read in template 488 pragma(inline, true) 489 @nogc @safe nothrow 490 @property long matePos() { return this.b.core.mpos; } 491 /// ditto 492 pragma(inline, true) 493 @nogc @safe nothrow 494 @property void matePos(long mpos) { this.b.core.mpos = mpos; } 495 496 /// Presumably Insert size, but is undocumented. 497 /// Per samtools source, is measured 5' to 5' 498 /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320 499 pragma(inline, true) 500 @nogc @safe nothrow 501 @property long insertSize() { return this.b.core.isize; } 502 /// ditto 503 pragma(inline, true) 504 @nogc @safe nothrow 505 @property void insertSize(long isize) { this.b.core.isize = isize; } 506 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/ 507 /// get aligned coordinates per each cigar op 508 InputRange!AlignedCoordinate getAlignedCoordinates(){ 509 return AlignedCoordinatesItr(this.cigar).inputRangeObject; 510 } 511 512 /// get aligned coordinates per each cigar op within range 513 /// range is 0-based half open using chromosomal coordinates 514 InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){ 515 assert(start >= 0); 516 assert(end <= this.cigar.ref_bases_covered); 517 return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject; 518 } 519 520 struct AlignedPair(bool refSeq) 521 { 522 Coordinate!(Basis.zero) qpos= ZB(-1); 523 524 Coordinate!(Basis.zero) rpos= ZB(-1); 525 526 Ops cigar_op = Ops.MATCH; 527 528 char queryBase = '-'; 529 530 static if(refSeq) 531 char refBase = '-'; 532 533 string toString(){ 534 import std.conv : to; 535 static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase; 536 else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase; 537 } 538 } 539 struct AlignedPairs(bool refSeq) 540 { 541 import dhtslib.sam.md : MDItr; 542 import std.range : dropExactly; 543 544 InputRange!AlignedCoordinate coords; 545 static if(refSeq) MDItr mdItr; 546 AlignedPair!refSeq current; 547 Bam1 b; 548 ubyte * seq; 549 550 this(SAMRecord rec){ 551 this.b = rec.b; 552 this.seq = bam_get_seq(this.b); 553 this.coords = rec.getAlignedCoordinates; 554 static if(refSeq){ 555 assert(rec["MD"].exists,"MD tag must exist for record"); 556 mdItr = MDItr(rec); 557 } 558 current.qpos = coords.front.qpos; 559 current.rpos = coords.front.rpos; 560 current.cigar_op = coords.front.cigar_op; 561 if(current.cigar_op.isQueryConsuming) 562 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 563 564 static if(refSeq){ 565 if(current.cigar_op.isReferenceConsuming){ 566 if(mdItr.front == '=') 567 current.refBase = current.queryBase; 568 else 569 current.refBase = mdItr.front; 570 mdItr.popFront; 571 } 572 573 } 574 } 575 576 this(SAMRecord rec, long start, long end){ 577 assert(start < end); 578 assert(start >= 0); 579 assert(end <= rec.cigar.ref_bases_covered); 580 this.b = rec.b; 581 this.seq = bam_get_seq(this.b); 582 this.coords = rec.getAlignedCoordinates(start, end); 583 static if(refSeq){ 584 assert(rec["MD"].exists,"MD tag must exist for record"); 585 mdItr = MDItr(rec); 586 mdItr = mdItr.dropExactly(start); 587 } 588 current.qpos = coords.front.qpos; 589 current.rpos = coords.front.rpos; 590 current.cigar_op = coords.front.cigar_op; 591 if(current.cigar_op.isQueryConsuming) 592 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 593 594 static if(refSeq){ 595 if(current.cigar_op.isReferenceConsuming){ 596 if(mdItr.front == '=') 597 current.refBase = current.queryBase; 598 else 599 current.refBase = mdItr.front; 600 mdItr.popFront; 601 } 602 603 } 604 } 605 606 AlignedPair!refSeq front(){ 607 return current; 608 } 609 610 void popFront(){ 611 coords.popFront; 612 if(coords.empty) return; 613 614 current = current.init; 615 current.qpos = coords.front.qpos; 616 current.rpos = coords.front.rpos; 617 current.cigar_op = coords.front.cigar_op; 618 619 if(current.cigar_op.isQueryConsuming) 620 current.queryBase = seq_nt16_str[bam_seqi(seq, current.qpos)]; 621 622 static if(refSeq){ 623 if(current.cigar_op.isReferenceConsuming){ 624 if(mdItr.front == '=') 625 current.refBase = current.queryBase; 626 else 627 current.refBase = mdItr.front; 628 mdItr.popFront; 629 } 630 } 631 632 } 633 634 bool empty(){ 635 return coords.empty; 636 } 637 638 } 639 /// get a range of aligned read and reference positions 640 /// this is meant to recreate functionality from pysam: 641 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 642 /// range is 0-based half open using chromosomal coordinates 643 auto getAlignedPairs(bool withRefSeq)(long start, long end) 644 { 645 return AlignedPairs!withRefSeq(this,start,end).inputRangeObject; 646 } 647 /// get a range of aligned read and reference positions 648 /// this is meant to recreate functionality from pysam: 649 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 650 auto getAlignedPairs(bool withRefSeq)(){ 651 return AlignedPairs!withRefSeq(this); 652 } 653 /+ END CHERRY-PICK +/ 654 } 655 656 /// 657 debug(dhtslib_unittest) unittest 658 { 659 import dhtslib.sam; 660 import htslib.hts_log : hts_log_info; 661 import std.path:buildPath,dirName; 662 663 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 664 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation"); 665 hts_log_info(__FUNCTION__, "Loading test file"); 666 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 667 auto ar = bam.allRecords; 668 assert(ar.empty == false); 669 auto read = ar.front; 670 671 //change queryname 672 hts_log_info(__FUNCTION__, "Testing queryname"); 673 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 674 read.queryName="HS18_09653:4:1315:19857:6171"; 675 assert(read.queryName=="HS18_09653:4:1315:19857:6171"); 676 assert(read.cigar.toString() == "78M1D22M"); 677 read.queryName="HS18_09653:4:1315:19857:617122"; 678 assert(read.queryName=="HS18_09653:4:1315:19857:617122"); 679 assert(read.cigar.toString() == "78M1D22M"); 680 assert(read["RG"].check!string); 681 assert(read["RG"].to!string=="1"); 682 683 //change sequence 684 hts_log_info(__FUNCTION__, "Testing sequence"); 685 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 686 read.sequence=read.sequence[0..10]; 687 assert(read.sequence=="AGCTAGGGCA"); 688 689 ubyte[] q=[255,255,255,255,255,255,255,255,255,255]; 690 assert(read.qscores == q); 691 q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30]; 692 read.qscores(q); 693 assert(read.qscores == q); 694 q[] += 33; 695 696 assert(read.qscoresPhredScaled == cast(char[])(q)); 697 698 assert(read.cigar.toString() == "78M1D22M"); 699 assert(read["RG"].check!string); 700 assert(read["RG"].to!string=="1"); 701 702 //change cigar 703 hts_log_info(__FUNCTION__, "Testing cigar"); 704 auto c=read.cigar; 705 c[$-1].length=21; 706 read.cigar=c; 707 assert(read.cigar.toString() == "78M1D21M"); 708 assert(read["RG"].check!string); 709 assert(read["RG"].to!string=="1"); 710 711 //change tag 712 hts_log_info(__FUNCTION__, "Testing tags"); 713 read["X0"]=2; 714 assert(read["X0"].to!ubyte==2); 715 assert(read["RG"].check!string); 716 717 read["RG"]="test"; 718 assert(read["RG"].to!string=="test"); 719 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 720 } 721 722 /// 723 debug(dhtslib_unittest) unittest 724 { 725 import dhtslib.sam; 726 import htslib.hts_log : hts_log_info; 727 import std.path:buildPath,dirName; 728 729 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 730 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc"); 731 hts_log_info(__FUNCTION__, "Loading test file"); 732 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 733 auto ar = bam.allRecords; 734 assert(ar.empty == false); 735 auto read = ar.front; 736 737 //change queryname 738 hts_log_info(__FUNCTION__, "Testing queryname"); 739 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 740 741 //we do this only to force realloc 742 read.b.m_data=read.b.l_data; 743 744 745 auto prev_m=read.b.m_data; 746 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"; 747 assert(read.b.m_data >prev_m); 748 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"); 749 assert(read.cigar.toString() == "78M1D22M"); 750 assert(read["RG"].check!string); 751 assert(read["RG"].to!string=="1"); 752 753 //we do this only to force realloc 754 read.b.m_data=read.b.l_data; 755 756 //change sequence 757 hts_log_info(__FUNCTION__, "Testing sequence"); 758 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 759 prev_m=read.b.m_data; 760 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"; 761 assert(read.b.m_data >prev_m); 762 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 763 assert(read.cigar.toString() == "78M1D22M"); 764 assert(read["RG"].check!string); 765 assert(read["RG"].to!string=="1"); 766 767 //change cigar 768 hts_log_info(__FUNCTION__, "Testing cigar"); 769 auto c=read.cigar; 770 c=c~c; 771 prev_m=read.b.m_data; 772 read.cigar=c; 773 assert(read.b.m_data >prev_m); 774 assert(read.cigar.toString() == "78M1D22M78M1D22M"); 775 assert(read["RG"].check!string); 776 assert(read["RG"].to!string=="1"); 777 778 //change tag 779 hts_log_info(__FUNCTION__, "Testing tags"); 780 read["X0"]=2; 781 assert(read["X0"].to!ubyte==2); 782 assert(read["RG"].check!string); 783 784 read["RG"]="test"; 785 assert(read["RG"].to!string=="test"); 786 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 787 } 788 789 /// 790 debug(dhtslib_unittest) unittest 791 { 792 import std.stdio; 793 import dhtslib.sam; 794 import dhtslib.sam.md : MDItr; 795 import std.algorithm: map; 796 import std.array: array; 797 import std.path:buildPath,dirName; 798 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 799 800 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 801 auto ar = bam.allRecords; 802 assert(ar.empty == false); 803 SAMRecord read = ar.front; 804 805 auto pairs = read.getAlignedPairs!true(77, 77 + 5); 806 807 assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]); 808 pairs = read.getAlignedPairs!true(77, 77 + 5); 809 assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]); 810 pairs = read.getAlignedPairs!true(77, 77 + 5); 811 // assert(pairs.map!(x => x.refBase).array == "GAAAA"); 812 pairs = read.getAlignedPairs!true(77, 77 + 5); 813 assert(pairs.map!(x => x.queryBase).array == "G-AAA"); 814 } 815 816 817 debug(dhtslib_unittest) unittest 818 { 819 import std.stdio; 820 import dhtslib.sam; 821 import dhtslib.sam.md : MDItr; 822 import std.algorithm: map; 823 import std.range : iota; 824 import std.array: array; 825 import std.path:buildPath,dirName; 826 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 827 SAMHeader h; 828 SAMRecord read = SAMRecord(h); 829 read.queryName = "test"; 830 read.sequence = 831 //6H [4S][ 77M][4I]MD[ 22M]SSS 5H 832 "NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCGAAAAAACATTTTTTGGGTTTTTNNN"; 833 read.cigar = cigarFromString("6H4S77M4I1M1D22M3S5H"); 834 read["MD"] = "48G29^A13C8"; 835 836 long[] rposArr = new long[read.getAlignedCoordinates.array.length]; 837 long[] qposArr = new long[read.getAlignedCoordinates.array.length]; 838 Ops[] opsArr = new Ops[read.getAlignedCoordinates.array.length]; 839 840 rposArr[0..6] = -1; // 6H 841 qposArr[0..6] = -1; // 6H 842 opsArr[0..6] = Ops.HARD_CLIP; 843 844 rposArr[6..10] = -1; // 4S 845 qposArr[6..10] = iota!long(4).array; // 4S 846 opsArr[6..10] = Ops.SOFT_CLIP; 847 848 rposArr[10..87] = iota!long(77).array; // 77M 849 qposArr[10..87] = iota!long(4, 81).array; // 77M 850 opsArr[10..87] = Ops.MATCH; 851 852 rposArr[87..91] = 76; // I4 853 qposArr[87..91] = iota!long(81, 85).array; // I4 854 opsArr[87..91] = Ops.INS; 855 856 rposArr[91] = 77; // 1M 857 qposArr[91] = 85; // 1M 858 opsArr[91] = Ops.MATCH; 859 860 rposArr[92] = 78; // 1D 861 qposArr[92] = 85; // 1D 862 opsArr[92] = Ops.DEL; 863 864 rposArr[93..115] = iota!long(79,101).array; // 22M 865 qposArr[93..115] = iota!long(86,108).array; // 22M 866 opsArr[93..115] = Ops.MATCH; 867 868 rposArr[115..118] = 100; // 3S 869 qposArr[115..118] = iota!long(108, 111).array; // 3S 870 opsArr[115..118] = Ops.SOFT_CLIP; 871 872 rposArr[118..123] = 100; // 5H 873 qposArr[118..123] = 110; // 5H 874 opsArr[118..123] = Ops.HARD_CLIP; 875 876 auto pairs = read.getAlignedPairs!true.array; 877 assert(pairs.map!(x=> x.rpos.pos).array == rposArr); 878 assert(pairs.map!(x=> x.qpos.pos).array == qposArr); 879 assert(pairs.map!(x=> x.cigar_op).array == opsArr); 880 writeln(pairs.map!(x=> x.refBase).array); 881 writeln(pairs.map!(x=> x.queryBase).array); 882 assert(pairs.map!(x=> x.refBase).array == 883 // ================================================G============================ =A=============C======== 884 // =A=============C======== 885 "----------AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATT----GAAAAAAACATTTTTCGGGTTTTT--------"); 886 assert(pairs.map!(x=> x.queryBase).array == 887 "------NNNNAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG-AAAAAACATTTTTTGGGTTTTTNNN-----"); 888 // AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTCCCCG AAAAAACATTTTTTGGGTTTTT 889 auto pairs2 = read.getAlignedPairs!true(77, 77 + 5).array; 890 assert(pairs2.map!(x => x.rpos).array == rposArr[91..96]); 891 assert(pairs2.map!(x => x.qpos).array == qposArr[91..96]); 892 893 import dhtslib.sam.md; 894 import std.range : dropExactly; 895 auto mdItr = MDItr(read); 896 897 mdItr = MDItr(read); 898 mdItr = mdItr.dropExactly(77); 899 900 assert(pairs2.map!(x => x.refBase).array == "GAAAA"); 901 assert(pairs2.map!(x => x.queryBase).array == "G-AAA"); 902 } 903 904 /// 905 debug(dhtslib_unittest) unittest 906 { 907 import std.stdio : writeln, File; 908 import std.range : drop; 909 import std.utf : toUTFz; 910 import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports) 911 import std.path:buildPath,dirName; 912 import std.conv:to; 913 import dhtslib.sam : parseSam; 914 915 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 916 hts_log_info(__FUNCTION__, "Loading sam file"); 917 auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy(); 918 auto b = bam_init1(); 919 auto hdr = bam_hdr_init(); 920 string hdr_str; 921 assert(range.empty == false); 922 for (auto i = 0; i < 4; i++) 923 { 924 hdr_str ~= range.front ~ "\n"; 925 range.popFront; 926 } 927 hts_log_info(__FUNCTION__, "Header"); 928 writeln(hdr_str); 929 hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str)); 930 hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string); 931 auto r = new SAMRecord(b); 932 hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString); 933 assert(r.cigar.toString == "6M1D117M5D28M"); 934 } 935 936 /// 937 debug(dhtslib_unittest) unittest 938 { 939 import std.stdio; 940 import dhtslib.sam; 941 import dhtslib.sam.md : MDItr; 942 import std.algorithm: map; 943 import std.array: array; 944 import std.parallelism : parallel; 945 import std.path:buildPath,dirName; 946 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 947 948 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 949 foreach(rec;parallel(bam.allRecords)){ 950 rec.queryName = "multithreading test"; 951 } 952 953 }