1 module dhtslib.sam.record; 2 3 import core.stdc.stdlib : calloc, free, realloc; 4 import core.stdc..string : memset; 5 import core.stdc.errno : EINVAL, EOVERFLOW, ENOMEM, ERANGE, errno; 6 import std..string : fromStringz, toStringz; 7 import std.algorithm : filter; 8 import std.range.interfaces : InputRange, inputRangeObject; 9 import std.traits : isArray, isIntegral, isSomeString; 10 11 import htslib.hts : seq_nt16_str, seq_nt16_table; 12 13 import htslib.hts_log; 14 import htslib.sam; 15 import dhtslib.sam.cigar; 16 import dhtslib.sam.tagvalue; 17 import dhtslib.sam.header; 18 import dhtslib.coordinates; 19 import dhtslib.memory; 20 21 /** 22 Encapsulates a SAM/BAM/CRAM record, 23 using the bam1_t type for memory efficiency, 24 and the htslib helper functions for speed. 25 **/ 26 struct SAMRecord 27 { 28 /// Backing SAM/BAM row record 29 Bam1 b; 30 31 /// Corresponding SAM/BAM header data 32 SAMHeader h; 33 34 /// ditto 35 this(SAMHeader h) 36 { 37 this.b = Bam1(bam_init1); 38 this.h = h; 39 } 40 41 /// Construct SAMRecord from supplied bam1_t 42 deprecated("Construct with sam_hdr_t* for mapped contig (tid) name support") 43 this(bam1_t* b) 44 { 45 this.b = Bam1(b); 46 } 47 48 /// Construct SAMRecord from supplied bam1_t and sam_hdr_type 49 this(bam1_t* b, SAMHeader h) 50 { 51 this.b = Bam1(b); 52 this.h = h; 53 } 54 55 /* bam1_core_t fields */ 56 57 /// chromosome ID, defined by sam_hdr_t 58 pragma(inline, true) 59 @nogc @safe nothrow 60 @property int tid() { return this.b.core.tid; } 61 /// ditto 62 pragma(inline, true) 63 @nogc @safe nothrow 64 @property void tid(int tid) { this.b.core.tid = tid; } 65 66 /// mapped contig (tid) name, defined by sam_hdr_t* h 67 pragma(inline, true) 68 @property const(char)[] referenceName() 69 { 70 assert(!this.h.isNull); 71 return this.h.targetName(this.b.core.tid); 72 } 73 74 /// 0-based leftmost coordinate 75 pragma(inline, true) 76 @nogc @safe nothrow 77 @property ZB pos() { return ZB(this.b.core.pos); } 78 /// ditto 79 pragma(inline, true) 80 @nogc @safe nothrow 81 @property void pos(ZB pos) { this.b.core.pos = pos.pos; } 82 83 /// 0-based, half-open coordinates that represent 84 /// the mapped length of the alignment 85 pragma(inline, true) 86 @property ZBHO coordinates() 87 { 88 return ZBHO(this.pos, this.pos + this.cigar.alignedLength); 89 } 90 91 // TODO: @field bin bin calculated by bam_reg2bin() 92 93 /// mapping quality 94 pragma(inline, true) 95 @nogc @safe nothrow 96 @property ubyte qual() { return this.b.core.qual; } 97 /// ditto 98 pragma(inline, true) 99 @nogc @safe nothrow 100 @property void qual(ubyte q) { this.b.core.qual = q; } 101 102 /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly. 103 pragma(inline, true) 104 @nogc @safe nothrow 105 @property int l_qname() { return this.b.core.l_qname; } 106 107 /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly. 108 pragma(inline, true) 109 @nogc @safe nothrow 110 @property int l_extranul() { return this.b.core.l_extranul; } 111 112 /// bitwise flag 113 pragma(inline, true) 114 @nogc @safe nothrow 115 @property ushort flag() { return this.b.core.flag; } 116 /// ditto 117 pragma(inline, true) 118 @nogc @safe nothrow 119 @property void flag(ushort fl) { this.b.core.flag = fl; } 120 121 /// is read paired? 122 pragma(inline, true) 123 @nogc @safe nothrow 124 @property bool isPaired() 125 { 126 return cast(bool)(b.core.flag & BAM_FPAIRED); 127 } 128 129 /// is read paired? 130 pragma(inline, true) 131 @nogc @safe nothrow 132 @property bool isProperPair() 133 { 134 return cast(bool)(b.core.flag & BAM_FPROPER_PAIR); 135 } 136 137 /// is read mapped? 138 pragma(inline, true) 139 @nogc @safe nothrow 140 @property bool isMapped() 141 { 142 return (b.core.flag & BAM_FUNMAP) == 0; 143 } 144 145 /// is read mate mapped? 146 pragma(inline, true) 147 @nogc @safe nothrow 148 @property bool isMateMapped() 149 { 150 return (b.core.flag & BAM_FMUNMAP) == 0; 151 } 152 153 /// is read reversed? 154 /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); } 155 pragma(inline, true) 156 @nogc @trusted nothrow 157 @property bool isReversed() 158 { 159 return bam_is_rev(this.b); 160 } 161 162 /// is mate reversed? 163 /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); } 164 pragma(inline, true) 165 @nogc @trusted nothrow 166 @property bool mateReversed() 167 { 168 return bam_is_mrev(this.b); 169 } 170 171 /// is this read 1? 172 pragma(inline, true) 173 @nogc @safe nothrow 174 @property bool isRead1() 175 { 176 return cast(bool)(b.core.flag & BAM_FREAD1); 177 } 178 179 /// is this read 2? 180 pragma(inline, true) 181 @nogc @safe nothrow 182 @property bool isRead2() 183 { 184 return cast(bool)(b.core.flag & BAM_FREAD2); 185 } 186 187 /// is read secondary? 188 @property bool isSecondary() 189 { 190 version(LDC){ 191 pragma(inline, true); 192 } 193 return cast(bool)(b.core.flag & BAM_FSECONDARY); 194 } 195 196 /// Does this read fail qc? 197 pragma(inline, true) 198 @nogc @safe nothrow 199 @property bool isQCFail() 200 { 201 return cast(bool)(b.core.flag & BAM_FQCFAIL); 202 } 203 204 /// Is this read marked as a PCR or Optical duplicate? 205 pragma(inline, true) 206 @nogc @safe nothrow 207 @property bool isDuplicate() 208 { 209 return cast(bool)(b.core.flag & BAM_FDUP); 210 } 211 212 /// is read supplementary? 213 pragma(inline, true) 214 @nogc @safe nothrow 215 @property bool isSupplementary() 216 { 217 return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY); 218 } 219 220 /// Return read strand + or - (as char) 221 @property char strand(){ 222 return ['+','-'][isReversed()]; 223 } 224 225 /// Get a slice of query name (read name) 226 /// if you keep this around you should copy it 227 /// -- wraps bam_get_qname(bam1_t *b) 228 pragma(inline, true) 229 @nogc @trusted nothrow 230 @property const(char)[] queryName() 231 { 232 // auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); } 233 return fromStringz(bam_get_qname(this.b)); 234 } 235 236 /// Set query name (read name) 237 pragma(inline, true) 238 @trusted nothrow 239 @property void queryName(string qname) 240 { 241 auto ret = bam_set_qname(this.b, toStringz(qname)); 242 if(ret != 0) 243 hts_log_error(__FUNCTION__, "Could not set queryname"); 244 } 245 246 /// query (and quality string) length 247 pragma(inline, true) 248 @nogc @safe nothrow 249 @property int length() 250 { 251 return this.b.core.l_qseq; 252 } 253 254 /// Return char array of the sequence 255 /// see samtools/sam_view.c: get_read 256 pragma(inline, true) 257 @trusted nothrow 258 @property const(char)[] sequence() 259 { 260 // calloc fills with \0; +1 len for Cstring 261 // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1); 262 char[] s; 263 s.length = this.b.core.l_qseq; 264 265 // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); } 266 auto seqdata = bam_get_seq(this.b); 267 268 for (int i; i < this.b.core.l_qseq; i++) 269 { 270 s[i] = seq_nt16_str[bam_seqi(seqdata, i)]; 271 } 272 return s; 273 } 274 275 /// Assigns sequence and resets quality score 276 pragma(inline, true) 277 @trusted nothrow 278 @property void sequence(const(char)[] seq) 279 { 280 /// There is no bam_seq_seq 281 282 //nibble encode sequence 283 ubyte[] en_seq; 284 en_seq.length = (seq.length + 1) >> 1; 285 286 for(auto i=0;i < seq.length;i++){ 287 bam_set_seqi(en_seq.ptr, i, seq_nt16_table[seq[i]]); 288 } 289 290 //get length of data before seq 291 uint l_prev = b.core.l_qname + cast(uint)(b.core.n_cigar * uint.sizeof); 292 293 //get starting point after seq 294 uint start_rest = l_prev + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq; 295 296 //copy previous data 297 ubyte[] prev=b.data[0 .. l_prev].dup; 298 299 //copy rest of data 300 ubyte[] rest=b.data[start_rest .. b.l_data].dup; 301 302 //if not enough space 303 if(en_seq.length + seq.length + l_prev + (b.l_data - start_rest) > b.m_data){ 304 305 // realloc 306 auto ptr = cast(ubyte*) realloc(b.data, en_seq.length + seq.length + l_prev + (b.l_data - start_rest)); 307 assert(ptr != null); 308 b.data = ptr; 309 310 // increase capacity 311 b.m_data = cast(uint)en_seq.length + cast(uint) seq.length + l_prev + (b.l_data - start_rest); 312 } 313 314 //reassign q_name and rest of data 315 b.data[0 .. l_prev] = prev; 316 b.data[l_prev .. en_seq.length + l_prev] = en_seq; 317 //set qscores to 255 318 memset(b.data + en_seq.length + l_prev, 255, seq.length); 319 b.data[l_prev + en_seq.length + seq.length .. l_prev + en_seq.length + seq.length + (b.l_data - start_rest)] = rest; 320 321 //reset data length, seq length 322 b.l_data = cast(uint) en_seq.length + cast(uint) seq.length + l_prev + (b.l_data-start_rest); 323 b.core.l_qseq = cast(int) (seq.length); 324 } 325 326 /// Return array of the quality scores 327 /// see samtools/sam_view.c: get_quality 328 /// TODO: Discussion -- should we return const(ubyte[]) or const(ubyte)[] instead? 329 pragma(inline, true) 330 @nogc @trusted nothrow 331 @property const(ubyte)[] qscores() 332 { 333 auto slice = bam_get_qual(this.b)[0 .. this.b.core.l_qseq]; 334 return slice; 335 } 336 337 /// Set quality scores from raw ubyte quality score array given that it is the same length as the bam sequence 338 pragma(inline, true) 339 @trusted nothrow 340 @property void qscores(const(ubyte)[] seq) 341 { 342 /// There is no bam_seq_qual 343 344 if(seq.length != b.core.l_qseq){ 345 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 346 return; 347 } 348 349 //get length of data before seq 350 uint l_prev = b.core.l_qname + cast(uint) (b.core.n_cigar * uint.sizeof) + ((b.core.l_qseq + 1) >> 1); 351 b.data[l_prev .. seq.length + l_prev] = seq; 352 } 353 354 /// get phred-scaled base qualities as char array 355 pragma(inline, true) 356 @trusted nothrow 357 const(char)[] qscoresPhredScaled() 358 { 359 auto bqs = this.qscores.dup; 360 bqs[] += 33; 361 return cast(const(char)[]) bqs; 362 } 363 364 /// Create cigar from bam1_t record 365 pragma(inline, true) 366 @property Cigar cigar() 367 { 368 return Cigar(this.b); 369 } 370 371 /// Assign a cigar string 372 pragma(inline, true) 373 @trusted 374 @property void cigar(Cigar cigar) 375 { 376 // no bam_set_cigar 377 378 //get length of data before seq 379 uint l_prev = b.core.l_qname; 380 381 //get starting point after seq 382 uint start_rest = l_prev + cast(uint)(b.core.n_cigar * uint.sizeof); 383 384 //copy previous data 385 ubyte[] prev = b.data[0..l_prev].dup; 386 387 //copy rest of data 388 ubyte[] rest = b.data[start_rest..b.l_data].dup; 389 390 //if not enough space 391 if(uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest) > b.m_data){ 392 // realloc 393 auto ptr = cast(ubyte*) realloc(b.data, uint.sizeof * cigar.length + l_prev + (b.l_data - start_rest)); 394 assert(ptr != null); 395 b.data = ptr; 396 397 // increase capacity 398 b.m_data=cast(uint)(uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 399 } 400 401 //reassign q_name and rest of data 402 b.data[0 .. l_prev]=prev; 403 //without cigar.ops.dup we get the overlapping array copy error 404 b.data[l_prev .. uint.sizeof * cigar.length + l_prev] = cast(ubyte[])( cast(uint[]) cigar.dup[]); 405 b.data[l_prev + uint.sizeof * cigar.length .. l_prev + uint.sizeof * cigar.length + (b.l_data - start_rest)] = rest; 406 407 //reset data length, seq length 408 b.l_data = cast(uint) (uint.sizeof * cigar.length) + l_prev + (b.l_data - start_rest); 409 b.core.n_cigar = cast(uint) (cigar.length); 410 } 411 412 /// Get aux tag from bam1_t record and return a TagValue 413 TagValue opIndex(string val) 414 { 415 return TagValue(this.b, val[0..2]); 416 } 417 418 /// Assign a tag key value pair 419 void opIndexAssign(T)(T value, string index) 420 if(!isArray!T || isSomeString!T) 421 { 422 static if(isIntegral!T){ 423 auto err = bam_aux_update_int(b, index[0..2], value); 424 }else static if(is(T==float)){ 425 auto err = bam_aux_update_float(b, index[0..2], value); 426 }else static if(isSomeString!T){ 427 auto err = bam_aux_update_str(b, index[0..2], cast(int) value.length, value.ptr); 428 } 429 if(err == 0) return; 430 switch(errno){ 431 case EINVAL: 432 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 433 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 434 case ENOMEM: 435 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 436 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 437 " than the maximum size allowed in a bam record (2Gbytes)."); 438 case ERANGE: 439 case EOVERFLOW: 440 throw new Exception("val is outside the range that can be stored in an integer" ~ 441 " bam tag (-2147483647 to 4294967295)."); 442 case -1: 443 throw new Exception("Something went wrong adding the bam tag."); 444 case 0: 445 return; 446 default: 447 throw new Exception("Something went wrong adding the bam tag."); 448 } 449 } 450 451 /// Assign a tag key array pair 452 void opIndexAssign(T)(T[] value, string index) 453 if(!isSomeString!(T[])) 454 { 455 auto err = bam_aux_update_array(b, index[0..2], TypeChars[TypeIndex!T], cast(int) value.length, value.ptr); 456 if(err == 0) return; 457 switch(errno){ 458 case EINVAL: 459 throw new Exception("The bam record's aux data is corrupt or an existing tag" ~ 460 " with the given ID is not of an integer type (c, C, s, S, i or I)."); 461 case ENOMEM: 462 throw new Exception("The bam data needs to be expanded and either the attempt" ~ 463 " to reallocate the data buffer failed or the resulting buffer would be longer" ~ 464 " than the maximum size allowed in a bam record (2Gbytes)."); 465 case ERANGE: 466 case EOVERFLOW: 467 throw new Exception("val is outside the range that can be stored in an integer" ~ 468 " bam tag (-2147483647 to 4294967295)."); 469 case -1: 470 throw new Exception("Something went wrong adding the bam tag."); 471 case 0: 472 return; 473 default: 474 throw new Exception("Something went wrong adding the bam tag."); 475 } 476 } 477 478 /// chromosome ID of next read in template, defined by bam_hdr_t 479 pragma(inline, true) 480 @nogc @safe nothrow 481 @property int mateTID() { return this.b.core.mtid; } 482 /// ditto 483 pragma(inline, true) 484 @nogc @safe nothrow 485 @property void mateTID(int mtid) { this.b.core.mtid = mtid; } 486 487 /// 0-based leftmost coordinate of next read in template 488 pragma(inline, true) 489 @nogc @safe nothrow 490 @property long matePos() { return this.b.core.mpos; } 491 /// ditto 492 pragma(inline, true) 493 @nogc @safe nothrow 494 @property void matePos(long mpos) { this.b.core.mpos = mpos; } 495 496 /// Presumably Insert size, but is undocumented. 497 /// Per samtools source, is measured 5' to 5' 498 /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320 499 pragma(inline, true) 500 @nogc @safe nothrow 501 @property long insertSize() { return this.b.core.isize; } 502 /// ditto 503 pragma(inline, true) 504 @nogc @safe nothrow 505 @property void insertSize(long isize) { this.b.core.isize = isize; } 506 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/ 507 /// get aligned coordinates per each cigar op 508 InputRange!AlignedCoordinate getAlignedCoordinates(){ 509 return AlignedCoordinatesItr(this.cigar).inputRangeObject; 510 } 511 512 /// get aligned coordinates per each cigar op within range 513 /// range is 0-based half open using chromosomal coordinates 514 InputRange!AlignedCoordinate getAlignedCoordinates(long start, long end){ 515 assert(start >= 0); 516 assert(end <= this.cigar.ref_bases_covered); 517 return this.getAlignedCoordinates.filter!(x=> x.rpos >= start).filter!(x=> x.rpos < end).inputRangeObject; 518 } 519 520 struct AlignedPair(bool refSeq) 521 { 522 Coordinate!(Basis.zero) qpos, rpos; 523 Ops cigar_op; 524 char queryBase; 525 static if(refSeq) char refBase; 526 527 string toString(){ 528 import std.conv : to; 529 static if(refSeq) return rpos.pos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.pos.to!string ~ ":" ~ queryBase; 530 else return rpos.pos.to!string ~ ":" ~ qpos.pos.to!string ~ ":" ~ queryBase; 531 } 532 } 533 534 /// get a range of aligned read and reference positions 535 /// this is meant to recreate functionality from pysam: 536 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 537 /// range is 0-based half open using chromosomal coordinates 538 auto getAlignedPairs(bool withRefSeq)(long start, long end) 539 { 540 import dhtslib.sam.md : MDItr; 541 import std.range : dropExactly; 542 struct AlignedPairs(bool refSeq) 543 { 544 InputRange!AlignedCoordinate coords; 545 static if(refSeq) MDItr mdItr; 546 AlignedPair!refSeq current; 547 const(char)[] qseq; 548 549 this(InputRange!AlignedCoordinate coords, SAMRecord rec, long start, long end){ 550 assert(start < end); 551 assert(start >= 0); 552 assert(end <= rec.cigar.ref_bases_covered); 553 this.coords = coords; 554 qseq = rec.sequence; 555 static if(refSeq){ 556 assert(rec["MD"].exists,"MD tag must exist for record"); 557 mdItr = MDItr(rec); 558 mdItr = mdItr.dropExactly(start); 559 } 560 current.qpos = coords.front.qpos; 561 current.rpos = coords.front.rpos; 562 current.cigar_op = coords.front.cigar_op; 563 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 564 else current.queryBase = qseq[current.qpos]; 565 566 static if(refSeq){ 567 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 568 else if(mdItr.front == '=') current.refBase = current.queryBase; 569 else current.refBase = mdItr.front; 570 } 571 } 572 573 AlignedPair!refSeq front(){ 574 return current; 575 } 576 577 void popFront(){ 578 coords.popFront; 579 if(coords.empty) return; 580 current.qpos = coords.front.qpos; 581 current.rpos = coords.front.rpos; 582 current.cigar_op = coords.front.cigar_op; 583 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 584 else current.queryBase = qseq[current.qpos]; 585 586 static if(refSeq){ 587 if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront; 588 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 589 else if(mdItr.front == '=') current.refBase = current.queryBase; 590 else current.refBase = mdItr.front; 591 } 592 593 } 594 595 bool empty(){ 596 return coords.empty; 597 } 598 599 } 600 auto coords = this.getAlignedCoordinates(start, end); 601 return AlignedPairs!withRefSeq(coords,this,start,end).inputRangeObject; 602 } 603 /// get a range of aligned read and reference positions 604 /// this is meant to recreate functionality from pysam: 605 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 606 auto getAlignedPairs(bool withRefSeq)(){ 607 return getAlignedPairs!withRefSeq(0, this.cigar.ref_bases_covered); 608 } 609 /+ END CHERRY-PICK +/ 610 } 611 612 /// 613 debug(dhtslib_unittest) unittest 614 { 615 import dhtslib.sam; 616 import htslib.hts_log : hts_log_info; 617 import std.path:buildPath,dirName; 618 619 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 620 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation"); 621 hts_log_info(__FUNCTION__, "Loading test file"); 622 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 623 auto ar = bam.allRecords; 624 assert(ar.empty == false); 625 auto read = ar.front; 626 627 //change queryname 628 hts_log_info(__FUNCTION__, "Testing queryname"); 629 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 630 read.queryName="HS18_09653:4:1315:19857:6171"; 631 assert(read.queryName=="HS18_09653:4:1315:19857:6171"); 632 assert(read.cigar.toString() == "78M1D22M"); 633 read.queryName="HS18_09653:4:1315:19857:617122"; 634 assert(read.queryName=="HS18_09653:4:1315:19857:617122"); 635 assert(read.cigar.toString() == "78M1D22M"); 636 assert(read["RG"].check!string); 637 assert(read["RG"].to!string=="1"); 638 639 //change sequence 640 hts_log_info(__FUNCTION__, "Testing sequence"); 641 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 642 read.sequence=read.sequence[0..10]; 643 assert(read.sequence=="AGCTAGGGCA"); 644 645 ubyte[] q=[255,255,255,255,255,255,255,255,255,255]; 646 assert(read.qscores == q); 647 q = [1, 14, 20, 40, 30, 10, 14, 20, 40, 30]; 648 read.qscores(q); 649 assert(read.qscores == q); 650 q[] += 33; 651 652 assert(read.qscoresPhredScaled == cast(char[])(q)); 653 654 assert(read.cigar.toString() == "78M1D22M"); 655 assert(read["RG"].check!string); 656 assert(read["RG"].to!string=="1"); 657 658 //change cigar 659 hts_log_info(__FUNCTION__, "Testing cigar"); 660 auto c=read.cigar; 661 c[$-1].length=21; 662 read.cigar=c; 663 assert(read.cigar.toString() == "78M1D21M"); 664 assert(read["RG"].check!string); 665 assert(read["RG"].to!string=="1"); 666 667 //change tag 668 hts_log_info(__FUNCTION__, "Testing tags"); 669 read["X0"]=2; 670 assert(read["X0"].to!ubyte==2); 671 assert(read["RG"].check!string); 672 673 read["RG"]="test"; 674 assert(read["RG"].to!string=="test"); 675 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 676 } 677 678 /// 679 debug(dhtslib_unittest) unittest 680 { 681 import dhtslib.sam; 682 import htslib.hts_log : hts_log_info; 683 import std.path:buildPath,dirName; 684 685 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 686 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc"); 687 hts_log_info(__FUNCTION__, "Loading test file"); 688 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 689 auto ar = bam.allRecords; 690 assert(ar.empty == false); 691 auto read = ar.front; 692 693 //change queryname 694 hts_log_info(__FUNCTION__, "Testing queryname"); 695 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 696 697 //we do this only to force realloc 698 read.b.m_data=read.b.l_data; 699 700 701 auto prev_m=read.b.m_data; 702 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"; 703 assert(read.b.m_data >prev_m); 704 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"); 705 assert(read.cigar.toString() == "78M1D22M"); 706 assert(read["RG"].check!string); 707 assert(read["RG"].to!string=="1"); 708 709 //we do this only to force realloc 710 read.b.m_data=read.b.l_data; 711 712 //change sequence 713 hts_log_info(__FUNCTION__, "Testing sequence"); 714 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 715 prev_m=read.b.m_data; 716 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"; 717 assert(read.b.m_data >prev_m); 718 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 719 assert(read.cigar.toString() == "78M1D22M"); 720 assert(read["RG"].check!string); 721 assert(read["RG"].to!string=="1"); 722 723 //change cigar 724 hts_log_info(__FUNCTION__, "Testing cigar"); 725 auto c=read.cigar; 726 c=c~c; 727 prev_m=read.b.m_data; 728 read.cigar=c; 729 assert(read.b.m_data >prev_m); 730 assert(read.cigar.toString() == "78M1D22M78M1D22M"); 731 assert(read["RG"].check!string); 732 assert(read["RG"].to!string=="1"); 733 734 //change tag 735 hts_log_info(__FUNCTION__, "Testing tags"); 736 read["X0"]=2; 737 assert(read["X0"].to!ubyte==2); 738 assert(read["RG"].check!string); 739 740 read["RG"]="test"; 741 assert(read["RG"].to!string=="test"); 742 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 743 } 744 745 /// 746 debug(dhtslib_unittest) unittest 747 { 748 import std.stdio; 749 import dhtslib.sam; 750 import dhtslib.sam.md : MDItr; 751 import std.algorithm: map; 752 import std.array: array; 753 import std.path:buildPath,dirName; 754 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 755 756 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 757 auto ar = bam.allRecords; 758 assert(ar.empty == false); 759 SAMRecord read = ar.front; 760 761 auto pairs = read.getAlignedPairs!true(77, 77 + 5); 762 763 assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]); 764 pairs = read.getAlignedPairs!true(77, 77 + 5); 765 assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]); 766 pairs = read.getAlignedPairs!true(77, 77 + 5); 767 assert(pairs.map!(x => x.refBase).array == "GAAAA"); 768 pairs = read.getAlignedPairs!true(77, 77 + 5); 769 assert(pairs.map!(x => x.queryBase).array == "G-AAA"); 770 } 771 772 /// 773 debug(dhtslib_unittest) unittest 774 { 775 import std.stdio : writeln, File; 776 import std.range : drop; 777 import std.utf : toUTFz; 778 import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports) 779 import std.path:buildPath,dirName; 780 import std.conv:to; 781 import dhtslib.sam : parseSam; 782 783 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 784 hts_log_info(__FUNCTION__, "Loading sam file"); 785 auto range = File(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","realn01_exp-a.sam")).byLineCopy(); 786 auto b = bam_init1(); 787 auto hdr = bam_hdr_init(); 788 string hdr_str; 789 assert(range.empty == false); 790 for (auto i = 0; i < 4; i++) 791 { 792 hdr_str ~= range.front ~ "\n"; 793 range.popFront; 794 } 795 hts_log_info(__FUNCTION__, "Header"); 796 writeln(hdr_str); 797 hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str)); 798 hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string); 799 auto r = new SAMRecord(b); 800 hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString); 801 assert(r.cigar.toString == "6M1D117M5D28M"); 802 } 803 804 /// 805 debug(dhtslib_unittest) unittest 806 { 807 import std.stdio; 808 import dhtslib.sam; 809 import dhtslib.sam.md : MDItr; 810 import std.algorithm: map; 811 import std.array: array; 812 import std.parallelism : parallel; 813 import std.path:buildPath,dirName; 814 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 815 816 auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","range.bam"), 0); 817 foreach(rec;parallel(bam.allRecords)){ 818 rec.queryName = "multithreading test"; 819 } 820 821 }