1 /** 2 3 SAMRecord and SAMFile are wrappers for htslib functions relating to SAM/BAM/CRAM* files 4 5 SAMRecord is a structured representation of a SAM/BAM/CRAM* record, 6 backed internally by htslib's bam1_t, but with convenient getters and setters 7 for record attributes, including functions not included in vanilla htslib 8 like returning the sequence or the qscore string (NB: actually char*) 9 10 SAMFile is a structured representation of SAM/BAM/CRAM* file, 11 backed internally by htslib's htsFile and bam_hdr_t, 12 but with convenient getters and setters for file and header attributes, 13 as well as query functions accessible explicitly (`query("chr1:999-9999"`) 14 and by indexing (`samfile["chr1", 999 .. 9999]`). 15 The file object can be iterated as an InputRange to obtain every record in the file. 16 17 Authors: James S Blachly, MD <james.blachly@gmail.com> ; Thomas Gregory <charles.gregory@osumc.edu> 18 19 Bugs: SAMRecord and SAMFile function only as readers, rather than writers (i.e, cannot build SAMFile) 20 Bugs: (*CRAM functionality is limited and untested) 21 22 Date: 2019-01-27 23 24 License: Apache 2.0 25 26 Standards: Sequence Alignment/Map Format Specification v1 14 Dec 2018 http://samtools.github.io/hts-specs/ 27 28 */ 29 module dhtslib.sam; 30 31 import core.stdc.stdlib : calloc, free,realloc; 32 import core.stdc.string:memset; 33 import std.format; 34 import std.parallelism : totalCPUs; 35 import std.stdio : writeln, writefln, stderr, File; 36 import std.string : fromStringz, toStringz; 37 import std.traits : ReturnType; 38 import std.algorithm : filter; 39 40 import dhtslib.htslib.hts : htsFile, hts_open, hts_close, hts_hopen; 41 import dhtslib.htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t; 42 import dhtslib.htslib.hts : seq_nt16_str,seq_nt16_table; 43 import dhtslib.htslib.hts : hts_set_threads; 44 import dhtslib.htslib.hfile : hdopen, hclose, hFILE; 45 46 import dhtslib.htslib.hts_log; 47 import dhtslib.htslib.kstring; 48 import dhtslib.htslib.sam; 49 import dhtslib.cigar; 50 import dhtslib.tagvalue; 51 52 /** 53 Encapsulates a SAM/BAM/CRAM record, 54 using the bam1_t type for memory efficiency, 55 and the htslib helper functions for speed. 56 **/ 57 class SAMRecord 58 { 59 /// 60 bam1_t* b; 61 62 /// Construct blank SAMRecord with empty backing bam1_t 63 this() 64 { 65 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor()"); /// This line triggers memory error when __FUNCTION__, but not when "Other string" 66 //test_log(__FUNCTION__, "ctor()"); /// This line will also trigger the memory error when __FUNCTION__, but not other strings 67 //writeln(__FUNCTION__); // this will not trigger the memory error 68 this.b = bam_init1(); 69 //assert(0); // This will elide(?) the memory error 70 //assert(1 == 2); // This will elide(?) the memory error 71 } 72 73 /// Construct SAMRecord from supplied bam1_t 74 this(bam1_t* b) 75 { 76 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor(bam1_t *b)"); 77 this.b = b; 78 } 79 80 ~this() 81 { 82 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor"); 83 bam_destroy1(this.b); // we created our own in default ctor, or received copy via bam_dup1 84 } 85 86 87 /* bam1_core_t fields */ 88 89 /// chromosome ID, defined by bam_hdr_t 90 pragma(inline, true) 91 @nogc @safe nothrow 92 @property int tid() { return this.b.core.tid; } 93 /// ditto 94 pragma(inline, true) 95 @nogc @safe nothrow 96 @property void tid(int tid) { this.b.core.tid = tid; } 97 98 /// 0-based leftmost coordinate 99 pragma(inline, true) 100 @nogc @safe nothrow 101 @property int pos() { return this.b.core.pos; } 102 /// ditto 103 pragma(inline, true) 104 @nogc @safe nothrow 105 @property void pos(int pos) { this.b.core.pos = pos; } 106 107 // TODO: @field bin bin calculated by bam_reg2bin() 108 109 /// mapping quality 110 pragma(inline, true) 111 @nogc @safe nothrow 112 @property ubyte qual() { return this.b.core.qual; } 113 /// ditto 114 pragma(inline, true) 115 @nogc @safe nothrow 116 @property void qual(ubyte q) { this.b.core.qual = q; } 117 118 // TODO: @field l_qname length of the query name 119 120 /// bitwise flag 121 pragma(inline, true) 122 @nogc @safe nothrow 123 @property ushort flag() { return this.b.core.flag; } 124 /// ditto 125 pragma(inline, true) 126 @nogc @safe nothrow 127 @property void flag(ushort fl) { this.b.core.flag = fl; } 128 129 /// is read paired? 130 pragma(inline, true) 131 @property bool isPaired() 132 { 133 return (b.core.flag & BAM_FPAIRED); 134 } 135 136 /// is read reversed? 137 /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); } 138 @property bool isReversed() 139 { 140 version(LDC) pragma(inline, true); 141 return bam_is_rev(this.b); 142 } 143 144 /// is read mapped? 145 @property bool isMapped() 146 { 147 version(LDC){ 148 pragma(inline, true); 149 } 150 return (b.core.flag & BAM_FUNMAP) == 0; 151 } 152 153 /// is read mate mapped? 154 @property bool isMateMapped() 155 { 156 version(LDC){ 157 pragma(inline, true); 158 } 159 return (b.core.flag & BAM_FMUNMAP) == 0; 160 } 161 162 /// is mate reversed? 163 /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); } 164 pragma(inline, true) 165 @property bool mateReversed() 166 { 167 return bam_is_mrev(this.b); 168 } 169 170 /// is read secondary? 171 @property bool isSecondary() 172 { 173 version(LDC){ 174 pragma(inline, true); 175 } 176 return cast(bool)(b.core.flag & BAM_FSECONDARY); 177 } 178 179 /// is read supplementary? 180 @property bool isSupplementary() 181 { 182 version(LDC){ 183 pragma(inline, true); 184 } 185 return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY); 186 } 187 188 /// Return read strand + or - (as char) 189 @property char strand(){ 190 return ['+','-'][isReversed()]; 191 } 192 193 /// auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); } 194 pragma(inline, true) 195 @property char[] queryName() 196 { 197 return fromStringz(bam_get_qname(this.b)); 198 } 199 200 /// Set query name 201 pragma(inline, true) 202 @property void queryName(string qname) 203 { 204 assert(qname.length<252); 205 206 //make cstring 207 auto qname_n=qname.dup~'\0'; 208 209 //append extra nulls to 32bit align cigar 210 auto extranul=0; 211 for (; qname_n.length % 4 != 0; extranul++) qname_n~='\0'; 212 assert(extranul<=3); 213 214 //get length of rest of data 215 auto l_rest=b.l_data-b.core.l_qname; 216 217 //copy rest of data 218 ubyte[] rest=b.data[b.core.l_qname..b.l_data].dup; 219 220 //if not enough space 221 if(qname_n.length+l_rest>b.m_data){ 222 auto ptr=cast(ubyte*)realloc(b.data,qname_n.length+l_rest); 223 assert(ptr!=null); 224 b.data=ptr; 225 b.m_data=cast(uint)qname_n.length+l_rest; 226 } 227 228 //reassign q_name and rest of data 229 b.data[0..qname_n.length]=(cast(ubyte[])qname_n); 230 b.data[qname_n.length..qname_n.length+l_rest]=rest; 231 232 //reset data length, qname length, extra nul length 233 b.l_data=cast(uint)qname_n.length+l_rest; 234 b.core.l_qname=cast(ubyte)(qname_n.length); 235 b.core.l_extranul=cast(ubyte)extranul; 236 } 237 238 /// query (and quality string) length 239 pragma(inline, true) 240 @property int length() 241 { 242 return this.b.core.l_qseq; 243 } 244 245 /// Return char array of the sequence 246 /// see samtools/sam_view.c: get_read 247 @property const(char)[] sequence() 248 { 249 // calloc fills with \0; +1 len for Cstring 250 // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1); 251 char[] s; 252 s.length = this.b.core.l_qseq; 253 254 // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); } 255 auto seqdata = bam_get_seq(this.b); 256 257 for (int i; i < this.b.core.l_qseq; i++) 258 { 259 s[i] = seq_nt16_str[bam_seqi(seqdata, i)]; 260 } 261 return s; 262 } 263 264 /// Assigns sequence and resets quality score 265 pragma(inline, true) 266 @property void sequence(const(char)[] seq) 267 { 268 //nibble encode sequence 269 ubyte[] en_seq; 270 en_seq.length=(seq.length+1)>>1; 271 for(auto i=0;i<seq.length;i++){ 272 en_seq[i>>1]|=seq_nt16_table[seq[i]]<< ((~i&1)<<2); 273 } 274 275 //get length of data before seq 276 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof); 277 278 //get starting point after seq 279 uint start_rest=l_prev+((b.core.l_qseq+1)>>1)+b.core.l_qseq; 280 281 //copy previous data 282 ubyte[] prev=b.data[0..l_prev].dup; 283 284 //copy rest of data 285 ubyte[] rest=b.data[start_rest..b.l_data].dup; 286 287 //if not enough space 288 if(en_seq.length+seq.length+l_prev+(b.l_data-start_rest)>b.m_data){ 289 auto ptr=cast(ubyte*)realloc(b.data,en_seq.length+seq.length+l_prev+(b.l_data-start_rest)); 290 assert(ptr!=null); 291 b.data=ptr; 292 b.m_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest); 293 } 294 295 //reassign q_name and rest of data 296 b.data[0..l_prev]=prev; 297 b.data[l_prev..en_seq.length+l_prev]=en_seq; 298 //set qscores to 255 299 memset(b.data+en_seq.length+l_prev,255,seq.length); 300 b.data[l_prev+en_seq.length+seq.length..l_prev+en_seq.length+seq.length+(b.l_data-start_rest)]=rest; 301 302 //reset data length, seq length 303 b.l_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest); 304 b.core.l_qseq=cast(int)(seq.length); 305 } 306 307 /// Return char array of the qscores 308 /// see samtools/sam_view.c: get_quality 309 const(char)[] qscores(bool phredScale=true)() 310 { 311 // calloc fills with \0; +1 len for Cstring 312 // char* q = cast(char*) calloc(1, this.b.core.l_qseq + 1); 313 char[] q; 314 q.length = this.b.core.l_qseq; 315 316 // auto bam_get_qual(bam1_t *b) { return (*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname + (((*b).core.l_qseq + 1)>>1); } 317 char* qualdata = cast(char*) bam_get_qual(this.b); 318 319 for (int i; i < this.b.core.l_qseq; i++) 320 static if(phredScale) q[i] = cast(char)(qualdata[i] + 33); 321 else q[i] = cast(char)(qualdata[i]); 322 return q; 323 } 324 325 /// Add qscore sequence given that it is the same length as the bam sequence 326 pragma(inline, true) 327 void q_scores(bool phredScale=true)(const(char)[] seq) 328 { 329 if(seq.length!=b.core.l_qseq){ 330 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 331 return; 332 } 333 auto s= seq.dup; 334 for(auto i=0;i<seq.length;i++){ 335 static if(phredScale) s[i]=seq[i]-33; 336 } 337 338 //get length of data before seq 339 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1); 340 b.data[l_prev..s.length+l_prev]=cast(ubyte[])s; 341 } 342 343 /// Add qscore sequence given that it is the same length as the bam sequence 344 pragma(inline, true) 345 void qscores(ubyte[] seq) 346 { 347 if(seq.length!=b.core.l_qseq){ 348 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 349 return; 350 } 351 352 //get length of data before seq 353 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1); 354 b.data[l_prev..seq.length+l_prev]=seq; 355 } 356 357 /// Create cigar from bam1_t record 358 @property Cigar cigar() 359 { 360 return Cigar(bam_get_cigar(b), (*b).core.n_cigar); 361 } 362 363 /// Assign a cigar string 364 pragma(inline, true) 365 @property void cigar(Cigar cigar) 366 { 367 368 //get length of data before seq 369 uint l_prev=b.core.l_qname; 370 371 //get starting point after seq 372 uint start_rest=l_prev+cast(uint)(b.core.n_cigar*uint.sizeof); 373 374 //copy previous data 375 ubyte[] prev=b.data[0..l_prev].dup; 376 377 //copy rest of data 378 ubyte[] rest=b.data[start_rest..b.l_data].dup; 379 380 //if not enough space 381 if(uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)>b.m_data){ 382 auto ptr=cast(ubyte*)realloc(b.data,uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)); 383 assert(ptr!=null); 384 b.data=ptr; 385 b.m_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest); 386 } 387 388 //reassign q_name and rest of data 389 b.data[0..l_prev]=prev; 390 //without cigar.ops.dup we get the overlapping array copy error 391 b.data[l_prev..uint.sizeof*cigar.ops.length+l_prev]=cast(ubyte[])(cast(uint[])cigar.ops.dup); 392 b.data[l_prev+uint.sizeof*cigar.ops.length..l_prev+uint.sizeof*cigar.ops.length+(b.l_data-start_rest)]=rest; 393 394 //reset data length, seq length 395 b.l_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest); 396 b.core.n_cigar=cast(int)(cigar.ops.length); 397 } 398 399 /// Get aux tag from bam1_t record and return a TagValue 400 TagValue opIndex(string val) 401 { 402 return TagValue(b, val[0..2]); 403 } 404 405 /// Assign a tag key value pair 406 void opIndexAssign(T)(T value,string index) 407 { 408 import std.traits:isIntegral,isSomeString; 409 static if(isIntegral!T){ 410 bam_aux_update_int(b,index[0..2],value); 411 }else static if(is(T==float)){ 412 bam_aux_update_float(b,index[0..2],value); 413 }else static if(isSomeString!T){ 414 bam_aux_update_str(b,index[0..2],cast(int)value.length+1,toStringz(value)); 415 } 416 } 417 418 /// Assign a tag key array pair 419 void opIndexAssign(T:T[])(T[] value,string index) 420 { 421 bam_aux_update_array(b,index[0..2],TypeChars[TypeIndex!T],value.length,value.ptr); 422 } 423 424 /// chromosome ID of next read in template, defined by bam_hdr_t 425 pragma(inline, true) 426 @nogc @safe nothrow 427 @property int mateTID() { return this.b.core.mtid; } 428 /// ditto 429 pragma(inline, true) 430 @nogc @safe nothrow 431 @property void mateTID(int mtid) { this.b.core.mtid = mtid; } 432 433 /// 0-based leftmost coordinate of next read in template 434 pragma(inline, true) 435 @nogc @safe nothrow 436 @property int matePos() { return this.b.core.mpos; } 437 /// ditto 438 pragma(inline, true) 439 @nogc @safe nothrow 440 @property void matePos(int mpos) { this.b.core.mpos = mpos; } 441 442 /// Presumably Insert size, but is undocumented. 443 /// Per samtools source, is measured 5' to 5' 444 /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320 445 pragma(inline, true) 446 @nogc @safe nothrow 447 @property int insertSize() { return this.b.core.isize; } 448 /// ditto 449 pragma(inline, true) 450 @nogc @safe nothrow 451 @property void insertSize(int isize) { this.b.core.isize = isize; } 452 453 /// get aligned coordinates per each cigar op 454 auto getAlignedCoordinates(){ 455 return AlignedCoordinatesItr(this.cigar); 456 } 457 458 /// get aligned coordinates per each cigar op within range 459 /// range is 0-based half open using chromosomal coordinates 460 auto getAlignedCoordinates(int start, int end){ 461 assert(start >= this.pos); 462 assert(end <= this.pos + this.cigar.ref_bases_covered); 463 return this.getAlignedCoordinates.filter!(x=> this.pos + x.rpos >= start).filter!(x=> this.pos + x.rpos < end); 464 } 465 466 struct AlignedPair(bool refSeq) 467 { 468 int qpos, rpos; 469 Ops cigar_op; 470 char queryBase; 471 static if(refSeq) char refBase; 472 473 string toString(){ 474 import std.conv : to; 475 static if(refSeq) return rpos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.to!string ~ ":" ~ queryBase; 476 else return rpos.to!string ~ ":" ~ qpos.to!string ~ ":" ~ queryBase; 477 } 478 } 479 480 /// get a range of aligned read and reference positions 481 /// this is meant to recreate functionality from pysam: 482 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 483 /// range is 0-based half open using chromosomal coordinates 484 auto getAlignedPairs(bool withRefSeq)(int start, int end) 485 { 486 import dhtslib.md : MDItr; 487 import std.range : dropExactly; 488 struct AlignedPairs(bool refSeq) 489 { 490 ReturnType!getAlignedCoordinates coords; 491 static if(refSeq) MDItr mdItr; 492 AlignedPair!refSeq current; 493 const(char)[] qseq; 494 495 auto getAlignedCoordinates(SAMRecord rec, int start, int end){ 496 return rec.getAlignedCoordinates(start,end); 497 } 498 499 this(SAMRecord rec, int start, int end){ 500 assert(start < end); 501 assert(start >= rec.pos); 502 assert(end <= rec.pos + rec.cigar.ref_bases_covered); 503 504 coords = getAlignedCoordinates(rec, start, end); 505 qseq = rec.sequence; 506 static if(refSeq){ 507 assert(rec["MD"].exists,"MD tag must exist for record"); 508 mdItr = MDItr(rec); 509 mdItr = mdItr.dropExactly(start - rec.pos); 510 } 511 current.qpos = coords.front.qpos; 512 current.rpos = coords.front.rpos; 513 current.cigar_op = coords.front.cigar_op; 514 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 515 else current.queryBase = qseq[current.qpos]; 516 517 static if(refSeq){ 518 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 519 else if(mdItr.front == '=') current.refBase = current.queryBase; 520 else current.refBase = mdItr.front; 521 } 522 } 523 524 AlignedPair!refSeq front(){ 525 return current; 526 } 527 528 void popFront(){ 529 coords.popFront; 530 if(coords.empty) return; 531 current.qpos = coords.front.qpos; 532 current.rpos = coords.front.rpos; 533 current.cigar_op = coords.front.cigar_op; 534 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 535 else current.queryBase = qseq[current.qpos]; 536 537 static if(refSeq){ 538 if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront; 539 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 540 else if(mdItr.front == '=') current.refBase = current.queryBase; 541 else current.refBase = mdItr.front; 542 } 543 544 } 545 546 bool empty(){ 547 return coords.empty; 548 } 549 550 } 551 return AlignedPairs!withRefSeq(this,start,end); 552 } 553 /// get a range of aligned read and reference positions 554 /// this is meant to recreate functionality from pysam: 555 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 556 auto getAlignedPairs(bool withRefSeq)(){ 557 return getAlignedPairs!withRefSeq(this.pos, this.pos + this.cigar.ref_bases_covered); 558 } 559 } 560 561 debug(dhtslib_unittest) 562 unittest{ 563 writeln(); 564 import dhtslib.sam; 565 import dhtslib.htslib.hts_log : hts_log_info; 566 import std.path:buildPath,dirName; 567 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 568 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation"); 569 hts_log_info(__FUNCTION__, "Loading test file"); 570 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 571 auto read=bam.all_records.front; 572 573 //change queryname 574 hts_log_info(__FUNCTION__, "Testing queryname"); 575 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 576 read.queryName="HS18_09653:4:1315:19857:6171"; 577 assert(read.queryName=="HS18_09653:4:1315:19857:6171"); 578 assert(read.cigar.toString() == "78M1D22M"); 579 read.queryName="HS18_09653:4:1315:19857:617122"; 580 assert(read.queryName=="HS18_09653:4:1315:19857:617122"); 581 assert(read.cigar.toString() == "78M1D22M"); 582 assert(read["RG"].check!string); 583 assert(read["RG"].to!string=="1"); 584 585 //change sequence 586 hts_log_info(__FUNCTION__, "Testing sequence"); 587 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 588 read.sequence=read.sequence[0..10]; 589 assert(read.sequence=="AGCTAGGGCA"); 590 ubyte[] q=[255,255,255,255,255,255,255,255,255,255]; 591 assert(read.qscores!false==cast(char[])(q)); 592 q=[1,14,20,40,30,10,14,20,40,30]; 593 read.qscores(q); 594 assert(read.qscores!false==cast(char[])(q)); 595 q[]+=33; 596 assert(read.qscores==cast(char[])(q)); 597 assert(read.cigar.toString() == "78M1D22M"); 598 assert(read["RG"].check!string); 599 assert(read["RG"].to!string=="1"); 600 601 //change cigar 602 hts_log_info(__FUNCTION__, "Testing cigar"); 603 auto c=read.cigar; 604 c.ops[$-1].length=21; 605 read.cigar=c; 606 assert(read.cigar.toString() == "78M1D21M"); 607 assert(read["RG"].check!string); 608 assert(read["RG"].to!string=="1"); 609 610 //change tag 611 hts_log_info(__FUNCTION__, "Testing tags"); 612 read["X0"]=2; 613 assert(read["X0"].to!ubyte==2); 614 assert(read["RG"].check!string); 615 616 read["RG"]="test"; 617 assert(read["RG"].to!string=="test"); 618 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 619 } 620 621 debug(dhtslib_unittest) 622 unittest{ 623 writeln(); 624 import dhtslib.sam; 625 import dhtslib.htslib.hts_log : hts_log_info; 626 import std.path:buildPath,dirName; 627 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 628 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc"); 629 hts_log_info(__FUNCTION__, "Loading test file"); 630 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 631 auto read=bam.all_records.front; 632 633 //change queryname 634 hts_log_info(__FUNCTION__, "Testing queryname"); 635 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 636 637 //we do this only to force realloc 638 read.b.m_data=read.b.l_data; 639 640 641 auto prev_m=read.b.m_data; 642 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"; 643 assert(read.b.m_data >prev_m); 644 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"); 645 assert(read.cigar.toString() == "78M1D22M"); 646 assert(read["RG"].check!string); 647 assert(read["RG"].to!string=="1"); 648 649 //change sequence 650 hts_log_info(__FUNCTION__, "Testing sequence"); 651 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 652 prev_m=read.b.m_data; 653 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"; 654 assert(read.b.m_data >prev_m); 655 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 656 assert(read.cigar.toString() == "78M1D22M"); 657 assert(read["RG"].check!string); 658 assert(read["RG"].to!string=="1"); 659 660 //change cigar 661 hts_log_info(__FUNCTION__, "Testing cigar"); 662 auto c=read.cigar; 663 c.ops=c.ops~c.ops; 664 prev_m=read.b.m_data; 665 read.cigar=c; 666 assert(read.b.m_data >prev_m); 667 assert(read.cigar.toString() == "78M1D22M78M1D22M"); 668 assert(read["RG"].check!string); 669 assert(read["RG"].to!string=="1"); 670 671 //change tag 672 hts_log_info(__FUNCTION__, "Testing tags"); 673 read["X0"]=2; 674 assert(read["X0"].to!ubyte==2); 675 assert(read["RG"].check!string); 676 677 read["RG"]="test"; 678 assert(read["RG"].to!string=="test"); 679 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 680 } 681 682 unittest 683 { 684 import std.stdio; 685 import dhtslib.sam; 686 import dhtslib.md : MDItr; 687 import std.algorithm: map; 688 import std.array: array; 689 import std.path:buildPath,dirName; 690 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 691 auto read=bam.all_records.front; 692 auto pairs = read.getAlignedPairs!true(read.pos + 77, read.pos + 77 + 5); 693 assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]); 694 assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]); 695 assert(pairs.map!(x => x.refBase).array == "GAAAA"); 696 assert(pairs.map!(x => x.queryBase).array == "G-AAA"); 697 } 698 699 alias SAMFile = SAMReader; 700 /** 701 Encapsulates a SAM/BAM file. 702 703 Implements InputRange interface using htslib calls. 704 If indexed, Random-access query via multidimensional slicing. 705 */ 706 struct SAMReader 707 { 708 /// filename; as usable from D 709 string filename; 710 711 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 712 private immutable(char)* fn; 713 714 /// htsFile 715 private htsFile* fp; 716 717 /// hFILE if required 718 private hFILE* f; 719 720 /// header struct 721 bam_hdr_t* header = null; 722 723 /// SAM/BAM/CRAM index 724 private hts_idx_t* idx; 725 726 private kstring_t line; 727 728 /// disallow copying 729 @disable this(this); 730 731 /** Create a representation of SAM/BAM/CRAM file from given filename or File 732 733 Params: 734 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 735 extra_threads = extra threads for compression/decompression 736 -1 use all available cores (default) 737 0 use no extra threads 738 >1 add indicated number of threads (to a default of 1) 739 */ 740 this(T)(T f, int extra_threads = -1) 741 if (is(T == string) || is(T == File)) 742 { 743 import std.parallelism : totalCPUs; 744 745 // open file 746 static if (is(T == string)) 747 { 748 this.filename = f; 749 this.fn = toStringz(f); 750 this.fp = hts_open(this.fn, cast(immutable(char)*) "r"); 751 } 752 else static if (is(T == File)) 753 { 754 this.filename = f.name(); 755 this.fn = toStringz(f.name); 756 this.f = hdopen(f.fileno, cast(immutable(char)*) "r"); 757 this.fp = hts_hopen(this.f, this.fn, cast(immutable(char)*) "r"); 758 } 759 else assert(0); 760 761 if (extra_threads == -1) 762 { 763 if ( totalCPUs > 1) 764 { 765 hts_log_info(__FUNCTION__, 766 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 767 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 768 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 769 hts_set_threads(this.fp, totalCPUs); 770 } 771 } 772 else if (extra_threads > 0) 773 { 774 if ((extra_threads + 1) > totalCPUs) 775 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 776 hts_set_threads(this.fp, extra_threads); 777 } 778 else if (extra_threads == 0) 779 { 780 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 781 } 782 else 783 { 784 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 785 } 786 787 // read header 788 this.header = sam_hdr_read(this.fp); 789 this.idx = sam_index_load(this.fp, this.fn); 790 if (this.idx == null) 791 { 792 hts_log_info(__FUNCTION__, "SAM index not found"); 793 // TODO: attempt to build 794 // TODO: edit range to return empty immediately if no idx 795 } 796 hts_log_debug(__FUNCTION__, format("SAM index: %s", this.idx)); 797 } 798 799 ~this() 800 { 801 debug (dhtslib_debug) 802 { 803 writeln("SAMFile dtor"); 804 } 805 806 bam_hdr_destroy(this.header); 807 808 if((this.fp !is null) && (this.f is null)) 809 { 810 const auto ret = hts_close(fp); 811 if (ret < 0) 812 writefln("There was an error closing %s", fromStringz(this.fn)); 813 } 814 } 815 816 /// number of reference sequences; from bam_hdr_t 817 @property int n_targets() const 818 { 819 return this.header.n_targets; 820 } 821 822 /// length of specific reference sequence (by number) 823 uint target_len(int target) const 824 { 825 return this.header.target_len[target]; 826 } 827 828 /// lengths of the reference sequences 829 @property uint[] target_lens() const 830 { 831 return this.header.target_len[0 .. this.n_targets].dup; 832 } 833 834 /// names of the reference sequences 835 @property string[] target_names() const 836 { 837 string[] names; 838 names.length = this.n_targets; 839 foreach (i; 0 .. this.n_targets) 840 { 841 names[i] = fromStringz(this.header.target_name[i]).idup; 842 } 843 return names; 844 } 845 846 /// reference contig name to integer id 847 /// Calls int bam_name2id(bam_hdr_t *h, const char *_ref); 848 int target_id(string name) 849 { 850 return bam_name2id(this.header, toStringz(name)); 851 } 852 853 /** Query a region and return matching alignments as an InputRange */ 854 /// Query by chr, start, end 855 auto query(string chrom, int start, int end) 856 { 857 string q = format("%s:%d-%d", chrom, start, end); 858 return query(q); 859 } 860 861 /// Query by string chr:start-end 862 auto query(string q) 863 { 864 auto itr = sam_itr_querys(this.idx, this.header, toStringz(q)); 865 return RecordRange(this.fp, itr); 866 } 867 868 /// Query by contig id, start, end 869 auto query(int tid, int start, int end) 870 { 871 auto itr = sam_itr_queryi(this.idx, tid, start, end); 872 return RecordRange(this.fp, itr); 873 } 874 875 /// Query by ["chr1:1-2","chr1:1000-1001"] 876 auto query(string[] regions) 877 { 878 return RecordRangeMulti(this.fp, this.idx, this.header, &(this), regions); 879 } 880 881 /// bam["chr1:1-2"] 882 auto opIndex(string q) 883 { 884 return query(q); 885 } 886 887 /// bam["chr1", 1..2] 888 auto opIndex(string tid, int[2] pos) 889 { 890 return query(tid, pos[0], pos[1]); 891 } 892 893 /// bam["chr1", 1] 894 auto opIndex(string tid, int pos) 895 { 896 return query(tid, pos, pos + 1); 897 } 898 899 /** Deprecated: use multidimensional slicing with second parameter as range (["chr1", 1 .. 2]) */ 900 /// bam["chr1", 1, 2] 901 deprecated auto opIndex(string tid, int pos1, int pos2) 902 { 903 return query(tid, pos1, pos2); 904 } 905 906 /// Integer-based chr below 907 /// bam[0, 1..2] 908 auto opIndex(int tid, int[2] pos) 909 { 910 return query(tid, pos[0], pos[1]); 911 } 912 913 /// bam[0, 1] 914 auto opIndex(int tid, int pos) 915 { 916 return query(tid, pos, pos + 1); 917 } 918 919 /// bam[0, 1, 2] 920 deprecated auto opIndex(int tid, int pos1, int pos2) 921 { 922 return query(tid, pos1, pos2); 923 } 924 925 /// support bam["chr1", 1..2 ] 926 int[2] opSlice(size_t dim)(int start, int end) if (dim == 1) 927 { 928 return [start, end]; 929 } 930 931 /// Return an InputRange representing all recods in the SAM/BAM/CRAM 932 AllRecordsRange all_records() 933 { 934 auto range = AllRecordsRange(this.fp, this.header, bam_init1()); 935 range.popFront(); 936 return range; 937 } 938 alias allRecords = all_records; 939 940 /// Iterate through all records in the SAM/BAM/CRAM 941 struct AllRecordsRange 942 { 943 private htsFile* fp; // belongs to parent; shared 944 private bam_hdr_t* header; // belongs to parent; shared 945 private bam1_t* b; 946 private int success; 947 ~this() 948 { 949 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor"); 950 } 951 952 /// InputRange interface 953 @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range) 954 { 955 // int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b); 956 if (success >= 0) 957 return false; 958 else if (success == -1) 959 return true; 960 else 961 { 962 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1"); 963 return true; 964 } 965 } 966 /// ditto 967 void popFront() 968 { 969 success = sam_read1(this.fp, this.header, this.b); 970 // noop? 971 // free this.b ? 972 973 //bam_destroy1(this.b); 974 } 975 /// ditto 976 SAMRecord front() 977 { 978 return new SAMRecord(bam_dup1(this.b)); 979 } 980 981 } 982 983 /// Iterate over records falling within a queried region (TODO: itr_multi_query) 984 struct RecordRange 985 { 986 private htsFile* fp; 987 private hts_itr_t* itr; 988 private bam1_t* b; 989 990 private int r; 991 992 /// 993 this(htsFile* fp, hts_itr_t* itr) 994 { 995 this.itr = itr; 996 this.fp = fp; 997 b = bam_init1(); 998 //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); } 999 hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0)); 1000 popFront(); 1001 } 1002 1003 /// InputRange interface 1004 @property bool empty() 1005 { 1006 return (r <= 0 && itr.finished) ? true : false; 1007 } 1008 /// ditto 1009 void popFront() 1010 { 1011 r = sam_itr_next(this.fp, this.itr, this.b); 1012 } 1013 /// ditto 1014 SAMRecord front() 1015 { 1016 return new SAMRecord(bam_dup1(b)); 1017 } 1018 } 1019 1020 /// Iterate over records falling within queried regions using a RegionList 1021 struct RecordRangeMulti 1022 { 1023 private htsFile* fp; 1024 private hts_itr_multi_t* itr; 1025 private bam1_t* b; 1026 private hts_reglist_t[] rlist; 1027 private int r; 1028 1029 /// 1030 this(htsFile* fp, hts_idx_t* idx, bam_hdr_t* header, SAMFile* sam, string[] regions) 1031 { 1032 rlist = RegionList(sam, regions).getRegList(); 1033 this.fp = fp; 1034 b = bam_init1(); 1035 itr = sam_itr_regions(idx, header, rlist.ptr, cast(uint) rlist.length); 1036 //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); } 1037 hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0)); 1038 popFront(); 1039 } 1040 1041 /// InputRange interface 1042 @property bool empty() 1043 { 1044 return (r <= 0 && itr.finished) ? true : false; 1045 } 1046 /// ditto 1047 void popFront() 1048 { 1049 r = sam_itr_multi_next(this.fp, this.itr, this.b); 1050 } 1051 /// ditto 1052 SAMRecord front() 1053 { 1054 return new SAMRecord(bam_dup1(b)); 1055 } 1056 } 1057 1058 /// List of regions based on sam/bam 1059 struct RegionList 1060 { 1061 import std.algorithm.iteration : splitter; 1062 import std.algorithm.sorting : sort; 1063 import std.range : drop, array; 1064 import std.conv : to; 1065 1066 private hts_reglist_t[string] rlist; 1067 private SAMFile* sam; 1068 1069 /// 1070 this(SAMFile* sam, string[] queries) 1071 { 1072 this.sam = sam; 1073 foreach (q; queries) 1074 { 1075 addRegion(q); 1076 } 1077 } 1078 1079 /// Add a region in standard format (chr1:2000-3000) to the RegionList 1080 void addRegion(string reg) 1081 { 1082 //chr:1-2 1083 //split into chr and 1-2 1084 auto split = reg.splitter(":"); 1085 auto chr = split.front; 1086 //split 1-2 into 1 and 2 1087 split = split.drop(1).front.splitter("-"); 1088 if (sam.target_id(chr) < 0) 1089 { 1090 hts_log_error(__FUNCTION__, "tid not present in sam/bam"); 1091 } 1092 addRegion(sam.target_id(chr), split.front.to!int, split.drop(1).front.to!int); 1093 } 1094 1095 /// Add a region by {target/contig/chr id, start coord, end coord} to the RegionList 1096 void addRegion(int tid, int beg, int end) 1097 { 1098 if (tid > this.sam.n_targets || tid < 0) 1099 hts_log_error(__FUNCTION__, "tid not present in sam/bam"); 1100 1101 auto val = (this.sam.target_names[tid] in this.rlist); 1102 hts_pair32_t p; 1103 1104 if (beg < 0) 1105 hts_log_error(__FUNCTION__, "first coordinate < 0"); 1106 1107 if (beg >= this.sam.target_len(tid)) 1108 hts_log_error(__FUNCTION__, "first coordinate larger than tid length"); 1109 1110 if (end < 0) 1111 hts_log_error(__FUNCTION__, "second coordinate < 0"); 1112 1113 if (end >= this.sam.target_len(tid)) 1114 hts_log_error(__FUNCTION__, "second coordinate larger than tid length"); 1115 1116 p.beg = beg; 1117 p.end = end; 1118 hts_pair32_t[] plist; 1119 if (val is null) 1120 { 1121 hts_reglist_t r; 1122 1123 //set tid 1124 r.tid = tid; 1125 1126 //create intervals 1127 plist = plist ~ p; 1128 r.intervals = plist.ptr; 1129 r.count = cast(uint) plist.length; 1130 r.min_beg = p.beg; 1131 r.max_end = p.end; 1132 this.rlist[this.sam.target_names[tid]] = r; 1133 } 1134 else 1135 { 1136 plist = (val.intervals[0 .. val.count] ~ p).sort!(cmpInterval).array; 1137 val.intervals = plist.ptr; 1138 val.count = cast(uint) plist.length; 1139 val.min_beg = plist[0].beg; 1140 val.max_end = plist[$ - 1].end; 1141 } 1142 } 1143 1144 hts_reglist_t[] getRegList() 1145 { 1146 return rlist.byValue.array.sort!(cmpRegList).array; 1147 } 1148 } 1149 } 1150 1151 /// Nucleotide complement table; from samtools/sam_view.c 1152 private const(char)[16] seq_comp_table = [0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15]; 1153 1154 /// Reverse a string in place; from samtools/sam_view.c 1155 // TODO: Could be sped up, and potentially made safer? by passing strlen since already known 1156 private char* reverse(char* str) 1157 { 1158 import core.stdc.string : strlen; 1159 1160 auto i = strlen(str) - 1, j = 0; 1161 char ch; 1162 while (i > j) 1163 { 1164 ch = str[i]; 1165 str[i] = str[j]; 1166 str[j] = ch; 1167 i--; 1168 j++; 1169 } 1170 return str; 1171 } 1172 1173 /// SAM/BAM/CRAM disk format 1174 enum SAMWriterTypes{ 1175 BAM, 1176 UBAM, 1177 SAM, 1178 CRAM, 1179 DEDUCE 1180 } 1181 1182 /// Encapsulates a SAM/BAM/CRAM, but as write-only 1183 struct SAMWriter 1184 { 1185 /// filename; as usable from D 1186 string filename; 1187 1188 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 1189 private immutable(char)* fn; 1190 1191 /// htsFile 1192 private htsFile* fp; 1193 1194 /// hFILE if required 1195 private hFILE* f; 1196 1197 /// header struct 1198 bam_hdr_t* header = null; 1199 1200 private kstring_t line; 1201 1202 /// disallow copying 1203 @disable this(this); 1204 1205 /** Create a representation of SAM/BAM/CRAM file from given filename or File 1206 1207 Params: 1208 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 1209 extra_threads = extra threads for compression/decompression 1210 -1 use all available cores (default) 1211 0 use no extra threads 1212 >1 add indicated number of threads (to a default of 1) 1213 */ 1214 this(T)(T f,bam_hdr_t * header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1) 1215 if (is(T == string) || is(T == File)) 1216 { 1217 import std.parallelism : totalCPUs; 1218 char[] mode; 1219 if(t == SAMWriterTypes.BAM) mode=['w','b','\0']; 1220 else if(t == SAMWriterTypes.UBAM) mode=['w','b','u','\0']; 1221 else if(t == SAMWriterTypes.SAM) mode=['w','\0']; 1222 else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0']; 1223 // open file 1224 static if (is(T == string)) 1225 { 1226 if(t == SAMWriterTypes.DEDUCE){ 1227 import std.path:extension; 1228 auto ext=extension(f); 1229 if(ext==".bam") mode=['w','b','\0']; 1230 else if(ext==".sam") mode=['w','\0']; 1231 else if(ext==".cram") mode=['w','c','\0']; 1232 else { 1233 hts_log_error(__FUNCTION__,"extension "~ext~" not valid"); 1234 throw new Exception("DEDUCE SAMWriterType used with non-valid extension"); 1235 } 1236 } 1237 this.filename = f; 1238 this.fn = toStringz(f); 1239 this.fp = hts_open(this.fn, mode.ptr); 1240 } 1241 else static if (is(T == File)) 1242 { 1243 assert(t!=SAMWriterTypes.DEDUCE); 1244 this.filename = f.name(); 1245 this.fn = toStringz(f.name); 1246 this.f = hdopen(f.fileno, cast(immutable(char)*) "w"); 1247 this.fp = hts_hopen(this.f, this.fn, mode.ptr); 1248 } 1249 else assert(0); 1250 1251 if (extra_threads == -1) 1252 { 1253 if ( totalCPUs > 1) 1254 { 1255 hts_log_info(__FUNCTION__, 1256 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 1257 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 1258 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 1259 hts_set_threads(this.fp, totalCPUs); 1260 } 1261 } 1262 else if (extra_threads > 0) 1263 { 1264 if ((extra_threads + 1) > totalCPUs) 1265 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 1266 hts_set_threads(this.fp, extra_threads); 1267 } 1268 else if (extra_threads == 0) 1269 { 1270 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 1271 } 1272 else 1273 { 1274 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 1275 } 1276 1277 // read header 1278 this.header = bam_hdr_dup(header); 1279 sam_hdr_write(this.fp,this.header); 1280 } 1281 1282 ~this() 1283 { 1284 debug (dhtslib_debug) 1285 { 1286 writeln("SAMWriter dtor"); 1287 } 1288 1289 bam_hdr_destroy(this.header); 1290 1291 } 1292 1293 /// close file 1294 void close(){ 1295 const auto ret = hts_close(this.fp); 1296 if (ret < 0) 1297 stderr.writefln("There was an error closing %s", fromStringz(this.fn)); 1298 } 1299 1300 /// write out to disk 1301 void write(SAMRecord * rec){ 1302 const auto ret = sam_write1(this.fp,this.header,rec.b); 1303 assert(ret>=0); 1304 } 1305 } 1306 debug(dhtslib_unittest) 1307 unittest{ 1308 writeln(); 1309 import dhtslib.sam; 1310 import dhtslib.htslib.hts_log : hts_log_info; 1311 import std.path:buildPath,dirName; 1312 import std.string:fromStringz; 1313 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1314 hts_log_info(__FUNCTION__, "Testing SAMWriter"); 1315 hts_log_info(__FUNCTION__, "Loading test file"); 1316 auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0); 1317 auto readrange = sam.allRecords; 1318 auto sam2 = SAMWriter("test.bam",sam.header); 1319 hts_log_info(__FUNCTION__, "Getting read 1"); 1320 auto read = readrange.front(); 1321 sam2.write(&read); 1322 sam2.close; 1323 destroy(sam2); 1324 sam = SAMFile("test.bam"); 1325 readrange = sam.allRecords; 1326 read = readrange.front(); 1327 writeln(read.sequence); 1328 assert(read.sequence=="GCTAGCTCAG"); 1329 // destroy(sam2); 1330 } 1331 1332 /// Used in sorting 1333 bool cmpInterval(hts_pair32_t a, hts_pair32_t b) 1334 { 1335 if (a.beg < b.beg) 1336 { 1337 return true; 1338 } 1339 if (a.end < b.end) 1340 { 1341 return true; 1342 } 1343 return false; 1344 } 1345 1346 /// Used in sorting 1347 bool cmpRegList(hts_reglist_t a, hts_reglist_t b) 1348 { 1349 if (a.tid < b.tid) 1350 { 1351 return true; 1352 } 1353 return false; 1354 } 1355 1356 /// Parse text line of SAM; Used in unittest 1357 private int parseSam(string line, bam_hdr_t* header, bam1_t* b) 1358 { 1359 import dhtslib.htslib.kstring : kstring_t; 1360 import std.utf : toUTFz; 1361 1362 kstring_t k; 1363 k.s = toUTFz!(char*)(line.dup); 1364 k.m = line.length + 1; 1365 k.l = line.length + 1; 1366 return sam_parse1(&k, header, b); 1367 } 1368 debug(dhtslib_unittest) 1369 unittest{ 1370 writeln(); 1371 import dhtslib.sam; 1372 import dhtslib.htslib.hts_log : hts_log_info; 1373 import std.path:buildPath,dirName; 1374 import std.string:fromStringz; 1375 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1376 hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord"); 1377 hts_log_info(__FUNCTION__, "Loading test file"); 1378 auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0); 1379 auto readrange = sam.allRecords; 1380 hts_log_info(__FUNCTION__, "Getting read 1"); 1381 auto read = readrange.front(); 1382 writeln(read.sequence); 1383 assert(read.sequence=="GCTAGCTCAG"); 1384 } 1385 1386 debug(dhtslib_unittest) 1387 unittest 1388 { 1389 import std.stdio : writeln; 1390 import std.range : drop; 1391 import std.utf : toUTFz; 1392 import dhtslib.htslib.hts_log; // @suppress(dscanner.suspicious.local_imports) 1393 import std.path:buildPath,dirName; 1394 import std.conv:to; 1395 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1396 hts_log_info(__FUNCTION__, "Loading sam file"); 1397 auto range = File(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","realn01_exp-a.sam")).byLineCopy(); 1398 auto b = bam_init1(); 1399 auto hdr = bam_hdr_init(); 1400 string hdr_str; 1401 for (auto i = 0; i < 4; i++) 1402 { 1403 hdr_str ~= range.front ~ "\n"; 1404 range.popFront; 1405 } 1406 hts_log_info(__FUNCTION__, "Header"); 1407 writeln(hdr_str); 1408 hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str)); 1409 hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string); 1410 auto r = new SAMRecord(b); 1411 hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString); 1412 assert(r.cigar.toString == "6M1D117M5D28M"); 1413 }