1 /** 2 3 SAMRecord and SAMFile are wrappers for htslib functions relating to SAM/BAM/CRAM* files 4 5 SAMRecord is a structured representation of a SAM/BAM/CRAM* record, 6 backed internally by htslib's bam1_t, but with convenient getters and setters 7 for record attributes, including functions not included in vanilla htslib 8 like returning the sequence or the qscore string (NB: actually char*) 9 10 SAMFile is a structured representation of SAM/BAM/CRAM* file, 11 backed internally by htslib's htsFile and bam_hdr_t, 12 but with convenient getters and setters for file and header attributes, 13 as well as query functions accessible explicitly (`query("chr1:999-9999"`) 14 and by indexing (`samfile["chr1", 999 .. 9999]`). 15 The file object can be iterated as an InputRange to obtain every record in the file. 16 17 Authors: James S Blachly, MD <james.blachly@gmail.com> ; Thomas Gregory <charles.gregory@osumc.edu> 18 19 Bugs: SAMRecord and SAMFile function only as readers, rather than writers (i.e, cannot build SAMFile) 20 Bugs: (*CRAM functionality is limited and untested) 21 22 Date: 2020-09-12 23 24 License: Apache 2.0 25 26 Standards: Sequence Alignment/Map Format Specification v1 14 Dec 2018 http://samtools.github.io/hts-specs/ 27 28 */ 29 module dhtslib.sam; 30 31 public import dhtslib.sam.header; 32 33 import core.stdc.stdlib : calloc, free,realloc; 34 import core.stdc..string:memset; 35 import std.format; 36 import std.parallelism : totalCPUs; 37 import std.stdio : writeln, writefln, stderr, File; 38 import std..string : fromStringz, toStringz; 39 import std.traits : ReturnType; 40 import std.algorithm : filter; 41 42 import htslib.hts : htsFile, hts_open, hts_close, hts_hopen; 43 import htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t; 44 import htslib.hts : seq_nt16_str,seq_nt16_table; 45 import htslib.hts : hts_set_threads; 46 import htslib.hfile : hdopen, hclose, hFILE; 47 48 import htslib.hts_log; 49 import htslib.kstring; 50 import htslib.sam; 51 import dhtslib.cigar; 52 import dhtslib.tagvalue; 53 54 /** 55 Encapsulates a SAM/BAM/CRAM record, 56 using the bam1_t type for memory efficiency, 57 and the htslib helper functions for speed. 58 **/ 59 class SAMRecord 60 { 61 /// 62 bam1_t* b; 63 64 /// Construct blank SAMRecord with empty backing bam1_t 65 this() 66 { 67 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor()"); /// This line triggers memory error when __FUNCTION__, but not when "Other string" 68 //test_log(__FUNCTION__, "ctor()"); /// This line will also trigger the memory error when __FUNCTION__, but not other strings 69 //writeln(__FUNCTION__); // this will not trigger the memory error 70 this.b = bam_init1(); 71 //assert(0); // This will elide(?) the memory error 72 //assert(1 == 2); // This will elide(?) the memory error 73 } 74 75 /// Construct SAMRecord from supplied bam1_t 76 this(bam1_t* b) 77 { 78 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "ctor(bam1_t *b)"); 79 this.b = b; 80 } 81 82 ~this() 83 { 84 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor"); 85 bam_destroy1(this.b); // we created our own in default ctor, or received copy via bam_dup1 86 } 87 88 89 /* bam1_core_t fields */ 90 91 /// chromosome ID, defined by bam_hdr_t 92 pragma(inline, true) 93 @nogc @safe nothrow 94 @property int tid() { return this.b.core.tid; } 95 /// ditto 96 pragma(inline, true) 97 @nogc @safe nothrow 98 @property void tid(int tid) { this.b.core.tid = tid; } 99 100 /// 0-based leftmost coordinate 101 pragma(inline, true) 102 @nogc @safe nothrow 103 @property long pos() { return this.b.core.pos; } 104 /// ditto 105 pragma(inline, true) 106 @nogc @safe nothrow 107 @property void pos(long pos) { this.b.core.pos = pos; } 108 109 // TODO: @field bin bin calculated by bam_reg2bin() 110 111 /// mapping quality 112 pragma(inline, true) 113 @nogc @safe nothrow 114 @property ubyte qual() { return this.b.core.qual; } 115 /// ditto 116 pragma(inline, true) 117 @nogc @safe nothrow 118 @property void qual(ubyte q) { this.b.core.qual = q; } 119 120 /// length of query name. Terminated by 1-4 \0, see l_extranul. Never set directly. 121 pragma(inline, true) 122 @nogc @safe nothrow 123 @property int l_qname() { return this.b.core.l_qname; } 124 125 /// number of EXTRA trailing \0 in qname; 0 <= l_extranul <= 3, see l_qname. Never set directly. 126 pragma(inline, true) 127 @nogc @safe nothrow 128 @property int l_extranul() { return this.b.core.l_extranul; } 129 130 /// bitwise flag 131 pragma(inline, true) 132 @nogc @safe nothrow 133 @property ushort flag() { return this.b.core.flag; } 134 /// ditto 135 pragma(inline, true) 136 @nogc @safe nothrow 137 @property void flag(ushort fl) { this.b.core.flag = fl; } 138 139 /// is read paired? 140 pragma(inline, true) 141 @property bool isPaired() 142 { 143 return (b.core.flag & BAM_FPAIRED); 144 } 145 146 /// is read reversed? 147 /// bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); } 148 @property bool isReversed() 149 { 150 version(LDC) pragma(inline, true); 151 return bam_is_rev(this.b); 152 } 153 154 /// is read mapped? 155 @property bool isMapped() 156 { 157 version(LDC){ 158 pragma(inline, true); 159 } 160 return (b.core.flag & BAM_FUNMAP) == 0; 161 } 162 163 /// is read mate mapped? 164 @property bool isMateMapped() 165 { 166 version(LDC){ 167 pragma(inline, true); 168 } 169 return (b.core.flag & BAM_FMUNMAP) == 0; 170 } 171 172 /// is mate reversed? 173 /// bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); } 174 pragma(inline, true) 175 @property bool mateReversed() 176 { 177 return bam_is_mrev(this.b); 178 } 179 180 /// is read secondary? 181 @property bool isSecondary() 182 { 183 version(LDC){ 184 pragma(inline, true); 185 } 186 return cast(bool)(b.core.flag & BAM_FSECONDARY); 187 } 188 189 /// is read supplementary? 190 @property bool isSupplementary() 191 { 192 version(LDC){ 193 pragma(inline, true); 194 } 195 return cast(bool)(b.core.flag & BAM_FSUPPLEMENTARY); 196 } 197 198 /// Return read strand + or - (as char) 199 @property char strand(){ 200 return ['+','-'][isReversed()]; 201 } 202 203 /// auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); } 204 pragma(inline, true) 205 @property char[] queryName() 206 { 207 return fromStringz(bam_get_qname(this.b)); 208 } 209 210 /// Set query name 211 pragma(inline, true) 212 @property void queryName(string qname) 213 { 214 assert(qname.length<252); 215 216 //make cstring 217 auto qname_n=qname.dup~'\0'; 218 219 //append extra nulls to 32bit align cigar 220 auto extranul=0; 221 for (; qname_n.length % 4 != 0; extranul++) qname_n~='\0'; 222 assert(extranul<=3); 223 224 //get length of rest of data 225 auto l_rest=b.l_data-b.core.l_qname; 226 227 //copy rest of data 228 ubyte[] rest=b.data[b.core.l_qname..b.l_data].dup; 229 230 //if not enough space 231 if(qname_n.length+l_rest>b.m_data){ 232 auto ptr=cast(ubyte*)realloc(b.data,qname_n.length+l_rest); 233 assert(ptr!=null); 234 b.data=ptr; 235 b.m_data=cast(uint)qname_n.length+l_rest; 236 } 237 238 //reassign q_name and rest of data 239 b.data[0..qname_n.length]=(cast(ubyte[])qname_n); 240 b.data[qname_n.length..qname_n.length+l_rest]=rest; 241 242 //reset data length, qname length, extra nul length 243 b.l_data=cast(uint)qname_n.length+l_rest; 244 b.core.l_qname=cast(ubyte)(qname_n.length); 245 b.core.l_extranul=cast(ubyte)extranul; 246 } 247 248 /// query (and quality string) length 249 pragma(inline, true) 250 @property int length() 251 { 252 return this.b.core.l_qseq; 253 } 254 255 /// Return char array of the sequence 256 /// see samtools/sam_view.c: get_read 257 @property const(char)[] sequence() 258 { 259 // calloc fills with \0; +1 len for Cstring 260 // char* s = cast(char*) calloc(1, this.b.core.l_qseq + 1); 261 char[] s; 262 s.length = this.b.core.l_qseq; 263 264 // auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); } 265 auto seqdata = bam_get_seq(this.b); 266 267 for (int i; i < this.b.core.l_qseq; i++) 268 { 269 s[i] = seq_nt16_str[bam_seqi(seqdata, i)]; 270 } 271 return s; 272 } 273 274 /// Assigns sequence and resets quality score 275 pragma(inline, true) 276 @property void sequence(const(char)[] seq) 277 { 278 //nibble encode sequence 279 ubyte[] en_seq; 280 en_seq.length=(seq.length+1)>>1; 281 for(auto i=0;i<seq.length;i++){ 282 en_seq[i>>1]|=seq_nt16_table[seq[i]]<< ((~i&1)<<2); 283 } 284 285 //get length of data before seq 286 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof); 287 288 //get starting point after seq 289 uint start_rest=l_prev+((b.core.l_qseq+1)>>1)+b.core.l_qseq; 290 291 //copy previous data 292 ubyte[] prev=b.data[0..l_prev].dup; 293 294 //copy rest of data 295 ubyte[] rest=b.data[start_rest..b.l_data].dup; 296 297 //if not enough space 298 if(en_seq.length+seq.length+l_prev+(b.l_data-start_rest)>b.m_data){ 299 auto ptr=cast(ubyte*)realloc(b.data,en_seq.length+seq.length+l_prev+(b.l_data-start_rest)); 300 assert(ptr!=null); 301 b.data=ptr; 302 b.m_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest); 303 } 304 305 //reassign q_name and rest of data 306 b.data[0..l_prev]=prev; 307 b.data[l_prev..en_seq.length+l_prev]=en_seq; 308 //set qscores to 255 309 memset(b.data+en_seq.length+l_prev,255,seq.length); 310 b.data[l_prev+en_seq.length+seq.length..l_prev+en_seq.length+seq.length+(b.l_data-start_rest)]=rest; 311 312 //reset data length, seq length 313 b.l_data=cast(uint)en_seq.length+cast(uint)seq.length+l_prev+(b.l_data-start_rest); 314 b.core.l_qseq=cast(int)(seq.length); 315 } 316 317 /// Return char array of the qscores 318 /// see samtools/sam_view.c: get_quality 319 const(char)[] qscores(bool phredScale=true)() 320 { 321 // calloc fills with \0; +1 len for Cstring 322 // char* q = cast(char*) calloc(1, this.b.core.l_qseq + 1); 323 char[] q; 324 q.length = this.b.core.l_qseq; 325 326 // auto bam_get_qual(bam1_t *b) { return (*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname + (((*b).core.l_qseq + 1)>>1); } 327 char* qualdata = cast(char*) bam_get_qual(this.b); 328 329 for (int i; i < this.b.core.l_qseq; i++) 330 static if(phredScale) q[i] = cast(char)(qualdata[i] + 33); 331 else q[i] = cast(char)(qualdata[i]); 332 return q; 333 } 334 335 /// Add qscore sequence given that it is the same length as the bam sequence 336 pragma(inline, true) 337 void q_scores(bool phredScale=true)(const(char)[] seq) 338 { 339 if(seq.length!=b.core.l_qseq){ 340 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 341 return; 342 } 343 auto s= seq.dup; 344 for(auto i=0;i<seq.length;i++){ 345 static if(phredScale) s[i]=seq[i]-33; 346 } 347 348 //get length of data before seq 349 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1); 350 b.data[l_prev..s.length+l_prev]=cast(ubyte[])s; 351 } 352 353 /// Add qscore sequence given that it is the same length as the bam sequence 354 pragma(inline, true) 355 void qscores(ubyte[] seq) 356 { 357 if(seq.length!=b.core.l_qseq){ 358 hts_log_error(__FUNCTION__,"qscore length does not match sequence length"); 359 return; 360 } 361 362 //get length of data before seq 363 uint l_prev=b.core.l_qname + cast(uint)(b.core.n_cigar*uint.sizeof)+((b.core.l_qseq+1)>>1); 364 b.data[l_prev..seq.length+l_prev]=seq; 365 } 366 367 /// Create cigar from bam1_t record 368 @property Cigar cigar() 369 { 370 return Cigar(bam_get_cigar(b), (*b).core.n_cigar); 371 } 372 373 /// Assign a cigar string 374 pragma(inline, true) 375 @property void cigar(Cigar cigar) 376 { 377 378 //get length of data before seq 379 uint l_prev=b.core.l_qname; 380 381 //get starting point after seq 382 uint start_rest=l_prev+cast(uint)(b.core.n_cigar*uint.sizeof); 383 384 //copy previous data 385 ubyte[] prev=b.data[0..l_prev].dup; 386 387 //copy rest of data 388 ubyte[] rest=b.data[start_rest..b.l_data].dup; 389 390 //if not enough space 391 if(uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)>b.m_data){ 392 auto ptr=cast(ubyte*)realloc(b.data,uint.sizeof*cigar.ops.length+l_prev+(b.l_data-start_rest)); 393 assert(ptr!=null); 394 b.data=ptr; 395 b.m_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest); 396 } 397 398 //reassign q_name and rest of data 399 b.data[0..l_prev]=prev; 400 //without cigar.ops.dup we get the overlapping array copy error 401 b.data[l_prev..uint.sizeof*cigar.ops.length+l_prev]=cast(ubyte[])(cast(uint[])cigar.ops.dup); 402 b.data[l_prev+uint.sizeof*cigar.ops.length..l_prev+uint.sizeof*cigar.ops.length+(b.l_data-start_rest)]=rest; 403 404 //reset data length, seq length 405 b.l_data=cast(uint)(uint.sizeof*cigar.ops.length)+l_prev+(b.l_data-start_rest); 406 b.core.n_cigar=cast(int)(cigar.ops.length); 407 } 408 409 /// Get aux tag from bam1_t record and return a TagValue 410 TagValue opIndex(string val) 411 { 412 return TagValue(b, val[0..2]); 413 } 414 415 /// Assign a tag key value pair 416 void opIndexAssign(T)(T value,string index) 417 { 418 import std.traits:isIntegral,isSomeString; 419 static if(isIntegral!T){ 420 bam_aux_update_int(b,index[0..2],value); 421 }else static if(is(T==float)){ 422 bam_aux_update_float(b,index[0..2],value); 423 }else static if(isSomeString!T){ 424 bam_aux_update_str(b,index[0..2],cast(int)value.length+1,toStringz(value)); 425 } 426 } 427 428 /// Assign a tag key array pair 429 void opIndexAssign(T:T[])(T[] value,string index) 430 { 431 bam_aux_update_array(b,index[0..2],TypeChars[TypeIndex!T],value.length,value.ptr); 432 } 433 434 /// chromosome ID of next read in template, defined by bam_hdr_t 435 pragma(inline, true) 436 @nogc @safe nothrow 437 @property int mateTID() { return this.b.core.mtid; } 438 /// ditto 439 pragma(inline, true) 440 @nogc @safe nothrow 441 @property void mateTID(int mtid) { this.b.core.mtid = mtid; } 442 443 /// 0-based leftmost coordinate of next read in template 444 pragma(inline, true) 445 @nogc @safe nothrow 446 @property long matePos() { return this.b.core.mpos; } 447 /// ditto 448 pragma(inline, true) 449 @nogc @safe nothrow 450 @property void matePos(long mpos) { this.b.core.mpos = mpos; } 451 452 /// Presumably Insert size, but is undocumented. 453 /// Per samtools source, is measured 5' to 5' 454 /// https://github.com/samtools/samtools/blob/bd1a409aa750d25d70a093405d174db8709a3f2c/bam_mate.c#L320 455 pragma(inline, true) 456 @nogc @safe nothrow 457 @property long insertSize() { return this.b.core.isize; } 458 /// ditto 459 pragma(inline, true) 460 @nogc @safe nothrow 461 @property void insertSize(long isize) { this.b.core.isize = isize; } 462 /+ BEGIN CHERRY-PICK: TODO: confirm below chunk with htslib-1.10 (mainly long coordinates +/ 463 /// get aligned coordinates per each cigar op 464 auto getAlignedCoordinates(){ 465 return AlignedCoordinatesItr(this.cigar); 466 } 467 468 /// get aligned coordinates per each cigar op within range 469 /// range is 0-based half open using chromosomal coordinates 470 auto getAlignedCoordinates(long start, long end){ 471 assert(start >= this.pos); 472 assert(end <= this.pos + this.cigar.ref_bases_covered); 473 return this.getAlignedCoordinates.filter!(x=> this.pos + x.rpos >= start).filter!(x=> this.pos + x.rpos < end); 474 } 475 476 struct AlignedPair(bool refSeq) 477 { 478 long qpos, rpos; 479 Ops cigar_op; 480 char queryBase; 481 static if(refSeq) char refBase; 482 483 string toString(){ 484 import std.conv : to; 485 static if(refSeq) return rpos.to!string ~ ":" ~ refBase ~ " > " ~ qpos.to!string ~ ":" ~ queryBase; 486 else return rpos.to!string ~ ":" ~ qpos.to!string ~ ":" ~ queryBase; 487 } 488 } 489 490 /// get a range of aligned read and reference positions 491 /// this is meant to recreate functionality from pysam: 492 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 493 /// range is 0-based half open using chromosomal coordinates 494 auto getAlignedPairs(bool withRefSeq)(long start, long end) 495 { 496 import dhtslib.md : MDItr; 497 import std.range : dropExactly; 498 struct AlignedPairs(bool refSeq) 499 { 500 ReturnType!getAlignedCoordinates coords; 501 static if(refSeq) MDItr mdItr; 502 AlignedPair!refSeq current; 503 const(char)[] qseq; 504 505 auto getAlignedCoordinates(SAMRecord rec, long start, long end){ 506 return rec.getAlignedCoordinates(start,end); 507 } 508 509 this(SAMRecord rec, long start, long end){ 510 assert(start < end); 511 assert(start >= rec.pos); 512 assert(end <= rec.pos + rec.cigar.ref_bases_covered); 513 514 coords = getAlignedCoordinates(rec, start, end); 515 qseq = rec.sequence; 516 static if(refSeq){ 517 assert(rec["MD"].exists,"MD tag must exist for record"); 518 mdItr = MDItr(rec); 519 mdItr = mdItr.dropExactly(start - rec.pos); 520 } 521 current.qpos = coords.front.qpos; 522 current.rpos = coords.front.rpos; 523 current.cigar_op = coords.front.cigar_op; 524 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 525 else current.queryBase = qseq[current.qpos]; 526 527 static if(refSeq){ 528 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 529 else if(mdItr.front == '=') current.refBase = current.queryBase; 530 else current.refBase = mdItr.front; 531 } 532 } 533 534 AlignedPair!refSeq front(){ 535 return current; 536 } 537 538 void popFront(){ 539 coords.popFront; 540 if(coords.empty) return; 541 current.qpos = coords.front.qpos; 542 current.rpos = coords.front.rpos; 543 current.cigar_op = coords.front.cigar_op; 544 if(!CigarOp(0, current.cigar_op).is_query_consuming) current.queryBase = '-'; 545 else current.queryBase = qseq[current.qpos]; 546 547 static if(refSeq){ 548 if(CigarOp(0, current.cigar_op).is_reference_consuming) mdItr.popFront; 549 if(!CigarOp(0, current.cigar_op).is_reference_consuming) current.refBase = '-'; 550 else if(mdItr.front == '=') current.refBase = current.queryBase; 551 else current.refBase = mdItr.front; 552 } 553 554 } 555 556 bool empty(){ 557 return coords.empty; 558 } 559 560 } 561 return AlignedPairs!withRefSeq(this,start,end); 562 } 563 /// get a range of aligned read and reference positions 564 /// this is meant to recreate functionality from pysam: 565 /// https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs 566 auto getAlignedPairs(bool withRefSeq)(){ 567 return getAlignedPairs!withRefSeq(this.pos, this.pos + this.cigar.ref_bases_covered); 568 } 569 /+ END CHERRY-PICK +/ 570 } 571 572 debug(dhtslib_unittest) 573 unittest{ 574 writeln(); 575 import dhtslib.sam; 576 import htslib.hts_log : hts_log_info; 577 import std.path:buildPath,dirName; 578 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 579 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation"); 580 hts_log_info(__FUNCTION__, "Loading test file"); 581 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 582 auto read=bam.all_records.front; 583 584 //change queryname 585 hts_log_info(__FUNCTION__, "Testing queryname"); 586 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 587 read.queryName="HS18_09653:4:1315:19857:6171"; 588 assert(read.queryName=="HS18_09653:4:1315:19857:6171"); 589 assert(read.cigar.toString() == "78M1D22M"); 590 read.queryName="HS18_09653:4:1315:19857:617122"; 591 assert(read.queryName=="HS18_09653:4:1315:19857:617122"); 592 assert(read.cigar.toString() == "78M1D22M"); 593 assert(read["RG"].check!string); 594 assert(read["RG"].to!string=="1"); 595 596 //change sequence 597 hts_log_info(__FUNCTION__, "Testing sequence"); 598 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 599 read.sequence=read.sequence[0..10]; 600 assert(read.sequence=="AGCTAGGGCA"); 601 ubyte[] q=[255,255,255,255,255,255,255,255,255,255]; 602 assert(read.qscores!false==cast(char[])(q)); 603 q=[1,14,20,40,30,10,14,20,40,30]; 604 read.qscores(q); 605 assert(read.qscores!false==cast(char[])(q)); 606 q[]+=33; 607 assert(read.qscores==cast(char[])(q)); 608 assert(read.cigar.toString() == "78M1D22M"); 609 assert(read["RG"].check!string); 610 assert(read["RG"].to!string=="1"); 611 612 //change cigar 613 hts_log_info(__FUNCTION__, "Testing cigar"); 614 auto c=read.cigar; 615 c.ops[$-1].length=21; 616 read.cigar=c; 617 assert(read.cigar.toString() == "78M1D21M"); 618 assert(read["RG"].check!string); 619 assert(read["RG"].to!string=="1"); 620 621 //change tag 622 hts_log_info(__FUNCTION__, "Testing tags"); 623 read["X0"]=2; 624 assert(read["X0"].to!ubyte==2); 625 assert(read["RG"].check!string); 626 627 read["RG"]="test"; 628 assert(read["RG"].to!string=="test"); 629 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 630 } 631 632 debug(dhtslib_unittest) 633 unittest{ 634 writeln(); 635 import dhtslib.sam; 636 import htslib.hts_log : hts_log_info; 637 import std.path:buildPath,dirName; 638 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 639 hts_log_info(__FUNCTION__, "Testing SAMRecord mutation w/ realloc"); 640 hts_log_info(__FUNCTION__, "Loading test file"); 641 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 642 auto read=bam.all_records.front; 643 644 //change queryname 645 hts_log_info(__FUNCTION__, "Testing queryname"); 646 assert(read.queryName=="HS18_09653:4:1315:19857:61712"); 647 648 //we do this only to force realloc 649 read.b.m_data=read.b.l_data; 650 651 652 auto prev_m=read.b.m_data; 653 read.queryName="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"; 654 assert(read.b.m_data >prev_m); 655 assert(read.queryName=="HS18_09653:4:1315:19857:61712HS18_09653:4:1315:19857:61712"); 656 assert(read.cigar.toString() == "78M1D22M"); 657 assert(read["RG"].check!string); 658 assert(read["RG"].to!string=="1"); 659 660 //change sequence 661 hts_log_info(__FUNCTION__, "Testing sequence"); 662 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 663 prev_m=read.b.m_data; 664 read.sequence="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"; 665 assert(read.b.m_data >prev_m); 666 assert(read.sequence=="AGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTTAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAACCAAAAATAATTTCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAACATTTTTTGGGTTTTT"); 667 assert(read.cigar.toString() == "78M1D22M"); 668 assert(read["RG"].check!string); 669 assert(read["RG"].to!string=="1"); 670 671 //change cigar 672 hts_log_info(__FUNCTION__, "Testing cigar"); 673 auto c=read.cigar; 674 c.ops=c.ops~c.ops; 675 prev_m=read.b.m_data; 676 read.cigar=c; 677 assert(read.b.m_data >prev_m); 678 assert(read.cigar.toString() == "78M1D22M78M1D22M"); 679 assert(read["RG"].check!string); 680 assert(read["RG"].to!string=="1"); 681 682 //change tag 683 hts_log_info(__FUNCTION__, "Testing tags"); 684 read["X0"]=2; 685 assert(read["X0"].to!ubyte==2); 686 assert(read["RG"].check!string); 687 688 read["RG"]="test"; 689 assert(read["RG"].to!string=="test"); 690 hts_log_info(__FUNCTION__, "Cigar:" ~ read.cigar.toString()); 691 } 692 693 unittest 694 { 695 import std.stdio; 696 import dhtslib.sam; 697 import dhtslib.md : MDItr; 698 import std.algorithm: map; 699 import std.array: array; 700 import std.path:buildPath,dirName; 701 auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","range.bam"), 0); 702 auto read=bam.all_records.front; 703 auto pairs = read.getAlignedPairs!true(read.pos + 77, read.pos + 77 + 5); 704 assert(pairs.map!(x => x.qpos).array == [77,77,78,79,80]); 705 assert(pairs.map!(x => x.rpos).array == [77,78,79,80,81]); 706 assert(pairs.map!(x => x.refBase).array == "GAAAA"); 707 assert(pairs.map!(x => x.queryBase).array == "G-AAA"); 708 } 709 710 alias SAMFile = SAMReader; 711 /** 712 Encapsulates a SAM/BAM file. 713 714 Implements InputRange interface using htslib calls. 715 If indexed, Random-access query via multidimensional slicing. 716 */ 717 struct SAMReader 718 { 719 /// filename; as usable from D 720 string filename; 721 722 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 723 private immutable(char)* fn; 724 725 /// htsFile 726 private htsFile* fp; 727 728 /// hFILE if required 729 private hFILE* f; 730 731 /// header struct 732 bam_hdr_t* header = null; 733 734 /// SAM/BAM/CRAM index 735 private hts_idx_t* idx; 736 737 private kstring_t line; 738 739 /// disallow copying 740 @disable this(this); 741 742 /** Create a representation of SAM/BAM/CRAM file from given filename or File 743 744 Params: 745 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 746 extra_threads = extra threads for compression/decompression 747 -1 use all available cores (default) 748 0 use no extra threads 749 >1 add indicated number of threads (to a default of 1) 750 */ 751 this(T)(T f, int extra_threads = -1) 752 if (is(T == string) || is(T == File)) 753 { 754 import std.parallelism : totalCPUs; 755 756 // open file 757 static if (is(T == string)) 758 { 759 this.filename = f; 760 this.fn = toStringz(f); 761 this.fp = hts_open(this.fn, cast(immutable(char)*) "r"); 762 } 763 else static if (is(T == File)) 764 { 765 this.filename = f.name(); 766 this.fn = toStringz(f.name); 767 this.f = hdopen(f.fileno, cast(immutable(char)*) "r"); 768 this.fp = hts_hopen(this.f, this.fn, cast(immutable(char)*) "r"); 769 } 770 else assert(0); 771 772 if (extra_threads == -1) 773 { 774 if ( totalCPUs > 1) 775 { 776 hts_log_info(__FUNCTION__, 777 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 778 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 779 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 780 hts_set_threads(this.fp, totalCPUs); 781 } 782 } 783 else if (extra_threads > 0) 784 { 785 if ((extra_threads + 1) > totalCPUs) 786 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 787 hts_set_threads(this.fp, extra_threads); 788 } 789 else if (extra_threads == 0) 790 { 791 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 792 } 793 else 794 { 795 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 796 } 797 798 // read header 799 this.header = sam_hdr_read(this.fp); 800 this.idx = sam_index_load(this.fp, this.fn); 801 if (this.idx == null) 802 { 803 hts_log_info(__FUNCTION__, "SAM index not found"); 804 // TODO: attempt to build 805 // TODO: edit range to return empty immediately if no idx 806 } 807 hts_log_debug(__FUNCTION__, format("SAM index: %s", this.idx)); 808 } 809 810 ~this() 811 { 812 debug (dhtslib_debug) 813 { 814 writeln("SAMFile dtor"); 815 } 816 817 bam_hdr_destroy(this.header); 818 819 if((this.fp !is null) && (this.f is null)) 820 { 821 const auto ret = hts_close(fp); 822 if (ret < 0) 823 writefln("There was an error closing %s", fromStringz(this.fn)); 824 } 825 } 826 827 /// number of reference sequences; from bam_hdr_t 828 @property int n_targets() const 829 { 830 return this.header.n_targets; 831 } 832 833 /// length of specific reference sequence (by number) 834 uint target_len(int target) const 835 { 836 return this.header.target_len[target]; 837 } 838 839 /// lengths of the reference sequences 840 @property uint[] target_lens() const 841 { 842 return this.header.target_len[0 .. this.n_targets].dup; 843 } 844 845 /// names of the reference sequences 846 @property string[] target_names() const 847 { 848 string[] names; 849 names.length = this.n_targets; 850 foreach (i; 0 .. this.n_targets) 851 { 852 names[i] = fromStringz(this.header.target_name[i]).idup; 853 } 854 return names; 855 } 856 857 /// reference contig name to integer id 858 /// Calls int bam_name2id(bam_hdr_t *h, const char *_ref); 859 int target_id(string name) 860 { 861 return bam_name2id(this.header, toStringz(name)); 862 } 863 864 /** Query a region and return matching alignments as an InputRange */ 865 /// Query by chr, start, end 866 auto query(string chrom, long start, long end) 867 { 868 string q = format("%s:%d-%d", chrom, start, end); 869 return query(q); 870 } 871 872 /// Query by string chr:start-end 873 auto query(string q) 874 { 875 auto itr = sam_itr_querys(this.idx, this.header, toStringz(q)); 876 return RecordRange(this.fp, itr); 877 } 878 879 /// Query by contig id, start, end 880 auto query(int tid, long start, long end) 881 { 882 auto itr = sam_itr_queryi(this.idx, tid, start, end); 883 return RecordRange(this.fp, itr); 884 } 885 886 /// Query by ["chr1:1-2","chr1:1000-1001"] 887 auto query(string[] regions) 888 { 889 return RecordRangeMulti(this.fp, this.idx, this.header, &(this), regions); 890 } 891 892 /// bam["chr1:1-2"] 893 auto opIndex(string q) 894 { 895 return query(q); 896 } 897 898 /// bam["chr1", 1..2] 899 auto opIndex(string tid, int[2] pos) 900 { 901 return query(tid, pos[0], pos[1]); 902 } 903 904 /// bam["chr1", 1] 905 auto opIndex(string tid, int pos) 906 { 907 return query(tid, pos, pos + 1); 908 } 909 910 /** Deprecated: use multidimensional slicing with second parameter as range (["chr1", 1 .. 2]) */ 911 /// bam["chr1", 1, 2] 912 deprecated auto opIndex(string tid, int pos1, int pos2) 913 { 914 return query(tid, pos1, pos2); 915 } 916 917 /// Integer-based chr below 918 /// bam[0, 1..2] 919 auto opIndex(int tid, int[2] pos) 920 { 921 return query(tid, pos[0], pos[1]); 922 } 923 924 /// bam[0, 1] 925 auto opIndex(int tid, int pos) 926 { 927 return query(tid, pos, pos + 1); 928 } 929 930 /// bam[0, 1, 2] 931 deprecated auto opIndex(int tid, int pos1, int pos2) 932 { 933 return query(tid, pos1, pos2); 934 } 935 936 /// support bam["chr1", 1..2 ] 937 int[2] opSlice(size_t dim)(int start, int end) if (dim == 1) 938 { 939 return [start, end]; 940 } 941 942 /// Return an InputRange representing all recods in the SAM/BAM/CRAM 943 AllRecordsRange all_records() 944 { 945 auto range = AllRecordsRange(this.fp, this.header, bam_init1()); 946 range.popFront(); 947 return range; 948 } 949 alias allRecords = all_records; 950 951 /// Iterate through all records in the SAM/BAM/CRAM 952 struct AllRecordsRange 953 { 954 private htsFile* fp; // belongs to parent; shared 955 private bam_hdr_t* header; // belongs to parent; shared 956 private bam1_t* b; 957 private int success; 958 ~this() 959 { 960 //debug(dhtslib_debug) hts_log_debug(__FUNCTION__, "dtor"); 961 } 962 963 /// InputRange interface 964 @property bool empty() // @suppress(dscanner.suspicious.incorrect_infinite_range) 965 { 966 // int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b); 967 if (success >= 0) 968 return false; 969 else if (success == -1) 970 return true; 971 else 972 { 973 hts_log_error(__FUNCTION__, "*** ERROR sam_read1 < -1"); 974 return true; 975 } 976 } 977 /// ditto 978 void popFront() 979 { 980 success = sam_read1(this.fp, this.header, this.b); 981 // noop? 982 // free this.b ? 983 984 //bam_destroy1(this.b); 985 } 986 /// ditto 987 SAMRecord front() 988 { 989 return new SAMRecord(bam_dup1(this.b)); 990 } 991 992 } 993 994 /// Iterate over records falling within a queried region (TODO: itr_multi_query) 995 struct RecordRange 996 { 997 private htsFile* fp; 998 private hts_itr_t* itr; 999 private bam1_t* b; 1000 1001 private int r; 1002 1003 /// 1004 this(htsFile* fp, hts_itr_t* itr) 1005 { 1006 this.itr = itr; 1007 this.fp = fp; 1008 b = bam_init1(); 1009 //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); } 1010 hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0)); 1011 popFront(); 1012 } 1013 1014 /// InputRange interface 1015 @property bool empty() 1016 { 1017 return (r <= 0 && itr.finished) ? true : false; 1018 } 1019 /// ditto 1020 void popFront() 1021 { 1022 r = sam_itr_next(this.fp, this.itr, this.b); 1023 } 1024 /// ditto 1025 SAMRecord front() 1026 { 1027 return new SAMRecord(bam_dup1(b)); 1028 } 1029 } 1030 1031 /// Iterate over records falling within queried regions using a RegionList 1032 struct RecordRangeMulti 1033 { 1034 private htsFile* fp; 1035 private hts_itr_multi_t* itr; 1036 private bam1_t* b; 1037 private hts_reglist_t[] rlist; 1038 private int r; 1039 1040 /// 1041 this(htsFile* fp, hts_idx_t* idx, bam_hdr_t* header, SAMFile* sam, string[] regions) 1042 { 1043 rlist = RegionList(sam, regions).getRegList(); 1044 this.fp = fp; 1045 b = bam_init1(); 1046 itr = sam_itr_regions(idx, header, rlist.ptr, cast(uint) rlist.length); 1047 //debug(dhtslib_debug) { writeln("sam_itr null? ",(cast(int)itr)==0); } 1048 hts_log_debug(__FUNCTION__, format("SAM itr null?: %s", cast(int) itr == 0)); 1049 popFront(); 1050 } 1051 1052 /// InputRange interface 1053 @property bool empty() 1054 { 1055 return (r <= 0 && itr.finished) ? true : false; 1056 } 1057 /// ditto 1058 void popFront() 1059 { 1060 r = sam_itr_multi_next(this.fp, this.itr, this.b); 1061 } 1062 /// ditto 1063 SAMRecord front() 1064 { 1065 return new SAMRecord(bam_dup1(b)); 1066 } 1067 } 1068 1069 /// List of regions based on sam/bam 1070 struct RegionList 1071 { 1072 import std.algorithm.iteration : splitter; 1073 import std.algorithm.sorting : sort; 1074 import std.range : drop, array; 1075 import std.conv : to; 1076 1077 private hts_reglist_t[string] rlist; 1078 private SAMFile* sam; 1079 1080 /// 1081 this(SAMFile* sam, string[] queries) 1082 { 1083 this.sam = sam; 1084 foreach (q; queries) 1085 { 1086 addRegion(q); 1087 } 1088 } 1089 1090 /// Add a region in standard format (chr1:2000-3000) to the RegionList 1091 void addRegion(string reg) 1092 { 1093 //chr:1-2 1094 //split into chr and 1-2 1095 auto split = reg.splitter(":"); 1096 auto chr = split.front; 1097 //split 1-2 into 1 and 2 1098 split = split.drop(1).front.splitter("-"); 1099 if (sam.target_id(chr) < 0) 1100 { 1101 hts_log_error(__FUNCTION__, "tid not present in sam/bam"); 1102 } 1103 addRegion(sam.target_id(chr), split.front.to!int, split.drop(1).front.to!int); 1104 } 1105 1106 /// Add a region by {target/contig/chr id, start coord, end coord} to the RegionList 1107 void addRegion(int tid, int beg, int end) 1108 { 1109 if (tid > this.sam.n_targets || tid < 0) 1110 hts_log_error(__FUNCTION__, "tid not present in sam/bam"); 1111 1112 auto val = (this.sam.target_names[tid] in this.rlist); 1113 hts_pair32_t p; 1114 1115 if (beg < 0) 1116 hts_log_error(__FUNCTION__, "first coordinate < 0"); 1117 1118 if (beg >= this.sam.target_len(tid)) 1119 hts_log_error(__FUNCTION__, "first coordinate larger than tid length"); 1120 1121 if (end < 0) 1122 hts_log_error(__FUNCTION__, "second coordinate < 0"); 1123 1124 if (end >= this.sam.target_len(tid)) 1125 hts_log_error(__FUNCTION__, "second coordinate larger than tid length"); 1126 1127 p.beg = beg; 1128 p.end = end; 1129 hts_pair32_t[] plist; 1130 if (val is null) 1131 { 1132 hts_reglist_t r; 1133 1134 //set tid 1135 r.tid = tid; 1136 1137 //create intervals 1138 plist = plist ~ p; 1139 r.intervals = plist.ptr; 1140 r.count = cast(uint) plist.length; 1141 r.min_beg = p.beg; 1142 r.max_end = p.end; 1143 this.rlist[this.sam.target_names[tid]] = r; 1144 } 1145 else 1146 { 1147 plist = (val.intervals[0 .. val.count] ~ p).sort!(cmpInterval).array; 1148 val.intervals = plist.ptr; 1149 val.count = cast(uint) plist.length; 1150 val.min_beg = plist[0].beg; 1151 val.max_end = plist[$ - 1].end; 1152 } 1153 } 1154 1155 hts_reglist_t[] getRegList() 1156 { 1157 return rlist.byValue.array.sort!(cmpRegList).array; 1158 } 1159 } 1160 } 1161 1162 /// Nucleotide complement table; from samtools/sam_view.c 1163 private const(char)[16] seq_comp_table = [0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15]; 1164 1165 /// Reverse a string in place; from samtools/sam_view.c 1166 // TODO: Could be sped up, and potentially made safer? by passing strlen since already known 1167 private char* reverse(char* str) 1168 { 1169 import core.stdc..string : strlen; 1170 1171 auto i = strlen(str) - 1, j = 0; 1172 char ch; 1173 while (i > j) 1174 { 1175 ch = str[i]; 1176 str[i] = str[j]; 1177 str[j] = ch; 1178 i--; 1179 j++; 1180 } 1181 return str; 1182 } 1183 1184 /// SAM/BAM/CRAM disk format 1185 enum SAMWriterTypes{ 1186 BAM, 1187 UBAM, 1188 SAM, 1189 CRAM, 1190 DEDUCE 1191 } 1192 1193 /// Encapsulates a SAM/BAM/CRAM, but as write-only 1194 struct SAMWriter 1195 { 1196 /// filename; as usable from D 1197 string filename; 1198 1199 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 1200 private immutable(char)* fn; 1201 1202 /// htsFile 1203 private htsFile* fp; 1204 1205 /// hFILE if required 1206 private hFILE* f; 1207 1208 /// header struct 1209 bam_hdr_t* header = null; 1210 1211 private kstring_t line; 1212 1213 /// disallow copying 1214 @disable this(this); 1215 1216 /** Create a representation of SAM/BAM/CRAM file from given filename or File 1217 1218 Params: 1219 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 1220 extra_threads = extra threads for compression/decompression 1221 -1 use all available cores (default) 1222 0 use no extra threads 1223 >1 add indicated number of threads (to a default of 1) 1224 */ 1225 this(T)(T f,bam_hdr_t * header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1) 1226 if (is(T == string) || is(T == File)) 1227 { 1228 import std.parallelism : totalCPUs; 1229 char[] mode; 1230 if(t == SAMWriterTypes.BAM) mode=['w','b','\0']; 1231 else if(t == SAMWriterTypes.UBAM) mode=['w','b','u','\0']; 1232 else if(t == SAMWriterTypes.SAM) mode=['w','\0']; 1233 else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0']; 1234 // open file 1235 static if (is(T == string)) 1236 { 1237 if(t == SAMWriterTypes.DEDUCE){ 1238 import std.path:extension; 1239 auto ext=extension(f); 1240 if(ext==".bam") mode=['w','b','\0']; 1241 else if(ext==".sam") mode=['w','\0']; 1242 else if(ext==".cram") mode=['w','c','\0']; 1243 else { 1244 hts_log_error(__FUNCTION__,"extension "~ext~" not valid"); 1245 throw new Exception("DEDUCE SAMWriterType used with non-valid extension"); 1246 } 1247 } 1248 this.filename = f; 1249 this.fn = toStringz(f); 1250 this.fp = hts_open(this.fn, mode.ptr); 1251 } 1252 else static if (is(T == File)) 1253 { 1254 assert(t!=SAMWriterTypes.DEDUCE); 1255 this.filename = f.name(); 1256 this.fn = toStringz(f.name); 1257 this.f = hdopen(f.fileno, cast(immutable(char)*) "w"); 1258 this.fp = hts_hopen(this.f, this.fn, mode.ptr); 1259 } 1260 else assert(0); 1261 1262 if (extra_threads == -1) 1263 { 1264 if ( totalCPUs > 1) 1265 { 1266 hts_log_info(__FUNCTION__, 1267 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 1268 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 1269 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 1270 hts_set_threads(this.fp, totalCPUs); 1271 } 1272 } 1273 else if (extra_threads > 0) 1274 { 1275 if ((extra_threads + 1) > totalCPUs) 1276 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 1277 hts_set_threads(this.fp, extra_threads); 1278 } 1279 else if (extra_threads == 0) 1280 { 1281 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 1282 } 1283 else 1284 { 1285 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 1286 } 1287 1288 // read header 1289 this.header = bam_hdr_dup(header); 1290 sam_hdr_write(this.fp,this.header); 1291 } 1292 1293 ~this() 1294 { 1295 debug (dhtslib_debug) 1296 { 1297 writeln("SAMWriter dtor"); 1298 } 1299 1300 bam_hdr_destroy(this.header); 1301 1302 } 1303 1304 /// close file 1305 void close(){ 1306 const auto ret = hts_close(this.fp); 1307 if (ret < 0) 1308 stderr.writefln("There was an error closing %s", fromStringz(this.fn)); 1309 } 1310 1311 /// write out to disk 1312 void write(SAMRecord * rec){ 1313 const auto ret = sam_write1(this.fp,this.header,rec.b); 1314 assert(ret>=0); 1315 } 1316 } 1317 debug(dhtslib_unittest) 1318 unittest{ 1319 writeln(); 1320 import dhtslib.sam; 1321 import htslib.hts_log : hts_log_info; 1322 import std.path:buildPath,dirName; 1323 import std..string:fromStringz; 1324 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1325 hts_log_info(__FUNCTION__, "Testing SAMWriter"); 1326 hts_log_info(__FUNCTION__, "Loading test file"); 1327 auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0); 1328 auto readrange = sam.allRecords; 1329 auto sam2 = SAMWriter("test.bam",sam.header); 1330 hts_log_info(__FUNCTION__, "Getting read 1"); 1331 auto read = readrange.front(); 1332 sam2.write(&read); 1333 sam2.close; 1334 destroy(sam2); 1335 sam = SAMFile("test.bam"); 1336 readrange = sam.allRecords; 1337 read = readrange.front(); 1338 writeln(read.sequence); 1339 assert(read.sequence=="GCTAGCTCAG"); 1340 // destroy(sam2); 1341 } 1342 1343 /// Used in sorting 1344 bool cmpInterval(hts_pair32_t a, hts_pair32_t b) 1345 { 1346 if (a.beg < b.beg) 1347 { 1348 return true; 1349 } 1350 if (a.end < b.end) 1351 { 1352 return true; 1353 } 1354 return false; 1355 } 1356 1357 /// Used in sorting 1358 bool cmpRegList(hts_reglist_t a, hts_reglist_t b) 1359 { 1360 if (a.tid < b.tid) 1361 { 1362 return true; 1363 } 1364 return false; 1365 } 1366 1367 /// Parse text line of SAM; Used in unittest 1368 private int parseSam(string line, bam_hdr_t* header, bam1_t* b) 1369 { 1370 import htslib.kstring : kstring_t; 1371 import std.utf : toUTFz; 1372 1373 kstring_t k; 1374 k.s = toUTFz!(char*)(line.dup); 1375 k.m = line.length + 1; 1376 k.l = line.length + 1; 1377 return sam_parse1(&k, header, b); 1378 } 1379 debug(dhtslib_unittest) 1380 unittest{ 1381 writeln(); 1382 import dhtslib.sam; 1383 import htslib.hts_log : hts_log_info; 1384 import std.path:buildPath,dirName; 1385 import std..string:fromStringz; 1386 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1387 hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord"); 1388 hts_log_info(__FUNCTION__, "Loading test file"); 1389 auto sam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","auxf#values.sam"), 0); 1390 auto readrange = sam.allRecords; 1391 hts_log_info(__FUNCTION__, "Getting read 1"); 1392 auto read = readrange.front(); 1393 writeln(read.sequence); 1394 assert(read.sequence=="GCTAGCTCAG"); 1395 } 1396 1397 debug(dhtslib_unittest) 1398 unittest 1399 { 1400 import std.stdio : writeln; 1401 import std.range : drop; 1402 import std.utf : toUTFz; 1403 import htslib.hts_log; // @suppress(dscanner.suspicious.local_imports) 1404 import std.path:buildPath,dirName; 1405 import std.conv:to; 1406 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1407 hts_log_info(__FUNCTION__, "Loading sam file"); 1408 auto range = File(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","realn01_exp-a.sam")).byLineCopy(); 1409 auto b = bam_init1(); 1410 auto hdr = bam_hdr_init(); 1411 string hdr_str; 1412 for (auto i = 0; i < 4; i++) 1413 { 1414 hdr_str ~= range.front ~ "\n"; 1415 range.popFront; 1416 } 1417 hts_log_info(__FUNCTION__, "Header"); 1418 writeln(hdr_str); 1419 hdr = sam_hdr_parse(cast(int) hdr_str.length, toUTFz!(char*)(hdr_str)); 1420 hts_log_info(__FUNCTION__, "Read status:" ~ parseSam(range.front, hdr, b).to!string); 1421 auto r = new SAMRecord(b); 1422 hts_log_info(__FUNCTION__, "Cigar" ~ r.cigar.toString); 1423 assert(r.cigar.toString == "6M1D117M5D28M"); 1424 }