1 /// @file htslib/sam.h 2 /// High-level SAM/BAM/CRAM sequence file operations. 3 /* 4 Copyright (C) 2008, 2009, 2013-2019 Genome Research Ltd. 5 Copyright (C) 2010, 2012, 2013 Broad Institute. 6 7 Author: Heng Li <lh3@sanger.ac.uk> 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 DEALINGS IN THE SOFTWARE. */ 26 27 module htslib.sam; 28 29 import core.stdc.stdint; 30 import htslib.hts; 31 import htslib.hts_log; 32 import htslib.bgzf: BGZF; 33 import htslib.kstring: kstring_t; 34 import std.format: format; 35 36 extern (C): 37 38 /// Highest SAM format version supported by this library 39 enum SAM_FORMAT_VERSION = "1.6"; 40 41 /*************************** 42 *** SAM/BAM/CRAM header *** 43 ***************************/ 44 45 /*! @typedef 46 * @abstract Header extension structure, grouping a collection 47 * of hash tables that contain the parsed header data. 48 */ 49 50 struct sam_hrecs_t; 51 52 /*! @typedef 53 @abstract Structure for the alignment header. 54 @field n_targets number of reference sequences 55 @field l_text length of the plain text in the header (may be zero if 56 the header has been edited) 57 @field target_len lengths of the reference sequences 58 @field target_name names of the reference sequences 59 @field text plain text (may be NULL if the header has been edited) 60 @field sdict header dictionary 61 @field hrecs pointer to the extended header struct (internal use only) 62 @field ref_count reference count 63 64 @note The text and l_text fields are included for backwards compatibility. 65 These fields may be set to NULL and zero respectively as a side-effect 66 of calling some header API functions. New code that needs to access the 67 header text should use the sam_hdr_str() and sam_hdr_length() functions 68 instead of these fields. 69 */ 70 71 struct sam_hdr_t 72 { 73 int n_targets; 74 int ignore_sam_err; 75 size_t l_text; 76 uint* target_len; 77 const(byte)* cigar_tab; 78 char** target_name; 79 char* text; 80 void* sdict; 81 sam_hrecs_t* hrecs; 82 uint ref_count; 83 } 84 85 /*! @typedef 86 * @abstract Old name for compatibility with existing code. 87 */ 88 alias bam_hdr_t = sam_hdr_t; 89 90 /**************************** 91 *** CIGAR related macros *** 92 ****************************/ 93 94 enum BAM_CMATCH = 0; 95 enum BAM_CINS = 1; 96 enum BAM_CDEL = 2; 97 enum BAM_CREF_SKIP = 3; 98 enum BAM_CSOFT_CLIP = 4; 99 enum BAM_CHARD_CLIP = 5; 100 enum BAM_CPAD = 6; 101 enum BAM_CEQUAL = 7; 102 enum BAM_CDIFF = 8; 103 enum BAM_CBACK = 9; 104 105 enum BAM_CIGAR_STR = "MIDNSHP=XB"; 106 enum BAM_CIGAR_SHIFT = 4; 107 enum BAM_CIGAR_MASK = 0xf; 108 enum BAM_CIGAR_TYPE = 0x3C1A7; 109 110 /*! @abstract Table for converting a CIGAR operator character to BAM_CMATCH etc. 111 Result is operator code or -1. Be sure to cast the index if it is a plain char: 112 int op = bam_cigar_table[(unsigned char) ch]; 113 */ 114 extern __gshared const(byte)[256] bam_cigar_table; 115 116 extern (D) auto bam_cigar_op(T)(auto ref T c) 117 { 118 return c & BAM_CIGAR_MASK; 119 } 120 121 extern (D) auto bam_cigar_oplen(T)(auto ref T c) 122 { 123 return c >> BAM_CIGAR_SHIFT; 124 } 125 126 // Note that BAM_CIGAR_STR is padded to length 16 bytes below so that 127 // the array look-up will not fall off the end. '?' is chosen as the 128 // padding character so it's easy to spot if one is emitted, and will 129 // result in a parsing failure (in sam_parse1(), at least) if read. 130 extern (D) auto bam_cigar_gen(T0, T1)(auto ref T0 l, auto ref T1 o) 131 { 132 return l << BAM_CIGAR_SHIFT | o; 133 } 134 135 /* bam_cigar_type returns a bit flag with: 136 * bit 1 set if the cigar operation consumes the query 137 * bit 2 set if the cigar operation consumes the reference 138 * 139 * For reference, the unobfuscated truth table for this function is: 140 * BAM_CIGAR_TYPE QUERY REFERENCE 141 * -------------------------------- 142 * BAM_CMATCH 1 1 143 * BAM_CINS 1 0 144 * BAM_CDEL 0 1 145 * BAM_CREF_SKIP 0 1 146 * BAM_CSOFT_CLIP 1 0 147 * BAM_CHARD_CLIP 0 0 148 * BAM_CPAD 0 0 149 * BAM_CEQUAL 1 1 150 * BAM_CDIFF 1 1 151 * BAM_CBACK 0 0 152 * -------------------------------- 153 */ 154 extern (D) auto bam_cigar_type(T)(auto ref T o) 155 { 156 return BAM_CIGAR_TYPE >> (o << 1) & 3; 157 } // bit 1: consume query; bit 2: consume reference 158 159 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */ 160 enum BAM_FPAIRED = 1; 161 /*! @abstract the read is mapped in a proper pair */ 162 enum BAM_FPROPER_PAIR = 2; 163 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */ 164 enum BAM_FUNMAP = 4; 165 /*! @abstract the mate is unmapped */ 166 enum BAM_FMUNMAP = 8; 167 /*! @abstract the read is mapped to the reverse strand */ 168 enum BAM_FREVERSE = 16; 169 /*! @abstract the mate is mapped to the reverse strand */ 170 enum BAM_FMREVERSE = 32; 171 /*! @abstract this is read1 */ 172 enum BAM_FREAD1 = 64; 173 /*! @abstract this is read2 */ 174 enum BAM_FREAD2 = 128; 175 /*! @abstract not primary alignment */ 176 enum BAM_FSECONDARY = 256; 177 /*! @abstract QC failure */ 178 enum BAM_FQCFAIL = 512; 179 /*! @abstract optical or PCR duplicate */ 180 enum BAM_FDUP = 1024; 181 /*! @abstract supplementary alignment */ 182 enum BAM_FSUPPLEMENTARY = 2048; 183 184 /************************* 185 *** Alignment records *** 186 *************************/ 187 188 /* 189 * Assumptions made here. While pos can be 64-bit, no sequence 190 * itself is that long, but due to ref skip CIGAR fields it 191 * may span more than that. (CIGAR itself is 28-bit len + 4 bit 192 * type, but in theory we can combine multiples together.) 193 * 194 * Mate position and insert size also need to be 64-bit, but 195 * we won't accept more than 32-bit for tid. 196 * 197 * The bam_core_t structure is the *in memory* layout and not 198 * the same as the on-disk format. 64-bit changes here permit 199 * SAM to work with very long chromosomes and permit BAM and CRAM 200 * to seamlessly update in the future without further API/ABI 201 * revisions. 202 */ 203 204 /*! @typedef 205 @abstract Structure for core alignment information. 206 @field pos 0-based leftmost coordinate 207 @field tid chromosome ID, defined by sam_hdr_t 208 @field bin bin calculated by bam_reg2bin() 209 @field qual mapping quality 210 @field l_extranul length of extra NULs between qname & cigar (for alignment) 211 @field flag bitwise flag 212 @field l_qname length of the query name 213 @field n_cigar number of CIGAR operations 214 @field l_qseq length of the query sequence (read) 215 @field mtid chromosome ID of next read in template, defined by sam_hdr_t 216 @field mpos 0-based leftmost coordinate of next read in template 217 @field isize observed template length ("insert size") 218 */ 219 struct bam1_core_t 220 { 221 hts_pos_t pos; 222 int tid; 223 ushort bin; // NB: invalid on 64-bit pos 224 ubyte qual; 225 ubyte l_extranul; 226 ushort flag; 227 ushort l_qname; 228 uint n_cigar; 229 int l_qseq; 230 int mtid; 231 hts_pos_t mpos; 232 hts_pos_t isize; 233 } 234 235 /*! @typedef 236 @abstract Structure for one alignment. 237 @field core core information about the alignment 238 @field id 239 @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux 240 @field l_data current length of bam1_t::data 241 @field m_data maximum length of bam1_t::data 242 @field mempolicy memory handling policy, see bam_set_mempolicy() 243 244 @discussion Notes: 245 246 1. The data blob should be accessed using bam_get_qname, bam_get_cigar, 247 bam_get_seq, bam_get_qual and bam_get_aux macros. These returns pointers 248 to the start of each type of data. 249 2. qname is terminated by one to four NULs, so that the following 250 cigar data is 32-bit aligned; core.l_qname includes these trailing NULs, 251 while core.l_extranul counts the excess NULs (so 0 <= l_extranul <= 3). 252 3. Cigar data is encoded 4 bytes per CIGAR operation. 253 See the bam_cigar_* macros for manipulation. 254 4. seq is nibble-encoded according to bam_nt16_table. 255 See the bam_seqi macro for retrieving individual bases. 256 5. Per base qualilties are stored in the Phred scale with no +33 offset. 257 Ie as per the BAM specification and not the SAM ASCII printable method. 258 */ 259 struct bam1_t 260 { 261 import std.bitmanip : bitfields; 262 263 bam1_core_t core; 264 ulong id; 265 ubyte* data; 266 int l_data; 267 uint m_data; 268 269 mixin(bitfields!( 270 uint, "mempolicy", 2, 271 uint, "", 30)); 272 273 /* Reserved */ 274 } 275 276 /*! @function 277 @abstract Get whether the query is on the reverse strand 278 @param b pointer to an alignment 279 @return boolean true if query is on the reverse strand 280 */ 281 extern (D) auto bam_is_rev(T)(auto ref T b) 282 { 283 return (b.core.flag & BAM_FREVERSE) != 0; 284 } 285 286 /*! @function 287 @abstract Get whether the query's mate is on the reverse strand 288 @param b pointer to an alignment 289 @return boolean true if query's mate on the reverse strand 290 */ 291 extern (D) auto bam_is_mrev(T)(auto ref T b) 292 { 293 return (b.core.flag & BAM_FMREVERSE) != 0; 294 } 295 296 /*! @function 297 @abstract Get the name of the query 298 @param b pointer to an alignment 299 @return pointer to the name string, null terminated 300 */ 301 extern (D) auto bam_get_qname(T)(auto ref T b) 302 { 303 return cast(char*) b.data; 304 } 305 306 /*! @function 307 @abstract Get the CIGAR array 308 @param b pointer to an alignment 309 @return pointer to the CIGAR array 310 311 @discussion In the CIGAR array, each element is a 32-bit integer. The 312 lower 4 bits gives a CIGAR operation and the higher 28 bits keep the 313 length of a CIGAR. 314 */ 315 extern (D) auto bam_get_cigar(bam1_t * b) 316 { 317 return cast(uint*) ((*b).data + (*b).core.l_qname); 318 } 319 320 /*! @function 321 @abstract Get query sequence 322 @param b pointer to an alignment 323 @return pointer to sequence 324 325 @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G, 326 8 for T and 15 for N. Two bases are packed in one byte with the base 327 at the higher 4 bits having smaller coordinate on the read. It is 328 recommended to use bam_seqi() macro to get the base. 329 */ 330 extern (D) auto bam_get_seq(T)(auto ref T b) 331 { 332 return b.data + (b.core.n_cigar << 2) + b.core.l_qname; 333 } 334 335 /*! @function 336 @abstract Get query quality 337 @param b pointer to an alignment 338 @return pointer to quality string 339 */ 340 extern (D) auto bam_get_qual(T)(auto ref T b) 341 { 342 return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1); 343 } 344 345 /*! @function 346 @abstract Get auxiliary data 347 @param b pointer to an alignment 348 @return pointer to the concatenated auxiliary data 349 */ 350 extern (D) auto bam_get_aux(T)(auto ref T b) 351 { 352 return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq; 353 } 354 355 /*! @function 356 @abstract Get length of auxiliary data 357 @param b pointer to an alignment 358 @return length of the concatenated auxiliary data 359 */ 360 extern (D) auto bam_get_l_aux(T)(auto ref T b) 361 { 362 return b.l_data - (b.core.n_cigar << 2) - b.core.l_qname - b.core.l_qseq - ((b.core.l_qseq + 1) >> 1); 363 } 364 365 /*! @function 366 @abstract Get a base on read 367 @param s Query sequence returned by bam_get_seq() 368 @param i The i-th position, 0-based 369 @return 4-bit integer representing the base. 370 */ 371 extern (D) auto bam_seqi(T0, T1)(auto ref T0 s, auto ref T1 i) 372 { 373 return s[i >> 1] >> ((~i & 1) << 2) & 0xf; 374 } 375 376 /************************** 377 *** Exported functions *** 378 **************************/ 379 380 /*************** 381 *** BAM I/O *** 382 ***************/ 383 384 /* Header */ 385 386 /// Generates a new unpopulated header structure. 387 /*! 388 * 389 * @return A valid pointer to new header on success, NULL on failure 390 * 391 * The sam_hdr_t struct returned by a successful call should be freed 392 * via sam_hdr_destroy() when it is no longer needed. 393 */ 394 sam_hdr_t* sam_hdr_init(); 395 396 /// Read the header from a BAM compressed file. 397 /*! 398 * @param fp File pointer 399 * @return A valid pointer to new header on success, NULL on failure 400 * 401 * This function only works with BAM files. It is usually better to use 402 * sam_hdr_read(), which works on SAM, BAM and CRAM files. 403 * 404 * The sam_hdr_t struct returned by a successful call should be freed 405 * via sam_hdr_destroy() when it is no longer needed. 406 */ 407 sam_hdr_t* bam_hdr_read(BGZF* fp); 408 409 /// Writes the header to a BAM file. 410 /*! 411 * @param fp File pointer 412 * @param h Header pointer 413 * @return 0 on success, -1 on failure 414 * 415 * This function only works with BAM files. Use sam_hdr_write() to 416 * write in any of the SAM, BAM or CRAM formats. 417 */ 418 int bam_hdr_write(BGZF* fp, const(sam_hdr_t)* h); 419 420 /*! 421 * Frees the resources associated with a header. 422 */ 423 void sam_hdr_destroy(sam_hdr_t* h); 424 425 /// Duplicate a header structure. 426 /*! 427 * @return A valid pointer to new header on success, NULL on failure 428 * 429 * The sam_hdr_t struct returned by a successful call should be freed 430 * via sam_hdr_destroy() when it is no longer needed. 431 */ 432 sam_hdr_t* sam_hdr_dup(const(sam_hdr_t)* h0); 433 434 /*! 435 * @abstract Old names for compatibility with existing code. 436 */ 437 pragma(inline,true) 438 sam_hdr_t* bam_hdr_init() { return sam_hdr_init(); } 439 440 pragma(inline,true) 441 void bam_hdr_destroy(sam_hdr_t* h) { sam_hdr_destroy(h); } 442 443 pragma(inline,true) 444 sam_hdr_t* bam_hdr_dup(const(sam_hdr_t)* h0) { return sam_hdr_dup(h0); } 445 446 alias samFile = htsFile; 447 448 /// Create a header from existing text. 449 /*! 450 * @param l_text Length of text 451 * @param text Header text 452 * @return A populated sam_hdr_t structure on success; NULL on failure. 453 * @note The text field of the returned header will be NULL, and the l_text 454 * field will be zero. 455 * 456 * The sam_hdr_t struct returned by a successful call should be freed 457 * via sam_hdr_destroy() when it is no longer needed. 458 */ 459 sam_hdr_t* sam_hdr_parse(size_t l_text, const(char)* text); 460 461 /// Read a header from a SAM, BAM or CRAM file. 462 /*! 463 * @param fp Pointer to a SAM, BAM or CRAM file handle 464 * @return A populated sam_hdr_t struct on success; NULL on failure. 465 * 466 * The sam_hdr_t struct returned by a successful call should be freed 467 * via sam_hdr_destroy() when it is no longer needed. 468 */ 469 sam_hdr_t* sam_hdr_read(samFile* fp); 470 471 /// Write a header to a SAM, BAM or CRAM file. 472 /*! 473 * @param fp SAM, BAM or CRAM file header 474 * @param h Header structure to write 475 * @return 0 on success; -1 on failure 476 */ 477 int sam_hdr_write(samFile* fp, const(sam_hdr_t)* h); 478 479 /// Returns the current length of the header text. 480 /*! 481 * @return >= 0 on success, SIZE_MAX on failure 482 */ 483 size_t sam_hdr_length(sam_hdr_t* h); 484 485 /// Returns the text representation of the header. 486 /*! 487 * @return valid char pointer on success, NULL on failure 488 * 489 * The returned string is part of the header structure. It will remain 490 * valid until a call to a header API function causes the string to be 491 * invalidated, or the header is destroyed. 492 * 493 * The caller should not attempt to free or realloc this pointer. 494 */ 495 const(char)* sam_hdr_str(sam_hdr_t* h); 496 497 /// Returns the number of references in the header. 498 /*! 499 * @return >= 0 on success, -1 on failure 500 */ 501 int sam_hdr_nref(const(sam_hdr_t)* h); 502 503 /* ==== Line level methods ==== */ 504 505 /// Add formatted lines to an existing header. 506 /*! 507 * @param lines Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with 508 * optional new-line. If it contains more than 1 line then 509 * multiple lines will be added in order 510 * @param len The maximum length of lines (if an early NUL is not 511 * encountered). len may be 0 if unknown, in which case 512 * lines must be NUL-terminated 513 * @return 0 on success, -1 on failure 514 * 515 * The lines will be appended to the end of the existing header 516 * (apart from HD, which always comes first). 517 */ 518 int sam_hdr_add_lines(sam_hdr_t* h, const(char)* lines, size_t len); 519 520 /// Adds a single line to an existing header. 521 /*! 522 * Specify type and one or more key,value pairs, ending with the NULL key. 523 * Eg. sam_hdr_add_line(h, "SQ", "ID", "foo", "LN", "100", NULL). 524 * 525 * @param type Type of the added line. Eg. "SQ" 526 * @return 0 on success, -1 on failure 527 * 528 * The new line will be added immediately after any others of the same 529 * type, or at the end of the existing header if no lines of the 530 * given type currently exist. The exception is HD lines, which always 531 * come first. If an HD line already exists, it will be replaced. 532 */ 533 int sam_hdr_add_line(sam_hdr_t* h, const(char)* type, ...); 534 535 /// Returns a complete line of formatted text for a given type and ID. 536 /*! 537 * @param type Type of the searched line. Eg. "SQ" 538 * @param ID_key Tag key defining the line. Eg. "SN" 539 * @param ID_value Tag value associated with the key above. Eg. "ref1" 540 * @param ks kstring to hold the result 541 * @return 0 on success; 542 * -1 if no matching line is found 543 * -2 on other failures 544 * 545 * Puts a complete line of formatted text for a specific header type/ID 546 * combination into @p ks. If ID_key is NULL then it returns the first line of 547 * the specified type. 548 * 549 * Any existing content in @p ks will be overwritten. 550 */ 551 int sam_hdr_find_line_id( 552 sam_hdr_t* h, 553 const(char)* type, 554 const(char)* ID_key, 555 const(char)* ID_val, 556 kstring_t* ks); 557 558 /// Returns a complete line of formatted text for a given type and index. 559 /*! 560 * @param type Type of the searched line. Eg. "SQ" 561 * @param position Index in lines of this type (zero-based) 562 * @param ks kstring to hold the result 563 * @return 0 on success; 564 * -1 if no matching line is found 565 * -2 on other failures 566 * 567 * Puts a complete line of formatted text for a specific line into @p ks. 568 * The header line is selected using the @p type and @p position parameters. 569 * 570 * Any existing content in @p ks will be overwritten. 571 */ 572 int sam_hdr_find_line_pos( 573 sam_hdr_t* h, 574 const(char)* type, 575 int pos, 576 kstring_t* ks); 577 578 /// Remove a line with given type / id from a header 579 /*! 580 * @param type Type of the searched line. Eg. "SQ" 581 * @param ID_key Tag key defining the line. Eg. "SN" 582 * @param ID_value Tag value associated with the key above. Eg. "ref1" 583 * @return 0 on success, -1 on error 584 * 585 * Remove a line from the header by specifying a tag:value that uniquely 586 * identifies the line, i.e. the @SQ line containing "SN:ref1". 587 * 588 * \@SQ line is uniquely identified by the SN tag. 589 * \@RG line is uniquely identified by the ID tag. 590 * \@PG line is uniquely identified by the ID tag. 591 * Eg. sam_hdr_remove_line_id(h, "SQ", "SN", "ref1") 592 * 593 * If no key:value pair is specified, the type MUST be followed by a NULL argument and 594 * the first line of the type will be removed, if any. 595 * Eg. sam_hdr_remove_line_id(h, "SQ", NULL, NULL) 596 * 597 * @note Removing \@PG lines is currently unsupported. 598 */ 599 int sam_hdr_remove_line_id( 600 sam_hdr_t* h, 601 const(char)* type, 602 const(char)* ID_key, 603 const(char)* ID_value); 604 605 /// Remove nth line of a given type from a header 606 /*! 607 * @param type Type of the searched line. Eg. "SQ" 608 * @param position Index in lines of this type (zero-based). E.g. 3 609 * @return 0 on success, -1 on error 610 * 611 * Remove a line from the header by specifying the position in the type 612 * group, i.e. 3rd @SQ line. 613 */ 614 int sam_hdr_remove_line_pos(sam_hdr_t* h, const(char)* type, int position); 615 616 /// Add or update tag key,value pairs in a header line. 617 /*! 618 * @param type Type of the searched line. Eg. "SQ" 619 * @param ID_key Tag key defining the line. Eg. "SN" 620 * @param ID_value Tag value associated with the key above. Eg. "ref1" 621 * @return 0 on success, -1 on error 622 * 623 * Adds or updates tag key,value pairs in a header line. 624 * Eg. for adding M5 tags to @SQ lines or updating sort order for the 625 * @HD line. 626 * 627 * Specify multiple key,value pairs ending in NULL. Eg. 628 * sam_hdr_update_line(h, "RG", "ID", "rg1", "DS", "description", "PG", "samtools", NULL) 629 * 630 * Attempting to update the record name (i.e. @SQ SN or @RG ID) will 631 * work as long as the new name is not already in use, however doing this 632 * on a file opened for reading may produce unexpected results. 633 * 634 * Renaming an @RG record in this way will only change the header. Alignment 635 * records written later will not be updated automatically even if they 636 * reference the old read group name. 637 * 638 * Attempting to change an @PG ID tag is not permitted. 639 */ 640 int sam_hdr_update_line( 641 sam_hdr_t* h, 642 const(char)* type, 643 const(char)* ID_key, 644 const(char)* ID_value, 645 ...); 646 647 /// Remove all lines of a given type from a header, except the one matching an ID 648 /*! 649 * @param type Type of the searched line. Eg. "SQ" 650 * @param ID_key Tag key defining the line. Eg. "SN" 651 * @param ID_value Tag value associated with the key above. Eg. "ref1" 652 * @return 0 on success, -1 on failure 653 * 654 * Remove all lines of type <type> from the header, except the one 655 * specified by tag:value, i.e. the @SQ line containing "SN:ref1". 656 * 657 * If no line matches the key:value ID, all lines of the given type are removed. 658 * To remove all lines of a given type, use NULL for both ID_key and ID_value. 659 */ 660 int sam_hdr_remove_except( 661 sam_hdr_t* h, 662 const(char)* type, 663 const(char)* ID_key, 664 const(char)* ID_value); 665 666 /// Remove header lines of a given type, except those in a given ID set 667 /*! 668 * @param type Type of the searched line. Eg. "RG" 669 * @param id Tag key defining the line. Eg. "ID" 670 * @param rh Hash set initialised by the caller with the values to be kept. 671 * See description for how to create this. If @p rh is NULL, all 672 * lines of this type will be removed. 673 * @return 0 on success, -1 on failure 674 * 675 * Remove all lines of type @p type from the header, except the ones 676 * specified in the hash set @p rh. If @p rh is NULL, all lines of 677 * this type will be removed. 678 * Declaration of @p rh is done using KHASH_SET_INIT_STR macro. Eg. 679 * @code{.c} 680 * #include "htslib/khash.h" 681 * KHASH_SET_INIT_STR(keep) 682 * typedef khash_t(keep) *keephash_t; 683 * 684 * void your_method() { 685 * samFile *sf = sam_open("alignment.bam", "r"); 686 * sam_hdr_t *h = sam_hdr_read(sf); 687 * keephash_t rh = kh_init(keep); 688 * int ret = 0; 689 * kh_put(keep, rh, strdup("chr2"), &ret); 690 * kh_put(keep, rh, strdup("chr3"), &ret); 691 * if (sam_hdr_remove_lines(h, "SQ", "SN", rh) == -1) 692 * fprintf(stderr, "Error removing lines\n"); 693 * khint_t k; 694 * for (k = 0; k < kh_end(rh); ++k) 695 * if (kh_exist(rh, k)) free((char*)kh_key(rh, k)); 696 * kh_destroy(keep, rh); 697 * sam_hdr_destroy(h); 698 * sam_close(sf); 699 * } 700 * @endcode 701 * 702 */ 703 int sam_hdr_remove_lines( 704 sam_hdr_t* h, 705 const(char)* type, 706 const(char)* id, 707 void* rh); 708 709 /// Count the number of lines for a given header type 710 /*! 711 * @param h BAM header 712 * @param type Header type to count. Eg. "RG" 713 * @return Number of lines of this type on success; -1 on failure 714 */ 715 int sam_hdr_count_lines(sam_hdr_t* h, const(char)* type); 716 717 /// Index of the line for the types that have dedicated look-up tables (SQ, RG, PG) 718 /*! 719 * @param h BAM header 720 * @param type Type of the searched line. Eg. "RG" 721 * @param key The value of the identifying key. Eg. "rg1" 722 * @return 0-based index on success; -1 if line does not exist; -2 on failure 723 */ 724 int sam_hdr_line_index(sam_hdr_t* bh, const(char)* type, const(char)* key); 725 726 /// Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG) 727 /*! 728 * @param h BAM header 729 * @param type Type of the searched line. Eg. "RG" 730 * @param pos Zero-based index inside the type group. Eg. 2 (for the third RG line) 731 * @return Valid key string on success; NULL on failure 732 */ 733 const(char)* sam_hdr_line_name(sam_hdr_t* bh, const(char)* type, int pos); 734 735 /* ==== Key:val level methods ==== */ 736 737 /// Return the value associated with a key for a header line identified by ID_key:ID_val 738 /*! 739 * @param type Type of the line to which the tag belongs. Eg. "SQ" 740 * @param ID_key Tag key defining the line. Eg. "SN". Can be NULL, if looking for the first line. 741 * @param ID_value Tag value associated with the key above. Eg. "ref1". Can be NULL, if ID_key is NULL. 742 * @param key Key of the searched tag. Eg. "LN" 743 * @param ks kstring where the value will be written 744 * @return 0 on success 745 * -1 if the requested tag does not exist 746 * -2 on other errors 747 * 748 * Looks for a specific key in a single SAM header line and writes the 749 * associated value into @p ks. The header line is selected using the ID_key 750 * and ID_value parameters. Any pre-existing content in @p ks will be 751 * overwritten. 752 */ 753 int sam_hdr_find_tag_id( 754 sam_hdr_t* h, 755 const(char)* type, 756 const(char)* ID_key, 757 const(char)* ID_value, 758 const(char)* key, 759 kstring_t* ks); 760 761 /// Return the value associated with a key for a header line identified by position 762 /*! 763 * @param type Type of the line to which the tag belongs. Eg. "SQ" 764 * @param position Index in lines of this type (zero-based). E.g. 3 765 * @param key Key of the searched tag. Eg. "LN" 766 * @param ks kstring where the value will be written 767 * @return 0 on success 768 * -1 if the requested tag does not exist 769 * -2 on other errors 770 * 771 * Looks for a specific key in a single SAM header line and writes the 772 * associated value into @p ks. The header line is selected using the @p type 773 * and @p position parameters. Any pre-existing content in @p ks will be 774 * overwritten. 775 */ 776 int sam_hdr_find_tag_pos( 777 sam_hdr_t* h, 778 const(char)* type, 779 int pos, 780 const(char)* key, 781 kstring_t* ks); 782 783 /// Remove the key from the line identified by type, ID_key and ID_value. 784 /*! 785 * @param type Type of the line to which the tag belongs. Eg. "SQ" 786 * @param ID_key Tag key defining the line. Eg. "SN" 787 * @param ID_value Tag value associated with the key above. Eg. "ref1" 788 * @param key Key of the targeted tag. Eg. "M5" 789 * @return 1 if the key was removed; 0 if it was not present; -1 on error 790 */ 791 int sam_hdr_remove_tag_id( 792 sam_hdr_t* h, 793 const(char)* type, 794 const(char)* ID_key, 795 const(char)* ID_value, 796 const(char)* key); 797 798 /// Get the target id for a given reference sequence name 799 /*! 800 * @param ref Reference name 801 * @return Positive value on success, 802 * -1 if unknown reference, 803 * -2 if the header could not be parsed 804 * 805 * Looks up a reference sequence by name in the reference hash table 806 * and returns the numerical target id. 807 */ 808 int sam_hdr_name2tid(sam_hdr_t* h, const(char)* ref_); 809 810 /// Get the reference sequence name from a target index 811 /*! 812 * @param tid Target index 813 * @return Valid reference name on success, NULL on failure 814 * 815 * Fetch the reference sequence name from the target name array, 816 * using the numerical target id. 817 */ 818 const(char)* sam_hdr_tid2name(const(sam_hdr_t)* h, int tid); 819 820 /// Get the reference sequence length from a target index 821 /*! 822 * @param tid Target index 823 * @return Strictly positive value on success, 0 on failure 824 * 825 * Fetch the reference sequence length from the target length array, 826 * using the numerical target id. 827 */ 828 hts_pos_t sam_hdr_tid2len(const(sam_hdr_t)* h, int tid); 829 830 /// Alias of sam_hdr_name2tid(), for backwards compatibility. 831 /*! 832 * @param ref Reference name 833 * @return Positive value on success, 834 * -1 if unknown reference, 835 * -2 if the header could not be parsed 836 */ 837 pragma(inline,true) 838 int bam_name2id(sam_hdr_t* h, const(char)* ref_) { return sam_hdr_name2tid(h, ref_); } 839 840 /// Generate a unique \@PG ID: value 841 /*! 842 * @param name Name of the program. Eg. samtools 843 * @return Valid ID on success, NULL on failure 844 * 845 * Returns a unique ID from a base name. The string returned will remain 846 * valid until the next call to this function, or the header is destroyed. 847 * The caller should not attempt to free() or realloc() it. 848 */ 849 const(char)* sam_hdr_pg_id(sam_hdr_t* h, const(char)* name); 850 851 /// Add an \@PG line. 852 /*! 853 * @param name Name of the program. Eg. samtools 854 * @return 0 on success, -1 on failure 855 * 856 * If we wish complete control over this use sam_hdr_add_line() directly. This 857 * function uses that, but attempts to do a lot of tedious house work for 858 * you too. 859 * 860 * - It will generate a suitable ID if the supplied one clashes. 861 * - It will generate multiple \@PG records if we have multiple PG chains. 862 * 863 * Call it as per sam_hdr_add_line() with a series of key,value pairs ending 864 * in NULL. 865 */ 866 int sam_hdr_add_pg(sam_hdr_t* h, const(char)* name, ...); 867 868 /*! 869 * A function to help with construction of CL tags in @PG records. 870 * Takes an argc, argv pair and returns a single space-separated string. 871 * This string should be deallocated by the calling function. 872 * 873 * @return 874 * Returns malloced char * on success; 875 * NULL on failure 876 */ 877 char* stringify_argv(int argc, char** argv); 878 879 /// Increments the reference count on a header 880 /*! 881 * This permits multiple files to share the same header, all calling 882 * sam_hdr_destroy when done, without causing errors for other open files. 883 */ 884 void sam_hdr_incr_ref(sam_hdr_t* h); 885 886 /* 887 * Macros for changing the \@HD line. They eliminate the need to use NULL method arguments. 888 */ 889 890 /// Returns the SAM formatted text of the \@HD header line 891 extern (D) auto sam_hdr_find_hd(T0, T1)(auto ref T0 h, auto ref T1 ks) 892 { 893 return sam_hdr_find_line_id(h, "HD", NULL, NULL, ks); 894 } 895 896 /// Returns the value associated with a given \@HD line tag 897 extern (D) auto sam_hdr_find_tag_hd(T0, T1, T2)(auto ref T0 h, auto ref T1 key, auto ref T2 ks) 898 { 899 return sam_hdr_find_tag_id(h, "HD", NULL, NULL, key, ks); 900 } 901 902 /// Adds or updates tags on the header \@HD line 903 extern (D) auto sam_hdr_update_hd(T, A...)(auto ref T h, auto ref A a) 904 { 905 // NOTE: This macro was dropped by dstep due to variadic args 906 static assert (a.length %2 == 0); // K-V pairs => even number of variadic args 907 return sam_hdr_update_line(h, "HD", null, null, a, null); 908 } 909 910 /// Removes the \@HD line tag with the given key 911 extern (D) auto sam_hdr_remove_tag_hd(T0, T1)(auto ref T0 h, auto ref T1 key) 912 { 913 return sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key); 914 } 915 916 /* Alignment */ 917 918 /// Create a new bam1_t alignment structure 919 /** 920 @return An empty bam1_t structure on success, NULL on failure 921 922 The bam1_t struct returned by a successful call should be freed 923 via bam_destroy1() when it is no longer needed. 924 */ 925 bam1_t* bam_init1(); 926 927 /// Destroy a bam1_t structure 928 /** 929 @param b structure to destroy 930 931 Does nothing if @p b is NULL. If not, all memory associated with @p b 932 will be freed, along with the structure itself. @p b should not be 933 accessed after calling this function. 934 */ 935 void bam_destroy1(bam1_t* b); 936 937 enum BAM_USER_OWNS_STRUCT = 1; 938 enum BAM_USER_OWNS_DATA = 2; 939 940 /// Set alignment record memory policy 941 /** 942 @param b Alignment record 943 @param policy Desired policy 944 945 Allows the way HTSlib reallocates and frees bam1_t data to be 946 changed. @policy can be set to the bitwise-or of the following 947 values: 948 949 \li \c BAM_USER_OWNS_STRUCT 950 If this is set then bam_destroy1() will not try to free the bam1_t struct. 951 952 \li \c BAM_USER_OWNS_DATA 953 If this is set, bam_destroy1() will not free the bam1_t::data pointer. 954 Also, functions which need to expand bam1_t::data memory will change 955 behaviour. Instead of calling realloc() on the pointer, they will 956 allocate a new data buffer and copy any existing content in to it. 957 The existing memory will \b not be freed. bam1_t::data will be 958 set to point to the new memory and the BAM_USER_OWNS_DATA flag will be 959 cleared. 960 961 BAM_USER_OWNS_STRUCT allows bam_destroy1() to be called on bam1_t 962 structures that are members of an array. 963 964 BAM_USER_OWNS_DATA can be used by applications that want more control 965 over where the variable-length parts of the bam record will be stored. 966 By preventing calls to free() and realloc(), it allows bam1_t::data 967 to hold pointers to memory that cannot be passed to those functions. 968 969 Example: Read a block of alignment records, storing the variable-length 970 data in a single buffer and the records in an array. Stop when either 971 the array or the buffer is full. 972 973 \code{.c} 974 #define MAX_RECS 1000 975 #define REC_LENGTH 400 // Average length estimate, to get buffer size 976 size_t bufsz = MAX_RECS * REC_LENGTH, nrecs, buff_used = 0; 977 bam1_t *recs = calloc(MAX_RECS, sizeof(bam1_t)); 978 uint8_t *buffer = malloc(bufsz); 979 int res = 0, result = EXIT_FAILURE; 980 uint32_t new_m_data; 981 982 if (!recs || !buffer) goto cleanup; 983 for (nrecs = 0; nrecs < MAX_RECS; nrecs++) { 984 bam_set_mempolicy(BAM_USER_OWNS_STRUCT|BAM_USER_OWNS_DATA); 985 986 // Set data pointer to unused part of buffer 987 recs[nrecs].data = &buffer[buff_used]; 988 989 // Set m_data to size of unused part of buffer. On 64-bit platforms it 990 // will be necessary to limit this to UINT32_MAX due to the size of 991 // bam1_t::m_data (not done here as our buffer is only 400K). 992 recs[nrecs].m_data = bufsz - buff_used; 993 994 // Read the record 995 res = sam_read1(file_handle, header, &recs[nrecs]); 996 if (res <= 0) break; // EOF or error 997 998 // Check if the record data didn't fit - if not, stop reading 999 if ((bam_get_mempolicy(&recs[nrecs]) & BAM_USER_OWNS_DATA) == 0) { 1000 nrecs++; // Include last record in count 1001 break; 1002 } 1003 1004 // Adjust m_data to the space actually used. If space is available, 1005 // round up to eight bytes so the next record aligns nicely. 1006 new_m_data = ((uint32_t) recs[nrecs].l_data + 7) & (~7U); 1007 if (new_m_data < recs[nrecs].m_data) recs[nrecs].m_data = new_m_data; 1008 1009 buff_used += recs[nrecs].m_data; 1010 } 1011 if (res < 0) goto cleanup; 1012 result = EXIT_SUCCESS; 1013 1014 // ... use data ... 1015 1016 cleanup: 1017 for (size_t i = 0; i < nrecs; i++) 1018 bam_destroy1(i); 1019 free(buffer); 1020 free(recs); 1021 1022 \endcode 1023 */ 1024 void bam_set_mempolicy(bam1_t* b, uint policy) { 1025 b.mempolicy = policy; 1026 } 1027 1028 /// Get alignment record memory policy 1029 /** @param b Alignment record 1030 1031 See bam_set_mempolicy() 1032 */ 1033 uint bam_get_mempolicy(bam1_t* b) { 1034 return b.mempolicy; 1035 } 1036 1037 /// Read a BAM format alignment record 1038 /** 1039 @param fp BGZF file being read 1040 @param b Destination for the alignment data 1041 @return number of bytes read on success 1042 -1 at end of file 1043 < -1 on failure 1044 1045 This function can only read BAM format files. Most code should use 1046 sam_read1() instead, which can be used with BAM, SAM and CRAM formats. 1047 */ 1048 int bam_read1(BGZF* fp, bam1_t* b); 1049 1050 /// Write a BAM format alignment record 1051 /** 1052 @param fp BGZF file being written 1053 @param b Alignment record to write 1054 @return number of bytes written on success 1055 -1 on error 1056 1057 This function can only write BAM format files. Most code should use 1058 sam_write1() instead, which can be used with BAM, SAM and CRAM formats. 1059 */ 1060 int bam_write1(BGZF* fp, const(bam1_t)* b); 1061 1062 /// Copy alignment record data 1063 /** 1064 @param bdst Destination alignment record 1065 @param bsrc Source alignment record 1066 @return bdst on success; NULL on failure 1067 */ 1068 bam1_t* bam_copy1(bam1_t* bdst, const(bam1_t)* bsrc); 1069 1070 /// Create a duplicate alignment record 1071 /** 1072 @param bsrc Source alignment record 1073 @return Pointer to a new alignment record on success; NULL on failure 1074 1075 The bam1_t struct returned by a successful call should be freed 1076 via bam_destroy1() when it is no longer needed. 1077 */ 1078 bam1_t* bam_dup1(const(bam1_t)* bsrc); 1079 1080 /// Calculate query length from CIGAR data 1081 /** 1082 @param n_cigar Number of items in @p cigar 1083 @param cigar CIGAR data 1084 @return Query length 1085 1086 CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op 1087 where op_len is the length in bases and op is a value between 0 and 8 1088 representing one of the operations "MIDNSHP=X" (M = 0; X = 8) 1089 1090 This function returns the sum of the lengths of the M, I, S, = and X 1091 operations in @p cigar (these are the operations that "consume" query 1092 bases). All other operations (including invalid ones) are ignored. 1093 1094 @note This return type of this function is hts_pos_t so that it can 1095 correctly return the length of CIGAR sequences including many long 1096 operations without overflow. However, other restrictions (notably the sizes 1097 of bam1_core_t::l_qseq and bam1_t::data) limit the maximum query sequence 1098 length supported by HTSlib to fewer than INT_MAX bases. 1099 */ 1100 hts_pos_t bam_cigar2qlen(int n_cigar, const(uint)* cigar); 1101 1102 /// Calculate reference length from CIGAR data 1103 /** 1104 @param n_cigar Number of items in @p cigar 1105 @param cigar CIGAR data 1106 @return Reference length 1107 1108 CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op 1109 where op_len is the length in bases and op is a value between 0 and 8 1110 representing one of the operations "MIDNSHP=X" (M = 0; X = 8) 1111 1112 This function returns the sum of the lengths of the M, D, N, = and X 1113 operations in @p cigar (these are the operations that "consume" reference 1114 bases). All other operations (including invalid ones) are ignored. 1115 */ 1116 hts_pos_t bam_cigar2rlen(int n_cigar, const(uint)* cigar); 1117 1118 /*! 1119 @abstract Calculate the rightmost base position of an alignment on the 1120 reference genome. 1121 1122 @param b pointer to an alignment 1123 @return the coordinate of the first base after the alignment, 0-based 1124 1125 @discussion For a mapped read, this is just b->core.pos + bam_cigar2rlen. 1126 For an unmapped read (either according to its flags or if it has no cigar 1127 string), we return b->core.pos + 1 by convention. 1128 */ 1129 hts_pos_t bam_endpos(const(bam1_t)* b); 1130 1131 int bam_str2flag(const(char)* str); /** returns negative value on error */ 1132 1133 char* bam_flag2str(int flag); /** The string must be freed by the user */ 1134 1135 /*! @function 1136 @abstract Set the name of the query 1137 @param b pointer to an alignment 1138 @return 0 on success, -1 on failure 1139 */ 1140 int bam_set_qname(bam1_t* b, const(char)* qname); 1141 1142 /************************* 1143 *** BAM/CRAM indexing *** 1144 *************************/ 1145 1146 // These BAM iterator functions work only on BAM files. To work with either 1147 // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions. 1148 alias bam_itr_destroy = hts_itr_destroy; 1149 alias bam_itr_queryi = sam_itr_queryi; 1150 alias bam_itr_querys = sam_itr_querys; 1151 1152 pragma(inline, true) 1153 extern (D) auto bam_itr_next(T0, T1, T2)(auto ref T0 htsfp, auto ref T1 itr, auto ref T2 r) 1154 { 1155 return hts_itr_next(htsfp.fp.bgzf, itr, r, 0); 1156 } 1157 1158 // Load/build .csi or .bai BAM index file. Does not work with CRAM. 1159 // It is recommended to use the sam_index_* functions below instead. 1160 pragma(inline, true) 1161 extern (D) auto bam_index_load(T)(auto ref T fn) 1162 { 1163 return hts_idx_load(fn, HTS_FMT_BAI); 1164 } 1165 1166 pragma(inline, true) 1167 extern (D) auto bam_index_build(T0, T1)(auto ref T0 fn, auto ref T1 min_shift) 1168 { 1169 return sam_index_build(fn, min_shift); 1170 } 1171 1172 /// Initialise fp->idx for the current format type for SAM, BAM and CRAM types . 1173 /** @param fp File handle for the data file being written. 1174 @param h Bam header structured (needed for BAI and CSI). 1175 @param min_shift 0 for BAI, or larger for CSI (CSI defaults to 14). 1176 @param fnidx Filename to write index to. This pointer must remain valid 1177 until after sam_idx_save is called. 1178 @return 0 on success, <0 on failure. 1179 1180 @note This must be called after the header has been written, but before 1181 any other data. 1182 */ 1183 int sam_idx_init(htsFile* fp, sam_hdr_t* h, int min_shift, const(char)* fnidx); 1184 1185 /// Writes the index initialised with sam_idx_init to disk. 1186 /** @param fp File handle for the data file being written. 1187 @return 0 on success, <0 on filaure. 1188 */ 1189 int sam_idx_save(htsFile* fp); 1190 1191 /// Load a BAM (.csi or .bai) or CRAM (.crai) index file 1192 /** @param fp File handle of the data file whose index is being opened 1193 @param fn BAM/CRAM/etc filename to search alongside for the index file 1194 @return The index, or NULL if an error occurred. 1195 1196 Equivalent to sam_index_load3(fp, fn, NULL, HTS_IDX_SAVE_REMOTE); 1197 */ 1198 hts_idx_t* sam_index_load(htsFile* fp, const(char)* fn); 1199 1200 /// Load a specific BAM (.csi or .bai) or CRAM (.crai) index file 1201 /** @param fp File handle of the data file whose index is being opened 1202 @param fn BAM/CRAM/etc data file filename 1203 @param fnidx Index filename, or NULL to search alongside @a fn 1204 @return The index, or NULL if an error occurred. 1205 1206 Equivalent to sam_index_load3(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE); 1207 */ 1208 hts_idx_t* sam_index_load2(htsFile* fp, const(char)* fn, const(char)* fnidx); 1209 1210 /// Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file 1211 /** @param fp File handle of the data file whose index is being opened 1212 @param fn BAM/CRAM/etc data file filename 1213 @param fnidx Index filename, or NULL to search alongside @a fn 1214 @param flags Flags to alter behaviour (see description) 1215 @return The index, or NULL if an error occurred. 1216 1217 The @p flags parameter can be set to a combination of the following values: 1218 1219 HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes 1220 HTS_IDX_SILENT_FAIL Fail silently if the index is not present 1221 1222 Note that HTS_IDX_SAVE_REMOTE has no effect for remote CRAM indexes. They 1223 are always downloaded and never cached locally. 1224 1225 The index struct returned by a successful call should be freed 1226 via hts_idx_destroy() when it is no longer needed. 1227 */ 1228 hts_idx_t* sam_index_load3( 1229 htsFile* fp, 1230 const(char)* fn, 1231 const(char)* fnidx, 1232 int flags); 1233 1234 /// Generate and save an index file 1235 /** @param fn Input BAM/etc filename, to which .csi/etc will be added 1236 @param min_shift Positive to generate CSI, or 0 to generate BAI 1237 @return 0 if successful, or negative if an error occurred (usually -1; or 1238 -2: opening fn failed; -3: format not indexable; -4: 1239 failed to create and/or save the index) 1240 */ 1241 int sam_index_build(const(char)* fn, int min_shift); 1242 1243 /// Generate and save an index to a specific file 1244 /** @param fn Input BAM/CRAM/etc filename 1245 @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn 1246 @param min_shift Positive to generate CSI, or 0 to generate BAI 1247 @return 0 if successful, or negative if an error occurred (see 1248 sam_index_build for error codes) 1249 */ 1250 int sam_index_build2(const(char)* fn, const(char)* fnidx, int min_shift); 1251 1252 /// Generate and save an index to a specific file 1253 /** @param fn Input BAM/CRAM/etc filename 1254 @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn 1255 @param min_shift Positive to generate CSI, or 0 to generate BAI 1256 @param nthreads Number of threads to use when building the index 1257 @return 0 if successful, or negative if an error occurred (see 1258 sam_index_build for error codes) 1259 */ 1260 int sam_index_build3( 1261 const(char)* fn, 1262 const(char)* fnidx, 1263 int min_shift, 1264 int nthreads); 1265 1266 /// Free a SAM iterator 1267 /// @param iter Iterator to free 1268 alias sam_itr_destroy = hts_itr_destroy; 1269 1270 /// Create a BAM/CRAM iterator 1271 /** @param idx Index 1272 @param tid Target id 1273 @param beg Start position in target 1274 @param end End position in target 1275 @return An iterator on success; NULL on failure 1276 1277 The following special values (defined in htslib/hts.h)can be used for @p tid. 1278 When using one of these values, @p beg and @p end are ignored. 1279 1280 HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file 1281 HTS_IDX_START iterates over the entire file 1282 HTS_IDX_REST iterates from the current position to the end of the file 1283 HTS_IDX_NONE always returns "no more alignment records" 1284 1285 When using HTS_IDX_REST or HTS_IDX_NONE, NULL can be passed in to @p idx. 1286 */ 1287 hts_itr_t* sam_itr_queryi( 1288 const(hts_idx_t)* idx, 1289 int tid, 1290 hts_pos_t beg, 1291 hts_pos_t end); 1292 1293 /// Create a SAM/BAM/CRAM iterator 1294 /** @param idx Index 1295 @param hdr Header 1296 @param region Region specification 1297 @return An iterator on success; NULL on failure 1298 1299 Regions are parsed by hts_parse_reg(), and take one of the following forms: 1300 1301 region | Outputs 1302 --------------- | ------------- 1303 REF | All reads with RNAME REF 1304 REF: | All reads with RNAME REF 1305 REF:START | Reads with RNAME REF overlapping START to end of REF 1306 REF:-END | Reads with RNAME REF overlapping start of REF to END 1307 REF:START-END | Reads with RNAME REF overlapping START to END 1308 . | All reads from the start of the file 1309 * | Unmapped reads at the end of the file (RNAME '*' in SAM) 1310 1311 The form `REF:` should be used when the reference name itself contains a colon. 1312 1313 Note that SAM files must be bgzf-compressed for iterators to work. 1314 */ 1315 hts_itr_t* sam_itr_querys( 1316 const(hts_idx_t)* idx, 1317 sam_hdr_t* hdr, 1318 const(char)* region); 1319 1320 /// Create a multi-region iterator 1321 /** @param idx Index 1322 @param hdr Header 1323 @param reglist Array of regions to iterate over 1324 @param regcount Number of items in reglist 1325 1326 Each @p reglist entry should have the reference name in the `reg` field, an 1327 array of regions for that reference in `intervals` and the number of items 1328 in `intervals` should be stored in `count`. No other fields need to be filled 1329 in. 1330 1331 The iterator will return all reads overlapping the given regions. If a read 1332 overlaps more than one region, it will only be returned once. 1333 */ 1334 hts_itr_t* sam_itr_regions( 1335 const(hts_idx_t)* idx, 1336 sam_hdr_t* hdr, 1337 hts_reglist_t* reglist, 1338 uint regcount); 1339 1340 /// Create a multi-region iterator 1341 /** @param idx Index 1342 @param hdr Header 1343 @param regarray Array of ref:interval region specifiers 1344 @param regcount Number of items in regarray 1345 1346 Each @p regarray entry is parsed by hts_parse_reg(), and takes one of the 1347 following forms: 1348 1349 region | Outputs 1350 --------------- | ------------- 1351 REF | All reads with RNAME REF 1352 REF: | All reads with RNAME REF 1353 REF:START | Reads with RNAME REF overlapping START to end of REF 1354 REF:-END | Reads with RNAME REF overlapping start of REF to END 1355 REF:START-END | Reads with RNAME REF overlapping START to END 1356 . | All reads from the start of the file 1357 * | Unmapped reads at the end of the file (RNAME '*' in SAM) 1358 1359 The form `REF:` should be used when the reference name itself contains a colon. 1360 1361 The iterator will return all reads overlapping the given regions. If a read 1362 overlaps more than one region, it will only be returned once. 1363 */ 1364 hts_itr_t* sam_itr_regarray( 1365 const(hts_idx_t)* idx, 1366 sam_hdr_t* hdr, 1367 char** regarray, 1368 uint regcount); 1369 1370 /// Get the next read from a SAM/BAM/CRAM iterator 1371 /** @param htsfp Htsfile pointer for the input file 1372 @param itr Iterator 1373 @param r Pointer to a bam1_t struct 1374 @return >= 0 on success; -1 when there is no more data; < -1 on error 1375 */ 1376 int sam_itr_next(htsFile* htsfp, hts_itr_t* itr, bam1_t* r) { 1377 if (!htsfp.is_bgzf && !htsfp.is_cram) { 1378 hts_log_error(__FUNCTION__, format("%s not BGZF compressed", htsfp.fn ? htsfp.fn : "File")); 1379 return -2; 1380 } 1381 if (!itr) { 1382 hts_log_error(__FUNCTION__,"Null iterator"); 1383 return -2; 1384 } 1385 1386 if (itr.multi) 1387 return hts_itr_multi_next(htsfp, itr, r); 1388 else 1389 return hts_itr_next(htsfp.is_bgzf ? htsfp.fp.bgzf : null, itr, r, htsfp); 1390 } 1391 1392 /// Get the next read from a BAM/CRAM multi-iterator 1393 /** @param htsfp Htsfile pointer for the input file 1394 @param itr Iterator 1395 @param r Pointer to a bam1_t struct 1396 @return >= 0 on success; -1 when there is no more data; < -1 on error 1397 */ 1398 alias sam_itr_multi_next = sam_itr_next; 1399 1400 const(char)* sam_parse_region( 1401 sam_hdr_t* h, 1402 const(char)* s, 1403 int* tid, 1404 hts_pos_t* beg, 1405 hts_pos_t* end, 1406 int flags); 1407 1408 /*************** 1409 *** SAM I/O *** 1410 ***************/ 1411 1412 alias sam_open = hts_open; 1413 alias sam_open_format = hts_open_format; 1414 alias sam_close = hts_close; 1415 1416 int sam_open_mode(char* mode, const(char)* fn, const(char)* format); 1417 1418 // A version of sam_open_mode that can handle ,key=value options. 1419 // The format string is allocated and returned, to be freed by the caller. 1420 // Prefix should be "r" or "w", 1421 char* sam_open_mode_opts( 1422 const(char)* fn, 1423 const(char)* mode, 1424 const(char)* format); 1425 1426 int sam_hdr_change_HD(sam_hdr_t* h, const(char)* key, const(char)* val); 1427 1428 int sam_parse1(kstring_t* s, sam_hdr_t* h, bam1_t* b); 1429 int sam_format1(const(sam_hdr_t)* h, const(bam1_t)* b, kstring_t* str); 1430 1431 /// sam_read1 - Read a record from a file 1432 /** @param fp Pointer to the source file 1433 * @param h Pointer to the header previously read (fully or partially) 1434 * @param b Pointer to the record placeholder 1435 * @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error 1436 */ 1437 int sam_read1(samFile* fp, sam_hdr_t* h, bam1_t* b); 1438 /// sam_write1 - Write a record to a file 1439 /** @param fp Pointer to the destination file 1440 * @param h Pointer to the header structure previously read 1441 * @param b Pointer to the record to be written 1442 * @return >= 0 on successfully writing the record, -1 on error 1443 */ 1444 int sam_write1(samFile* fp, const(sam_hdr_t)* h, const(bam1_t)* b); 1445 1446 /************************************* 1447 *** Manipulating auxiliary fields *** 1448 *************************************/ 1449 1450 /// Return a pointer to an aux record 1451 /** @param b Pointer to the bam record 1452 @param tag Desired aux tag 1453 @return Pointer to the tag data, or NULL if tag is not present or on error 1454 If the tag is not present, this function returns NULL and sets errno to 1455 ENOENT. If the bam record's aux data is corrupt (either a tag has an 1456 invalid type, or the last record is incomplete) then errno is set to 1457 EINVAL and NULL is returned. 1458 */ 1459 ubyte* bam_aux_get(const(bam1_t)* b, ref const(char)[2] tag); 1460 1461 /// Get an integer aux value 1462 /** @param s Pointer to the tag data, as returned by bam_aux_get() 1463 @return The value, or 0 if the tag was not an integer type 1464 If the tag is not an integer type, errno is set to EINVAL. This function 1465 will not return the value of floating-point tags. 1466 */ 1467 long bam_aux2i(const(ubyte)* s); 1468 1469 /// Get an integer aux value 1470 /** @param s Pointer to the tag data, as returned by bam_aux_get() 1471 @return The value, or 0 if the tag was not an integer type 1472 If the tag is not an numeric type, errno is set to EINVAL. The value of 1473 integer flags will be returned cast to a double. 1474 */ 1475 double bam_aux2f(const(ubyte)* s); 1476 1477 /// Get a character aux value 1478 /** @param s Pointer to the tag data, as returned by bam_aux_get(). 1479 @return The value, or 0 if the tag was not a character ('A') type 1480 If the tag is not a character type, errno is set to EINVAL. 1481 */ 1482 char bam_aux2A(const(ubyte)* s); 1483 1484 /// Get a string aux value 1485 /** @param s Pointer to the tag data, as returned by bam_aux_get(). 1486 @return Pointer to the string, or NULL if the tag was not a string type 1487 If the tag is not a string type ('Z' or 'H'), errno is set to EINVAL. 1488 */ 1489 char* bam_aux2Z(const(ubyte)* s); 1490 1491 /// Get the length of an array-type ('B') tag 1492 /** @param s Pointer to the tag data, as returned by bam_aux_get(). 1493 @return The length of the array, or 0 if the tag is not an array type. 1494 If the tag is not an array type, errno is set to EINVAL. 1495 */ 1496 uint bam_auxB_len(const(ubyte)* s); 1497 1498 /// Get an integer value from an array-type tag 1499 /** @param s Pointer to the tag data, as returned by bam_aux_get(). 1500 @param idx 0-based Index into the array 1501 @return The idx'th value, or 0 on error. 1502 If the array is not an integer type, errno is set to EINVAL. If idx 1503 is greater than or equal to the value returned by bam_auxB_len(s), 1504 errno is set to ERANGE. In both cases, 0 will be returned. 1505 */ 1506 long bam_auxB2i(const(ubyte)* s, uint idx); 1507 1508 /// Get a floating-point value from an array-type tag 1509 /** @param s Pointer to the tag data, as returned by bam_aux_get(). 1510 @param idx 0-based Index into the array 1511 @return The idx'th value, or 0.0 on error. 1512 If the array is not a numeric type, errno is set to EINVAL. This can 1513 only actually happen if the input record has an invalid type field. If 1514 idx is greater than or equal to the value returned by bam_auxB_len(s), 1515 errno is set to ERANGE. In both cases, 0.0 will be returned. 1516 */ 1517 double bam_auxB2f(const(ubyte)* s, uint idx); 1518 1519 /// Append tag data to a bam record 1520 /* @param b The bam record to append to. 1521 @param tag Tag identifier 1522 @param type Tag data type 1523 @param len Length of the data in bytes 1524 @param data The data to append 1525 @return 0 on success; -1 on failure. 1526 If there is not enough space to store the additional tag, errno is set to 1527 ENOMEM. If the type is invalid, errno may be set to EINVAL. errno is 1528 also set to EINVAL if the bam record's aux data is corrupt. 1529 */ 1530 int bam_aux_append( 1531 bam1_t* b, 1532 ref const(char)[2] tag, 1533 char type, 1534 int len, 1535 const(ubyte)* data); 1536 1537 /// Delete tag data from a bam record 1538 /* @param b The bam record to update 1539 @param s Pointer to the tag to delete, as returned by bam_aux_get(). 1540 @return 0 on success; -1 on failure 1541 If the bam record's aux data is corrupt, errno is set to EINVAL and this 1542 function returns -1; 1543 */ 1544 int bam_aux_del(bam1_t* b, ubyte* s); 1545 1546 /// Update or add a string-type tag 1547 /* @param b The bam record to update 1548 @param tag Tag identifier 1549 @param len The length of the new string 1550 @param data The new string 1551 @return 0 on success, -1 on failure 1552 This function will not change the ordering of tags in the bam record. 1553 New tags will be appended to any existing aux records. 1554 1555 On failure, errno may be set to one of the following values: 1556 1557 EINVAL: The bam record's aux data is corrupt or an existing tag with the 1558 given ID is not of type 'Z'. 1559 1560 ENOMEM: The bam data needs to be expanded and either the attempt to 1561 reallocate the data buffer failed or the resulting buffer would be 1562 longer than the maximum size allowed in a bam record (2Gbytes). 1563 */ 1564 int bam_aux_update_str( 1565 bam1_t* b, 1566 ref const(char)[2] tag, 1567 int len, 1568 const(char)* data); 1569 1570 /// Update or add an integer tag 1571 /* @param b The bam record to update 1572 @param tag Tag identifier 1573 @param val The new value 1574 @return 0 on success, -1 on failure 1575 This function will not change the ordering of tags in the bam record. 1576 New tags will be appended to any existing aux records. 1577 1578 On failure, errno may be set to one of the following values: 1579 1580 EINVAL: The bam record's aux data is corrupt or an existing tag with the 1581 given ID is not of an integer type (c, C, s, S, i or I). 1582 1583 EOVERFLOW (or ERANGE on systems that do not have EOVERFLOW): val is 1584 outside the range that can be stored in an integer bam tag (-2147483647 1585 to 4294967295). 1586 1587 ENOMEM: The bam data needs to be expanded and either the attempt to 1588 reallocate the data buffer failed or the resulting buffer would be 1589 longer than the maximum size allowed in a bam record (2Gbytes). 1590 */ 1591 int bam_aux_update_int(bam1_t* b, ref const(char)[2] tag, long val); 1592 1593 /// Update or add a floating-point tag 1594 /* @param b The bam record to update 1595 @param tag Tag identifier 1596 @param val The new value 1597 @return 0 on success, -1 on failure 1598 This function will not change the ordering of tags in the bam record. 1599 New tags will be appended to any existing aux records. 1600 1601 On failure, errno may be set to one of the following values: 1602 1603 EINVAL: The bam record's aux data is corrupt or an existing tag with the 1604 given ID is not of a float type. 1605 1606 ENOMEM: The bam data needs to be expanded and either the attempt to 1607 reallocate the data buffer failed or the resulting buffer would be 1608 longer than the maximum size allowed in a bam record (2Gbytes). 1609 */ 1610 int bam_aux_update_float(bam1_t* b, ref const(char)[2] tag, float val); 1611 1612 /// Update or add an array tag 1613 /* @param b The bam record to update 1614 @param tag Tag identifier 1615 @param type Data type (one of c, C, s, S, i, I or f) 1616 @param items Number of items 1617 @param data Pointer to data 1618 @return 0 on success, -1 on failure 1619 The type parameter indicates the how the data is interpreted: 1620 1621 Letter code | Data type | Item Size (bytes) 1622 ----------- | --------- | ----------------- 1623 c | int8_t | 1 1624 C | uint8_t | 1 1625 s | int16_t | 2 1626 S | uint16_t | 2 1627 i | int32_t | 4 1628 I | uint32_t | 4 1629 f | float | 4 1630 1631 This function will not change the ordering of tags in the bam record. 1632 New tags will be appended to any existing aux records. The bam record 1633 will grow or shrink in order to accomodate the new data. 1634 1635 The data parameter must not point to any data in the bam record itself or 1636 undefined behaviour may result. 1637 1638 On failure, errno may be set to one of the following values: 1639 1640 EINVAL: The bam record's aux data is corrupt, an existing tag with the 1641 given ID is not of an array type or the type parameter is not one of 1642 the values listed above. 1643 1644 ENOMEM: The bam data needs to be expanded and either the attempt to 1645 reallocate the data buffer failed or the resulting buffer would be 1646 longer than the maximum size allowed in a bam record (2Gbytes). 1647 */ 1648 int bam_aux_update_array( 1649 bam1_t* b, 1650 ref const(char)[2] tag, 1651 ubyte type, 1652 uint items, 1653 void* data); 1654 1655 /************************** 1656 *** Pileup and Mpileup *** 1657 **************************/ 1658 1659 /*! @typedef 1660 @abstract Generic pileup 'client data'. 1661 1662 @discussion The pileup iterator allows setting a constructor and 1663 destructor function, which will be called every time a sequence is 1664 fetched and discarded. This permits caching of per-sequence data in 1665 a tidy manner during the pileup process. This union is the cached 1666 data to be manipulated by the "client" (the caller of pileup). 1667 */ 1668 union bam_pileup_cd 1669 { 1670 void* p; 1671 long i; 1672 double f; 1673 } 1674 1675 /*! @typedef 1676 @abstract Structure for one alignment covering the pileup position. 1677 @field b pointer to the alignment 1678 @field qpos position of the read base at the pileup site, 0-based 1679 @field indel indel length; 0 for no indel, positive for ins and negative for del 1680 @field level the level of the read in the "viewer" mode 1681 @field is_del 1 iff the base on the padded read is a deletion 1682 @field is_head 1 iff this is the first base in the query sequence 1683 @field is_tail 1 iff this is the last base in the query sequence 1684 @field is_refskip 1 iff the base on the padded read is part of CIGAR N op 1685 @field aux (used by bcf_call_gap_prep()) 1686 @field cigar_ind index of the CIGAR operator that has just been processed 1687 1688 @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The 1689 difference between the two functions is that the former does not 1690 set bam_pileup1_t::level, while the later does. Level helps the 1691 implementation of alignment viewers, but calculating this has some 1692 overhead. 1693 */ 1694 struct bam_pileup1_t 1695 { 1696 import std.bitmanip : bitfields; 1697 1698 bam1_t* b; 1699 int qpos; 1700 int indel; 1701 int level; 1702 1703 mixin(bitfields!( 1704 uint, "is_del", 1, 1705 uint, "is_head", 1, 1706 uint, "is_tail", 1, 1707 uint, "is_refskip", 1, 1708 uint, "", 1, 1709 uint, "aux", 27)); 1710 1711 /* reserved */ 1712 bam_pileup_cd cd; // generic per-struct data, owned by caller. 1713 int cigar_ind; 1714 } 1715 1716 alias bam_plp_auto_f = int function(void* data, bam1_t* b); 1717 1718 struct __bam_plp_t; 1719 alias bam_plp_t = __bam_plp_t*; 1720 1721 struct __bam_mplp_t; 1722 alias bam_mplp_t = __bam_mplp_t*; 1723 1724 /** 1725 * bam_plp_init() - sets an iterator over multiple 1726 * @func: see mplp_func in bam_plcmd.c in samtools for an example. Expected return 1727 * status: 0 on success, -1 on end, < -1 on non-recoverable errors 1728 * @data: user data to pass to @func 1729 * 1730 * The struct returned by a successful call should be freed 1731 * via bam_plp_destroy() when it is no longer needed. 1732 */ 1733 bam_plp_t bam_plp_init(bam_plp_auto_f func, void* data); 1734 1735 void bam_plp_destroy(bam_plp_t iter); 1736 1737 int bam_plp_push(bam_plp_t iter, const(bam1_t)* b); 1738 1739 const(bam_pileup1_t)* bam_plp_next( 1740 bam_plp_t iter, 1741 int* _tid, 1742 int* _pos, 1743 int* _n_plp); 1744 1745 const(bam_pileup1_t)* bam_plp_auto( 1746 bam_plp_t iter, 1747 int* _tid, 1748 int* _pos, 1749 int* _n_plp); 1750 1751 const(bam_pileup1_t)* bam_plp64_next( 1752 bam_plp_t iter, 1753 int* _tid, 1754 hts_pos_t* _pos, 1755 int* _n_plp); 1756 1757 const(bam_pileup1_t)* bam_plp64_auto( 1758 bam_plp_t iter, 1759 int* _tid, 1760 hts_pos_t* _pos, 1761 int* _n_plp); 1762 1763 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); 1764 1765 void bam_plp_reset(bam_plp_t iter); 1766 1767 /** 1768 * bam_plp_constructor() - sets a callback to initialise any per-pileup1_t fields. 1769 * @plp: The bam_plp_t initialised using bam_plp_init. 1770 * @func: The callback function itself. When called, it is given the 1771 * data argument (specified in bam_plp_init), the bam structure and 1772 * a pointer to a locally allocated bam_pileup_cd union. This union 1773 * will also be present in each bam_pileup1_t created. 1774 */ 1775 void bam_plp_constructor( 1776 bam_plp_t plp, 1777 int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func); 1778 void bam_plp_destructor( 1779 bam_plp_t plp, 1780 int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func); 1781 1782 /// Get pileup padded insertion sequence 1783 /** 1784 * @param p pileup data 1785 * @param ins the kstring where the insertion sequence will be written 1786 * @param del_len location for deletion length 1787 * @return the length of insertion string on success; -1 on failure. 1788 * 1789 * Fills out the kstring with the padded insertion sequence for the current 1790 * location in 'p'. If this is not an insertion site, the string is blank. 1791 * 1792 * If del_len is not NULL, the location pointed to is set to the length of 1793 * any deletion immediately following the insertion, or zero if none. 1794 */ 1795 int bam_plp_insertion(const(bam_pileup1_t)* p, kstring_t* ins, int* del_len); 1796 1797 /// Create a new bam_mplp_t structure 1798 /** The struct returned by a successful call should be freed 1799 * via bam_mplp_destroy() when it is no longer needed. 1800 */ 1801 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void** data); 1802 1803 /// Set up mpileup overlap detection 1804 /** 1805 * @param iter mpileup iterator 1806 * @return 0 on success; a negative value on error 1807 * 1808 * If called, mpileup will detect overlapping 1809 * read pairs and for each base pair set the base quality of the 1810 * lower-quality base to zero, thus effectively discarding it from 1811 * calling. If the two bases are identical, the quality of the other base 1812 * is increased to the sum of their qualities (capped at 200), otherwise 1813 * it is multiplied by 0.8. 1814 */ 1815 int bam_mplp_init_overlaps(bam_mplp_t iter); 1816 1817 void bam_mplp_destroy(bam_mplp_t iter); 1818 1819 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); 1820 1821 int bam_mplp_auto( 1822 bam_mplp_t iter, 1823 int* _tid, 1824 int* _pos, 1825 int* n_plp, 1826 const(bam_pileup1_t*)* plp); 1827 1828 int bam_mplp64_auto( 1829 bam_mplp_t iter, 1830 int* _tid, 1831 hts_pos_t* _pos, 1832 int* n_plp, 1833 const(bam_pileup1_t*)* plp); 1834 1835 void bam_mplp_reset(bam_mplp_t iter); 1836 1837 void bam_mplp_constructor( 1838 bam_mplp_t iter, 1839 int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func); 1840 1841 void bam_mplp_destructor( 1842 bam_mplp_t iter, 1843 int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func); 1844 1845 // ~!defined(BAM_NO_PILEUP) 1846 1847 /*********************************** 1848 * BAQ calculation and realignment * 1849 ***********************************/ 1850 1851 int sam_cap_mapq(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int thres); 1852 1853 /// Calculate BAQ scores 1854 /** @param b BAM record 1855 @param ref Reference sequence 1856 @param ref_len Reference sequence length 1857 @param flag Flags, see description 1858 @return 0 on success \n 1859 -1 if the read was unmapped, zero length, had no quality values, did not have at least one M, X or = CIGAR operator, or included a reference skip. \n 1860 -3 if BAQ alignment has already been done and does not need to be applied, or has already been applied. \n 1861 -4 if alignment failed (most likely due to running out of memory) 1862 1863 This function calculates base alignment quality (BAQ) values using the method 1864 described in "Improving SNP discovery by base alignment quality", Heng Li, 1865 Bioinformatics, Volume 27, Issue 8 (https://doi.org/10.1093/bioinformatics/btr076). 1866 1867 The following @param flag bits can be used: 1868 1869 Bit 0: Adjust the quality values using the BAQ values 1870 1871 If set, the data in the BQ:Z tag is used to adjust the quality values, and 1872 the BQ:Z tag is renamed to ZQ:Z. 1873 1874 If clear, and a ZQ:Z tag is present, the quality values are reverted using 1875 the data in the tag, and the tag is renamed to BQ:Z. 1876 1877 Bit 1: Use "extended" BAQ. 1878 1879 Changes the BAQ calculation to increase sensitivity at the expense of 1880 reduced specificity. 1881 1882 Bit 2: Recalculate BAQ, even if a BQ tag is present. 1883 1884 Force BAQ to be recalculated. Note that a ZQ:Z tag will always disable 1885 recalculation. 1886 1887 @bug 1888 If the input read has both BQ:Z and ZQ:Z tags, the ZQ:Z one will be removed. 1889 Depending on what previous processing happened, this may or may not be the 1890 correct thing to do. It would be wise to avoid this situation if possible. 1891 */ 1892 1893 int sam_prob_realn(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int flag); 1894