1 /// @file htslib/sam.h
2 /// High-level SAM/BAM/CRAM sequence file operations.
3 /*
4     Copyright (C) 2008, 2009, 2013-2019 Genome Research Ltd.
5     Copyright (C) 2010, 2012, 2013 Broad Institute.
6 
7     Author: Heng Li <lh3@sanger.ac.uk>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.  */
26 
27 module htslib.sam;
28 
29 import core.stdc.stdint;
30 import htslib.hts;
31 import htslib.hts_log;
32 import htslib.bgzf: BGZF;
33 import htslib.kstring: kstring_t;
34 import std.format: format;
35 
36 extern (C):
37 
38 /// Highest SAM format version supported by this library
39 enum SAM_FORMAT_VERSION = "1.6";
40 
41 /***************************
42  *** SAM/BAM/CRAM header ***
43  ***************************/
44 
45 /*! @typedef
46  * @abstract Header extension structure, grouping a collection
47  *  of hash tables that contain the parsed header data.
48  */
49 
50 struct sam_hrecs_t;
51 
52 /*! @typedef
53  @abstract Structure for the alignment header.
54  @field n_targets   number of reference sequences
55  @field l_text      length of the plain text in the header (may be zero if
56                     the header has been edited)
57  @field target_len  lengths of the reference sequences
58  @field target_name names of the reference sequences
59  @field text        plain text (may be NULL if the header has been edited)
60  @field sdict       header dictionary
61  @field hrecs       pointer to the extended header struct (internal use only)
62  @field ref_count   reference count
63 
64  @note The text and l_text fields are included for backwards compatibility.
65  These fields may be set to NULL and zero respectively as a side-effect
66  of calling some header API functions.  New code that needs to access the
67  header text should use the sam_hdr_str() and sam_hdr_length() functions
68  instead of these fields.
69  */
70 
71 struct sam_hdr_t
72 {
73     int n_targets;
74     int ignore_sam_err;
75     size_t l_text;
76     uint* target_len;
77     const(byte)* cigar_tab;
78     char** target_name;
79     char* text;
80     void* sdict;
81     sam_hrecs_t* hrecs;
82     uint ref_count;
83 }
84 
85 /*! @typedef
86  * @abstract Old name for compatibility with existing code.
87  */
88 alias bam_hdr_t = sam_hdr_t;
89 
90 /****************************
91  *** CIGAR related macros ***
92  ****************************/
93 
94 enum BAM_CMATCH = 0;
95 enum BAM_CINS = 1;
96 enum BAM_CDEL = 2;
97 enum BAM_CREF_SKIP = 3;
98 enum BAM_CSOFT_CLIP = 4;
99 enum BAM_CHARD_CLIP = 5;
100 enum BAM_CPAD = 6;
101 enum BAM_CEQUAL = 7;
102 enum BAM_CDIFF = 8;
103 enum BAM_CBACK = 9;
104 
105 enum BAM_CIGAR_STR = "MIDNSHP=XB";
106 enum BAM_CIGAR_SHIFT = 4;
107 enum BAM_CIGAR_MASK = 0xf;
108 enum BAM_CIGAR_TYPE = 0x3C1A7;
109 
110 /*! @abstract Table for converting a CIGAR operator character to BAM_CMATCH etc.
111 Result is operator code or -1. Be sure to cast the index if it is a plain char:
112     int op = bam_cigar_table[(unsigned char) ch];
113 */
114 extern __gshared const(byte)[256] bam_cigar_table;
115 
116 extern (D) auto bam_cigar_op(T)(auto ref T c)
117 {
118     return c & BAM_CIGAR_MASK;
119 }
120 
121 extern (D) auto bam_cigar_oplen(T)(auto ref T c)
122 {
123     return c >> BAM_CIGAR_SHIFT;
124 }
125 
126 // Note that BAM_CIGAR_STR is padded to length 16 bytes below so that
127 // the array look-up will not fall off the end.  '?' is chosen as the
128 // padding character so it's easy to spot if one is emitted, and will
129 // result in a parsing failure (in sam_parse1(), at least) if read.
130 extern (D) auto bam_cigar_gen(T0, T1)(auto ref T0 l, auto ref T1 o)
131 {
132     return l << BAM_CIGAR_SHIFT | o;
133 }
134 
135 /* bam_cigar_type returns a bit flag with:
136  *   bit 1 set if the cigar operation consumes the query
137  *   bit 2 set if the cigar operation consumes the reference
138  *
139  * For reference, the unobfuscated truth table for this function is:
140  * BAM_CIGAR_TYPE  QUERY  REFERENCE
141  * --------------------------------
142  * BAM_CMATCH      1      1
143  * BAM_CINS        1      0
144  * BAM_CDEL        0      1
145  * BAM_CREF_SKIP   0      1
146  * BAM_CSOFT_CLIP  1      0
147  * BAM_CHARD_CLIP  0      0
148  * BAM_CPAD        0      0
149  * BAM_CEQUAL      1      1
150  * BAM_CDIFF       1      1
151  * BAM_CBACK       0      0
152  * --------------------------------
153  */
154 extern (D) auto bam_cigar_type(T)(auto ref T o)
155 {
156     return BAM_CIGAR_TYPE >> (o << 1) & 3;
157 } // bit 1: consume query; bit 2: consume reference
158 
159 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
160 enum BAM_FPAIRED = 1;
161 /*! @abstract the read is mapped in a proper pair */
162 enum BAM_FPROPER_PAIR = 2;
163 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
164 enum BAM_FUNMAP = 4;
165 /*! @abstract the mate is unmapped */
166 enum BAM_FMUNMAP = 8;
167 /*! @abstract the read is mapped to the reverse strand */
168 enum BAM_FREVERSE = 16;
169 /*! @abstract the mate is mapped to the reverse strand */
170 enum BAM_FMREVERSE = 32;
171 /*! @abstract this is read1 */
172 enum BAM_FREAD1 = 64;
173 /*! @abstract this is read2 */
174 enum BAM_FREAD2 = 128;
175 /*! @abstract not primary alignment */
176 enum BAM_FSECONDARY = 256;
177 /*! @abstract QC failure */
178 enum BAM_FQCFAIL = 512;
179 /*! @abstract optical or PCR duplicate */
180 enum BAM_FDUP = 1024;
181 /*! @abstract supplementary alignment */
182 enum BAM_FSUPPLEMENTARY = 2048;
183 
184 /*************************
185  *** Alignment records ***
186  *************************/
187 
188 /*
189  * Assumptions made here.  While pos can be 64-bit, no sequence
190  * itself is that long, but due to ref skip CIGAR fields it
191  * may span more than that.  (CIGAR itself is 28-bit len + 4 bit
192  * type, but in theory we can combine multiples together.)
193  *
194  * Mate position and insert size also need to be 64-bit, but
195  * we won't accept more than 32-bit for tid.
196  *
197  * The bam_core_t structure is the *in memory* layout and not
198  * the same as the on-disk format.  64-bit changes here permit
199  * SAM to work with very long chromosomes and permit BAM and CRAM
200  * to seamlessly update in the future without further API/ABI
201  * revisions.
202  */
203 
204 /*! @typedef
205  @abstract Structure for core alignment information.
206  @field  pos     0-based leftmost coordinate
207  @field  tid     chromosome ID, defined by sam_hdr_t
208  @field  bin     bin calculated by bam_reg2bin()
209  @field  qual    mapping quality
210  @field  l_extranul length of extra NULs between qname & cigar (for alignment)
211  @field  flag    bitwise flag
212  @field  l_qname length of the query name
213  @field  n_cigar number of CIGAR operations
214  @field  l_qseq  length of the query sequence (read)
215  @field  mtid    chromosome ID of next read in template, defined by sam_hdr_t
216  @field  mpos    0-based leftmost coordinate of next read in template
217  @field  isize   observed template length ("insert size")
218  */
219 struct bam1_core_t
220 {
221     hts_pos_t pos;
222     int tid;
223     ushort bin; // NB: invalid on 64-bit pos
224     ubyte qual;
225     ubyte l_extranul;
226     ushort flag;
227     ushort l_qname;
228     uint n_cigar;
229     int l_qseq;
230     int mtid;
231     hts_pos_t mpos;
232     hts_pos_t isize;
233 }
234 
235 /*! @typedef
236  @abstract Structure for one alignment.
237  @field  core       core information about the alignment
238  @field  id
239  @field  data       all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
240  @field  l_data     current length of bam1_t::data
241  @field  m_data     maximum length of bam1_t::data
242  @field  mempolicy  memory handling policy, see bam_set_mempolicy()
243 
244  @discussion Notes:
245 
246  1. The data blob should be accessed using bam_get_qname, bam_get_cigar,
247     bam_get_seq, bam_get_qual and bam_get_aux macros.  These returns pointers
248     to the start of each type of data.
249  2. qname is terminated by one to four NULs, so that the following
250     cigar data is 32-bit aligned; core.l_qname includes these trailing NULs,
251     while core.l_extranul counts the excess NULs (so 0 <= l_extranul <= 3).
252  3. Cigar data is encoded 4 bytes per CIGAR operation.
253     See the bam_cigar_* macros for manipulation.
254  4. seq is nibble-encoded according to bam_nt16_table.
255     See the bam_seqi macro for retrieving individual bases.
256  5. Per base qualilties are stored in the Phred scale with no +33 offset.
257     Ie as per the BAM specification and not the SAM ASCII printable method.
258  */
259 struct bam1_t
260 {
261     import std.bitmanip : bitfields;
262 
263     bam1_core_t core;
264     ulong id;
265     ubyte* data;
266     int l_data;
267     uint m_data;
268 
269     mixin(bitfields!(
270         uint, "mempolicy", 2,
271         uint, "", 30));
272 
273     /* Reserved */
274 }
275 
276 /*! @function
277  @abstract  Get whether the query is on the reverse strand
278  @param  b  pointer to an alignment
279  @return    boolean true if query is on the reverse strand
280  */
281 extern (D) auto bam_is_rev(T)(auto ref T b)
282 {
283     return (b.core.flag & BAM_FREVERSE) != 0;
284 }
285 
286 /*! @function
287  @abstract  Get whether the query's mate is on the reverse strand
288  @param  b  pointer to an alignment
289  @return    boolean true if query's mate on the reverse strand
290  */
291 extern (D) auto bam_is_mrev(T)(auto ref T b)
292 {
293     return (b.core.flag & BAM_FMREVERSE) != 0;
294 }
295 
296 /*! @function
297  @abstract  Get the name of the query
298  @param  b  pointer to an alignment
299  @return    pointer to the name string, null terminated
300  */
301 extern (D) auto bam_get_qname(T)(auto ref T b)
302 {
303     return cast(char*) b.data;
304 }
305 
306 /*! @function
307  @abstract  Get the CIGAR array
308  @param  b  pointer to an alignment
309  @return    pointer to the CIGAR array
310 
311  @discussion In the CIGAR array, each element is a 32-bit integer. The
312  lower 4 bits gives a CIGAR operation and the higher 28 bits keep the
313  length of a CIGAR.
314  */
315 extern (D) auto bam_get_cigar(bam1_t * b)
316 {
317     return cast(uint*) ((*b).data + (*b).core.l_qname);
318 }
319 
320 /*! @function
321  @abstract  Get query sequence
322  @param  b  pointer to an alignment
323  @return    pointer to sequence
324 
325  @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
326  8 for T and 15 for N. Two bases are packed in one byte with the base
327  at the higher 4 bits having smaller coordinate on the read. It is
328  recommended to use bam_seqi() macro to get the base.
329  */
330 extern (D) auto bam_get_seq(T)(auto ref T b)
331 {
332     return b.data + (b.core.n_cigar << 2) + b.core.l_qname;
333 }
334 
335 /*! @function
336  @abstract  Get query quality
337  @param  b  pointer to an alignment
338  @return    pointer to quality string
339  */
340 extern (D) auto bam_get_qual(T)(auto ref T b)
341 {
342     return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1);
343 }
344 
345 /*! @function
346  @abstract  Get auxiliary data
347  @param  b  pointer to an alignment
348  @return    pointer to the concatenated auxiliary data
349  */
350 extern (D) auto bam_get_aux(T)(auto ref T b)
351 {
352     return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq;
353 }
354 
355 /*! @function
356  @abstract  Get length of auxiliary data
357  @param  b  pointer to an alignment
358  @return    length of the concatenated auxiliary data
359  */
360 extern (D) auto bam_get_l_aux(T)(auto ref T b)
361 {
362     return b.l_data - (b.core.n_cigar << 2) - b.core.l_qname - b.core.l_qseq - ((b.core.l_qseq + 1) >> 1);
363 }
364 
365 /*! @function
366  @abstract  Get a base on read
367  @param  s  Query sequence returned by bam_get_seq()
368  @param  i  The i-th position, 0-based
369  @return    4-bit integer representing the base.
370  */
371 extern (D) auto bam_seqi(T0, T1)(auto ref T0 s, auto ref T1 i)
372 {
373     return s[i >> 1] >> ((~i & 1) << 2) & 0xf;
374 }
375 
376 /**************************
377  *** Exported functions ***
378  **************************/
379 
380 /***************
381  *** BAM I/O ***
382  ***************/
383 
384 /* Header */
385 
386 /// Generates a new unpopulated header structure.
387 /*!
388  *
389  * @return  A valid pointer to new header on success, NULL on failure
390  *
391  * The sam_hdr_t struct returned by a successful call should be freed
392  * via sam_hdr_destroy() when it is no longer needed.
393  */
394 sam_hdr_t* sam_hdr_init();
395 
396 /// Read the header from a BAM compressed file.
397 /*!
398  * @param fp  File pointer
399  * @return    A valid pointer to new header on success, NULL on failure
400  *
401  * This function only works with BAM files.  It is usually better to use
402  * sam_hdr_read(), which works on SAM, BAM and CRAM files.
403  *
404  * The sam_hdr_t struct returned by a successful call should be freed
405  * via sam_hdr_destroy() when it is no longer needed.
406  */
407 sam_hdr_t* bam_hdr_read(BGZF* fp);
408 
409 /// Writes the header to a BAM file.
410 /*!
411  * @param fp  File pointer
412  * @param h   Header pointer
413  * @return    0 on success, -1 on failure
414  *
415  * This function only works with BAM files.  Use sam_hdr_write() to
416  * write in any of the SAM, BAM or CRAM formats.
417  */
418 int bam_hdr_write(BGZF* fp, const(sam_hdr_t)* h);
419 
420 /*!
421  * Frees the resources associated with a header.
422  */
423 void sam_hdr_destroy(sam_hdr_t* h);
424 
425 /// Duplicate a header structure.
426 /*!
427  * @return  A valid pointer to new header on success, NULL on failure
428  *
429  * The sam_hdr_t struct returned by a successful call should be freed
430  * via sam_hdr_destroy() when it is no longer needed.
431  */
432 sam_hdr_t* sam_hdr_dup(const(sam_hdr_t)* h0);
433 
434 /*!
435  * @abstract Old names for compatibility with existing code.
436  */
437 pragma(inline,true) 
438 sam_hdr_t* bam_hdr_init() { return sam_hdr_init(); }
439 
440 pragma(inline,true) 
441 void bam_hdr_destroy(sam_hdr_t* h) { sam_hdr_destroy(h); }
442 
443 pragma(inline,true) 
444 sam_hdr_t* bam_hdr_dup(const(sam_hdr_t)* h0) { return sam_hdr_dup(h0); }
445 
446 alias samFile = htsFile;
447 
448 /// Create a header from existing text.
449 /*!
450  * @param l_text    Length of text
451  * @param text      Header text
452  * @return A populated sam_hdr_t structure on success; NULL on failure.
453  * @note The text field of the returned header will be NULL, and the l_text
454  * field will be zero.
455  *
456  * The sam_hdr_t struct returned by a successful call should be freed
457  * via sam_hdr_destroy() when it is no longer needed.
458  */
459 sam_hdr_t* sam_hdr_parse(size_t l_text, const(char)* text);
460 
461 /// Read a header from a SAM, BAM or CRAM file.
462 /*!
463  * @param fp    Pointer to a SAM, BAM or CRAM file handle
464  * @return  A populated sam_hdr_t struct on success; NULL on failure.
465  *
466  * The sam_hdr_t struct returned by a successful call should be freed
467  * via sam_hdr_destroy() when it is no longer needed.
468  */
469 sam_hdr_t* sam_hdr_read(samFile* fp);
470 
471 /// Write a header to a SAM, BAM or CRAM file.
472 /*!
473  * @param fp    SAM, BAM or CRAM file header
474  * @param h     Header structure to write
475  * @return  0 on success; -1 on failure
476  */
477 int sam_hdr_write(samFile* fp, const(sam_hdr_t)* h);
478 
479 /// Returns the current length of the header text.
480 /*!
481  * @return  >= 0 on success, SIZE_MAX on failure
482  */
483 size_t sam_hdr_length(sam_hdr_t* h);
484 
485 /// Returns the text representation of the header.
486 /*!
487  * @return  valid char pointer on success, NULL on failure
488  *
489  * The returned string is part of the header structure.  It will remain
490  * valid until a call to a header API function causes the string to be
491  * invalidated, or the header is destroyed.
492  *
493  * The caller should not attempt to free or realloc this pointer.
494  */
495 const(char)* sam_hdr_str(sam_hdr_t* h);
496 
497 /// Returns the number of references in the header.
498 /*!
499  * @return  >= 0 on success, -1 on failure
500  */
501 int sam_hdr_nref(const(sam_hdr_t)* h);
502 
503 /* ==== Line level methods ==== */
504 
505 /// Add formatted lines to an existing header.
506 /*!
507  * @param lines  Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
508  *               optional new-line. If it contains more than 1 line then
509  *               multiple lines will be added in order
510  * @param len    The maximum length of lines (if an early NUL is not
511  *               encountered). len may be 0 if unknown, in which case
512  *               lines must be NUL-terminated
513  * @return       0 on success, -1 on failure
514  *
515  * The lines will be appended to the end of the existing header
516  * (apart from HD, which always comes first).
517  */
518 int sam_hdr_add_lines(sam_hdr_t* h, const(char)* lines, size_t len);
519 
520 /// Adds a single line to an existing header.
521 /*!
522  * Specify type and one or more key,value pairs, ending with the NULL key.
523  * Eg. sam_hdr_add_line(h, "SQ", "ID", "foo", "LN", "100", NULL).
524  *
525  * @param type  Type of the added line. Eg. "SQ"
526  * @return      0 on success, -1 on failure
527  *
528  * The new line will be added immediately after any others of the same
529  * type, or at the end of the existing header if no lines of the
530  * given type currently exist.  The exception is HD lines, which always
531  * come first.  If an HD line already exists, it will be replaced.
532  */
533 int sam_hdr_add_line(sam_hdr_t* h, const(char)* type, ...);
534 
535 /// Returns a complete line of formatted text for a given type and ID.
536 /*!
537  * @param type      Type of the searched line. Eg. "SQ"
538  * @param ID_key    Tag key defining the line. Eg. "SN"
539  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
540  * @param ks        kstring to hold the result
541  * @return          0 on success;
542  *                 -1 if no matching line is found
543  *                 -2 on other failures
544  *
545  * Puts a complete line of formatted text for a specific header type/ID
546  * combination into @p ks. If ID_key is NULL then it returns the first line of
547  * the specified type.
548  *
549  * Any existing content in @p ks will be overwritten.
550  */
551 int sam_hdr_find_line_id(
552     sam_hdr_t* h,
553     const(char)* type,
554     const(char)* ID_key,
555     const(char)* ID_val,
556     kstring_t* ks);
557 
558 /// Returns a complete line of formatted text for a given type and index.
559 /*!
560  * @param type      Type of the searched line. Eg. "SQ"
561  * @param position  Index in lines of this type (zero-based)
562  * @param ks        kstring to hold the result
563  * @return          0 on success;
564  *                 -1 if no matching line is found
565  *                 -2 on other failures
566  *
567  * Puts a complete line of formatted text for a specific line into @p ks.
568  * The header line is selected using the @p type and @p position parameters.
569  *
570  * Any existing content in @p ks will be overwritten.
571  */
572 int sam_hdr_find_line_pos(
573     sam_hdr_t* h,
574     const(char)* type,
575     int pos,
576     kstring_t* ks);
577 
578 /// Remove a line with given type / id from a header
579 /*!
580  * @param type      Type of the searched line. Eg. "SQ"
581  * @param ID_key    Tag key defining the line. Eg. "SN"
582  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
583  * @return          0 on success, -1 on error
584  *
585  * Remove a line from the header by specifying a tag:value that uniquely
586  * identifies the line, i.e. the @SQ line containing "SN:ref1".
587  *
588  * \@SQ line is uniquely identified by the SN tag.
589  * \@RG line is uniquely identified by the ID tag.
590  * \@PG line is uniquely identified by the ID tag.
591  * Eg. sam_hdr_remove_line_id(h, "SQ", "SN", "ref1")
592  *
593  * If no key:value pair is specified, the type MUST be followed by a NULL argument and
594  * the first line of the type will be removed, if any.
595  * Eg. sam_hdr_remove_line_id(h, "SQ", NULL, NULL)
596  *
597  * @note Removing \@PG lines is currently unsupported.
598  */
599 int sam_hdr_remove_line_id(
600     sam_hdr_t* h,
601     const(char)* type,
602     const(char)* ID_key,
603     const(char)* ID_value);
604 
605 /// Remove nth line of a given type from a header
606 /*!
607  * @param type     Type of the searched line. Eg. "SQ"
608  * @param position Index in lines of this type (zero-based). E.g. 3
609  * @return         0 on success, -1 on error
610  *
611  * Remove a line from the header by specifying the position in the type
612  * group, i.e. 3rd @SQ line.
613  */
614 int sam_hdr_remove_line_pos(sam_hdr_t* h, const(char)* type, int position);
615 
616 /// Add or update tag key,value pairs in a header line.
617 /*!
618  * @param type      Type of the searched line. Eg. "SQ"
619  * @param ID_key    Tag key defining the line. Eg. "SN"
620  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
621  * @return          0 on success, -1 on error
622  *
623  * Adds or updates tag key,value pairs in a header line.
624  * Eg. for adding M5 tags to @SQ lines or updating sort order for the
625  * @HD line.
626  *
627  * Specify multiple key,value pairs ending in NULL. Eg.
628  * sam_hdr_update_line(h, "RG", "ID", "rg1", "DS", "description", "PG", "samtools", NULL)
629  *
630  * Attempting to update the record name (i.e. @SQ SN or @RG ID) will
631  * work as long as the new name is not already in use, however doing this
632  * on a file opened for reading may produce unexpected results.
633  *
634  * Renaming an @RG record in this way will only change the header.  Alignment
635  * records written later will not be updated automatically even if they
636  * reference the old read group name.
637  *
638  * Attempting to change an @PG ID tag is not permitted.
639  */
640 int sam_hdr_update_line(
641     sam_hdr_t* h,
642     const(char)* type,
643     const(char)* ID_key,
644     const(char)* ID_value,
645     ...);
646 
647 /// Remove all lines of a given type from a header, except the one matching an ID
648 /*!
649  * @param type      Type of the searched line. Eg. "SQ"
650  * @param ID_key    Tag key defining the line. Eg. "SN"
651  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
652  * @return          0 on success, -1 on failure
653  *
654  * Remove all lines of type <type> from the header, except the one
655  * specified by tag:value, i.e. the @SQ line containing "SN:ref1".
656  *
657  * If no line matches the key:value ID, all lines of the given type are removed.
658  * To remove all lines of a given type, use NULL for both ID_key and ID_value.
659  */
660 int sam_hdr_remove_except(
661     sam_hdr_t* h,
662     const(char)* type,
663     const(char)* ID_key,
664     const(char)* ID_value);
665 
666 /// Remove header lines of a given type, except those in a given ID set
667 /*!
668  * @param type  Type of the searched line. Eg. "RG"
669  * @param id    Tag key defining the line. Eg. "ID"
670  * @param rh    Hash set initialised by the caller with the values to be kept.
671  *              See description for how to create this. If @p rh is NULL, all
672  *              lines of this type will be removed.
673  * @return      0 on success, -1 on failure
674  *
675  * Remove all lines of type @p type from the header, except the ones
676  * specified in the hash set @p rh. If @p rh is NULL, all lines of
677  * this type will be removed.
678  * Declaration of @p rh is done using KHASH_SET_INIT_STR macro. Eg.
679  * @code{.c}
680  *              #include "htslib/khash.h"
681  *              KHASH_SET_INIT_STR(keep)
682  *              typedef khash_t(keep) *keephash_t;
683  *
684  *              void your_method() {
685  *                  samFile *sf = sam_open("alignment.bam", "r");
686  *                  sam_hdr_t *h = sam_hdr_read(sf);
687  *                  keephash_t rh = kh_init(keep);
688  *                  int ret = 0;
689  *                  kh_put(keep, rh, strdup("chr2"), &ret);
690  *                  kh_put(keep, rh, strdup("chr3"), &ret);
691  *                  if (sam_hdr_remove_lines(h, "SQ", "SN", rh) == -1)
692  *                      fprintf(stderr, "Error removing lines\n");
693  *                  khint_t k;
694  *                  for (k = 0; k < kh_end(rh); ++k)
695  *                     if (kh_exist(rh, k)) free((char*)kh_key(rh, k));
696  *                  kh_destroy(keep, rh);
697  *                  sam_hdr_destroy(h);
698  *                  sam_close(sf);
699  *              }
700  * @endcode
701  *
702  */
703 int sam_hdr_remove_lines(
704     sam_hdr_t* h,
705     const(char)* type,
706     const(char)* id,
707     void* rh);
708 
709 /// Count the number of lines for a given header type
710 /*!
711  * @param h     BAM header
712  * @param type  Header type to count. Eg. "RG"
713  * @return  Number of lines of this type on success; -1 on failure
714  */
715 int sam_hdr_count_lines(sam_hdr_t* h, const(char)* type);
716 
717 /// Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
718 /*!
719  * @param h     BAM header
720  * @param type  Type of the searched line. Eg. "RG"
721  * @param key   The value of the identifying key. Eg. "rg1"
722  * @return  0-based index on success; -1 if line does not exist; -2 on failure
723  */
724 int sam_hdr_line_index(sam_hdr_t* bh, const(char)* type, const(char)* key);
725 
726 /// Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
727 /*!
728  * @param h     BAM header
729  * @param type  Type of the searched line. Eg. "RG"
730  * @param pos   Zero-based index inside the type group. Eg. 2 (for the third RG line)
731  * @return  Valid key string on success; NULL on failure
732  */
733 const(char)* sam_hdr_line_name(sam_hdr_t* bh, const(char)* type, int pos);
734 
735 /* ==== Key:val level methods ==== */
736 
737 /// Return the value associated with a key for a header line identified by ID_key:ID_val
738 /*!
739  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
740  * @param ID_key    Tag key defining the line. Eg. "SN". Can be NULL, if looking for the first line.
741  * @param ID_value  Tag value associated with the key above. Eg. "ref1". Can be NULL, if ID_key is NULL.
742  * @param key       Key of the searched tag. Eg. "LN"
743  * @param ks        kstring where the value will be written
744  * @return          0 on success
745  *                 -1 if the requested tag does not exist
746  *                 -2 on other errors
747  *
748  * Looks for a specific key in a single SAM header line and writes the
749  * associated value into @p ks.  The header line is selected using the ID_key
750  * and ID_value parameters.  Any pre-existing content in @p ks will be
751  * overwritten.
752  */
753 int sam_hdr_find_tag_id(
754     sam_hdr_t* h,
755     const(char)* type,
756     const(char)* ID_key,
757     const(char)* ID_value,
758     const(char)* key,
759     kstring_t* ks);
760 
761 /// Return the value associated with a key for a header line identified by position
762 /*!
763  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
764  * @param position  Index in lines of this type (zero-based). E.g. 3
765  * @param key       Key of the searched tag. Eg. "LN"
766  * @param ks        kstring where the value will be written
767  * @return          0 on success
768  *                 -1 if the requested tag does not exist
769  *                 -2 on other errors
770  *
771  * Looks for a specific key in a single SAM header line and writes the
772  * associated value into @p ks.  The header line is selected using the @p type
773  * and @p position parameters.  Any pre-existing content in @p ks will be
774  * overwritten.
775  */
776 int sam_hdr_find_tag_pos(
777     sam_hdr_t* h,
778     const(char)* type,
779     int pos,
780     const(char)* key,
781     kstring_t* ks);
782 
783 /// Remove the key from the line identified by type, ID_key and ID_value.
784 /*!
785  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
786  * @param ID_key    Tag key defining the line. Eg. "SN"
787  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
788  * @param key       Key of the targeted tag. Eg. "M5"
789  * @return          1 if the key was removed; 0 if it was not present; -1 on error
790  */
791 int sam_hdr_remove_tag_id(
792     sam_hdr_t* h,
793     const(char)* type,
794     const(char)* ID_key,
795     const(char)* ID_value,
796     const(char)* key);
797 
798 /// Get the target id for a given reference sequence name
799 /*!
800  * @param ref  Reference name
801  * @return     Positive value on success,
802  *             -1 if unknown reference,
803  *             -2 if the header could not be parsed
804  *
805  * Looks up a reference sequence by name in the reference hash table
806  * and returns the numerical target id.
807  */
808 int sam_hdr_name2tid(sam_hdr_t* h, const(char)* ref_);
809 
810 /// Get the reference sequence name from a target index
811 /*!
812  * @param tid  Target index
813  * @return     Valid reference name on success, NULL on failure
814  *
815  * Fetch the reference sequence name from the target name array,
816  * using the numerical target id.
817  */
818 const(char)* sam_hdr_tid2name(const(sam_hdr_t)* h, int tid);
819 
820 /// Get the reference sequence length from a target index
821 /*!
822  * @param tid  Target index
823  * @return     Strictly positive value on success, 0 on failure
824  *
825  * Fetch the reference sequence length from the target length array,
826  * using the numerical target id.
827  */
828 hts_pos_t sam_hdr_tid2len(const(sam_hdr_t)* h, int tid);
829 
830 /// Alias of sam_hdr_name2tid(), for backwards compatibility.
831 /*!
832  * @param ref  Reference name
833  * @return     Positive value on success,
834  *             -1 if unknown reference,
835  *             -2 if the header could not be parsed
836  */
837 pragma(inline,true)
838 int bam_name2id(sam_hdr_t* h, const(char)* ref_) { return sam_hdr_name2tid(h, ref_); }
839 
840 /// Generate a unique \@PG ID: value
841 /*!
842  * @param name  Name of the program. Eg. samtools
843  * @return      Valid ID on success, NULL on failure
844  *
845  * Returns a unique ID from a base name.  The string returned will remain
846  * valid until the next call to this function, or the header is destroyed.
847  * The caller should not attempt to free() or realloc() it.
848  */
849 const(char)* sam_hdr_pg_id(sam_hdr_t* h, const(char)* name);
850 
851 /// Add an \@PG line.
852 /*!
853  * @param name  Name of the program. Eg. samtools
854  * @return      0 on success, -1 on failure
855  *
856  * If we wish complete control over this use sam_hdr_add_line() directly. This
857  * function uses that, but attempts to do a lot of tedious house work for
858  * you too.
859  *
860  * - It will generate a suitable ID if the supplied one clashes.
861  * - It will generate multiple \@PG records if we have multiple PG chains.
862  *
863  * Call it as per sam_hdr_add_line() with a series of key,value pairs ending
864  * in NULL.
865  */
866 int sam_hdr_add_pg(sam_hdr_t* h, const(char)* name, ...);
867 
868 /*!
869  * A function to help with construction of CL tags in @PG records.
870  * Takes an argc, argv pair and returns a single space-separated string.
871  * This string should be deallocated by the calling function.
872  *
873  * @return
874  * Returns malloced char * on success;
875  *         NULL on failure
876  */
877 char* stringify_argv(int argc, char** argv);
878 
879 /// Increments the reference count on a header
880 /*!
881  * This permits multiple files to share the same header, all calling
882  * sam_hdr_destroy when done, without causing errors for other open files.
883  */
884 void sam_hdr_incr_ref(sam_hdr_t* h);
885 
886 /*
887  * Macros for changing the \@HD line. They eliminate the need to use NULL method arguments.
888  */
889 
890 /// Returns the SAM formatted text of the \@HD header line
891 extern (D) auto sam_hdr_find_hd(T0, T1)(auto ref T0 h, auto ref T1 ks)
892 {
893     return sam_hdr_find_line_id(h, "HD", NULL, NULL, ks);
894 }
895 
896 /// Returns the value associated with a given \@HD line tag
897 extern (D) auto sam_hdr_find_tag_hd(T0, T1, T2)(auto ref T0 h, auto ref T1 key, auto ref T2 ks)
898 {
899     return sam_hdr_find_tag_id(h, "HD", NULL, NULL, key, ks);
900 }
901 
902 /// Adds or updates tags on the header \@HD line
903 extern (D) auto sam_hdr_update_hd(T, A...)(auto ref T h, auto ref A a)
904 {
905     // NOTE: This macro was dropped by dstep due to variadic args
906     static assert (a.length %2 == 0);   // K-V pairs => even number of variadic args
907     return sam_hdr_update_line(h, "HD", null, null, a, null);
908 }
909 
910 /// Removes the \@HD line tag with the given key
911 extern (D) auto sam_hdr_remove_tag_hd(T0, T1)(auto ref T0 h, auto ref T1 key)
912 {
913     return sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key);
914 }
915 
916 /* Alignment */
917 
918 /// Create a new bam1_t alignment structure
919 /**
920    @return An empty bam1_t structure on success, NULL on failure
921 
922    The bam1_t struct returned by a successful call should be freed
923    via bam_destroy1() when it is no longer needed.
924  */
925 bam1_t* bam_init1();
926 
927 /// Destroy a bam1_t structure
928 /**
929    @param b  structure to destroy
930 
931    Does nothing if @p b is NULL.  If not, all memory associated with @p b
932    will be freed, along with the structure itself.  @p b should not be
933    accessed after calling this function.
934  */
935 void bam_destroy1(bam1_t* b);
936 
937 enum BAM_USER_OWNS_STRUCT = 1;
938 enum BAM_USER_OWNS_DATA = 2;
939 
940 /// Set alignment record memory policy
941 /**
942    @param b       Alignment record
943    @param policy  Desired policy
944 
945    Allows the way HTSlib reallocates and frees bam1_t data to be
946    changed.  @policy can be set to the bitwise-or of the following
947    values:
948 
949    \li \c BAM_USER_OWNS_STRUCT
950    If this is set then bam_destroy1() will not try to free the bam1_t struct.
951 
952    \li \c BAM_USER_OWNS_DATA
953    If this is set, bam_destroy1() will not free the bam1_t::data pointer.
954    Also, functions which need to expand bam1_t::data memory will change
955    behaviour.  Instead of calling realloc() on the pointer, they will
956    allocate a new data buffer and copy any existing content in to it.
957    The existing memory will \b not be freed.  bam1_t::data will be
958    set to point to the new memory and the BAM_USER_OWNS_DATA flag will be
959    cleared.
960 
961    BAM_USER_OWNS_STRUCT allows bam_destroy1() to be called on bam1_t
962    structures that are members of an array.
963 
964    BAM_USER_OWNS_DATA can be used by applications that want more control
965    over where the variable-length parts of the bam record will be stored.
966    By preventing calls to free() and realloc(), it allows bam1_t::data
967    to hold pointers to memory that cannot be passed to those functions.
968 
969    Example:  Read a block of alignment records, storing the variable-length
970    data in a single buffer and the records in an array.  Stop when either
971    the array or the buffer is full.
972 
973    \code{.c}
974    #define MAX_RECS 1000
975    #define REC_LENGTH 400  // Average length estimate, to get buffer size
976    size_t bufsz = MAX_RECS * REC_LENGTH, nrecs, buff_used = 0;
977    bam1_t *recs = calloc(MAX_RECS, sizeof(bam1_t));
978    uint8_t *buffer = malloc(bufsz);
979    int res = 0, result = EXIT_FAILURE;
980    uint32_t new_m_data;
981 
982    if (!recs || !buffer) goto cleanup;
983    for (nrecs = 0; nrecs < MAX_RECS; nrecs++) {
984       bam_set_mempolicy(BAM_USER_OWNS_STRUCT|BAM_USER_OWNS_DATA);
985 
986       // Set data pointer to unused part of buffer
987       recs[nrecs].data = &buffer[buff_used];
988 
989       // Set m_data to size of unused part of buffer.  On 64-bit platforms it
990       // will be necessary to limit this to UINT32_MAX due to the size of
991       // bam1_t::m_data (not done here as our buffer is only 400K).
992       recs[nrecs].m_data = bufsz - buff_used;
993 
994       // Read the record
995       res = sam_read1(file_handle, header, &recs[nrecs]);
996       if (res <= 0) break; // EOF or error
997 
998       // Check if the record data didn't fit - if not, stop reading
999       if ((bam_get_mempolicy(&recs[nrecs]) & BAM_USER_OWNS_DATA) == 0) {
1000          nrecs++; // Include last record in count
1001          break;
1002       }
1003 
1004       // Adjust m_data to the space actually used.  If space is available,
1005       // round up to eight bytes so the next record aligns nicely.
1006       new_m_data = ((uint32_t) recs[nrecs].l_data + 7) & (~7U);
1007       if (new_m_data < recs[nrecs].m_data) recs[nrecs].m_data = new_m_data;
1008 
1009       buff_used += recs[nrecs].m_data;
1010    }
1011    if (res < 0) goto cleanup;
1012    result = EXIT_SUCCESS;
1013 
1014    // ... use data ...
1015 
1016  cleanup:
1017    for (size_t i = 0; i < nrecs; i++)
1018      bam_destroy1(i);
1019    free(buffer);
1020    free(recs);
1021 
1022    \endcode
1023 */
1024 void bam_set_mempolicy(bam1_t* b, uint policy) {
1025     b.mempolicy = policy;
1026 }
1027 
1028 /// Get alignment record memory policy
1029 /** @param b    Alignment record
1030 
1031     See bam_set_mempolicy()
1032  */
1033 uint bam_get_mempolicy(bam1_t* b) {
1034     return b.mempolicy;
1035 }
1036 
1037 /// Read a BAM format alignment record
1038 /**
1039    @param fp   BGZF file being read
1040    @param b    Destination for the alignment data
1041    @return number of bytes read on success
1042            -1 at end of file
1043            < -1 on failure
1044 
1045    This function can only read BAM format files.  Most code should use
1046    sam_read1() instead, which can be used with BAM, SAM and CRAM formats.
1047 */
1048 int bam_read1(BGZF* fp, bam1_t* b);
1049 
1050 /// Write a BAM format alignment record
1051 /**
1052    @param fp  BGZF file being written
1053    @param b   Alignment record to write
1054    @return number of bytes written on success
1055            -1 on error
1056 
1057    This function can only write BAM format files.  Most code should use
1058    sam_write1() instead, which can be used with BAM, SAM and CRAM formats.
1059 */
1060 int bam_write1(BGZF* fp, const(bam1_t)* b);
1061 
1062 /// Copy alignment record data
1063 /**
1064    @param bdst  Destination alignment record
1065    @param bsrc  Source alignment record
1066    @return bdst on success; NULL on failure
1067  */
1068 bam1_t* bam_copy1(bam1_t* bdst, const(bam1_t)* bsrc);
1069 
1070 /// Create a duplicate alignment record
1071 /**
1072    @param bsrc  Source alignment record
1073    @return Pointer to a new alignment record on success; NULL on failure
1074 
1075    The bam1_t struct returned by a successful call should be freed
1076    via bam_destroy1() when it is no longer needed.
1077  */
1078 bam1_t* bam_dup1(const(bam1_t)* bsrc);
1079 
1080 /// Calculate query length from CIGAR data
1081 /**
1082    @param n_cigar   Number of items in @p cigar
1083    @param cigar     CIGAR data
1084    @return Query length
1085 
1086    CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op
1087    where op_len is the length in bases and op is a value between 0 and 8
1088    representing one of the operations "MIDNSHP=X" (M = 0; X = 8)
1089 
1090    This function returns the sum of the lengths of the M, I, S, = and X
1091    operations in @p cigar (these are the operations that "consume" query
1092    bases).  All other operations (including invalid ones) are ignored.
1093 
1094    @note This return type of this function is hts_pos_t so that it can
1095    correctly return the length of CIGAR sequences including many long
1096    operations without overflow. However, other restrictions (notably the sizes
1097    of bam1_core_t::l_qseq and bam1_t::data) limit the maximum query sequence
1098    length supported by HTSlib to fewer than INT_MAX bases.
1099  */
1100 hts_pos_t bam_cigar2qlen(int n_cigar, const(uint)* cigar);
1101 
1102 /// Calculate reference length from CIGAR data
1103 /**
1104    @param n_cigar   Number of items in @p cigar
1105    @param cigar     CIGAR data
1106    @return Reference length
1107 
1108    CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op
1109    where op_len is the length in bases and op is a value between 0 and 8
1110    representing one of the operations "MIDNSHP=X" (M = 0; X = 8)
1111 
1112    This function returns the sum of the lengths of the M, D, N, = and X
1113    operations in @p cigar (these are the operations that "consume" reference
1114    bases).  All other operations (including invalid ones) are ignored.
1115  */
1116 hts_pos_t bam_cigar2rlen(int n_cigar, const(uint)* cigar);
1117 
1118 /*!
1119       @abstract Calculate the rightmost base position of an alignment on the
1120       reference genome.
1121 
1122       @param  b  pointer to an alignment
1123       @return    the coordinate of the first base after the alignment, 0-based
1124 
1125       @discussion For a mapped read, this is just b->core.pos + bam_cigar2rlen.
1126       For an unmapped read (either according to its flags or if it has no cigar
1127       string), we return b->core.pos + 1 by convention.
1128  */
1129 hts_pos_t bam_endpos(const(bam1_t)* b);
1130 
1131 int bam_str2flag(const(char)* str); /** returns negative value on error */
1132 
1133 char* bam_flag2str(int flag); /** The string must be freed by the user */
1134 
1135 /*! @function
1136  @abstract  Set the name of the query
1137  @param  b  pointer to an alignment
1138  @return    0 on success, -1 on failure
1139  */
1140 int bam_set_qname(bam1_t* b, const(char)* qname);
1141 
1142 /*************************
1143  *** BAM/CRAM indexing ***
1144  *************************/
1145 
1146 // These BAM iterator functions work only on BAM files.  To work with either
1147 // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.
1148 alias bam_itr_destroy = hts_itr_destroy;
1149 alias bam_itr_queryi = sam_itr_queryi;
1150 alias bam_itr_querys = sam_itr_querys;
1151 
1152 pragma(inline, true)
1153 extern (D) auto bam_itr_next(T0, T1, T2)(auto ref T0 htsfp, auto ref T1 itr, auto ref T2 r)
1154 {
1155     return hts_itr_next(htsfp.fp.bgzf, itr, r, 0);
1156 }
1157 
1158 // Load/build .csi or .bai BAM index file.  Does not work with CRAM.
1159 // It is recommended to use the sam_index_* functions below instead.
1160 pragma(inline, true)
1161 extern (D) auto bam_index_load(T)(auto ref T fn)
1162 {
1163     return hts_idx_load(fn, HTS_FMT_BAI);
1164 }
1165 
1166 pragma(inline, true)
1167 extern (D) auto bam_index_build(T0, T1)(auto ref T0 fn, auto ref T1 min_shift)
1168 {
1169     return sam_index_build(fn, min_shift);
1170 }
1171 
1172 /// Initialise fp->idx for the current format type for SAM, BAM and CRAM types .
1173 /** @param fp        File handle for the data file being written.
1174     @param h         Bam header structured (needed for BAI and CSI).
1175     @param min_shift 0 for BAI, or larger for CSI (CSI defaults to 14).
1176     @param fnidx     Filename to write index to.  This pointer must remain valid
1177                      until after sam_idx_save is called.
1178     @return          0 on success, <0 on failure.
1179 
1180     @note This must be called after the header has been written, but before
1181           any other data.
1182 */
1183 int sam_idx_init(htsFile* fp, sam_hdr_t* h, int min_shift, const(char)* fnidx);
1184 
1185 /// Writes the index initialised with sam_idx_init to disk.
1186 /** @param fp        File handle for the data file being written.
1187     @return          0 on success, <0 on filaure.
1188 */
1189 int sam_idx_save(htsFile* fp);
1190 
1191 /// Load a BAM (.csi or .bai) or CRAM (.crai) index file
1192 /** @param fp  File handle of the data file whose index is being opened
1193     @param fn  BAM/CRAM/etc filename to search alongside for the index file
1194     @return  The index, or NULL if an error occurred.
1195 
1196 Equivalent to sam_index_load3(fp, fn, NULL, HTS_IDX_SAVE_REMOTE);
1197 */
1198 hts_idx_t* sam_index_load(htsFile* fp, const(char)* fn);
1199 
1200 /// Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
1201 /** @param fp     File handle of the data file whose index is being opened
1202     @param fn     BAM/CRAM/etc data file filename
1203     @param fnidx  Index filename, or NULL to search alongside @a fn
1204     @return  The index, or NULL if an error occurred.
1205 
1206 Equivalent to sam_index_load3(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE);
1207 */
1208 hts_idx_t* sam_index_load2(htsFile* fp, const(char)* fn, const(char)* fnidx);
1209 
1210 /// Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
1211 /** @param fp     File handle of the data file whose index is being opened
1212     @param fn     BAM/CRAM/etc data file filename
1213     @param fnidx  Index filename, or NULL to search alongside @a fn
1214     @param flags  Flags to alter behaviour (see description)
1215     @return  The index, or NULL if an error occurred.
1216 
1217 The @p flags parameter can be set to a combination of the following values:
1218 
1219         HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
1220         HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
1221 
1222 Note that HTS_IDX_SAVE_REMOTE has no effect for remote CRAM indexes.  They
1223 are always downloaded and never cached locally.
1224 
1225 The index struct returned by a successful call should be freed
1226 via hts_idx_destroy() when it is no longer needed.
1227 */
1228 hts_idx_t* sam_index_load3(
1229     htsFile* fp,
1230     const(char)* fn,
1231     const(char)* fnidx,
1232     int flags);
1233 
1234 /// Generate and save an index file
1235 /** @param fn        Input BAM/etc filename, to which .csi/etc will be added
1236     @param min_shift Positive to generate CSI, or 0 to generate BAI
1237     @return  0 if successful, or negative if an error occurred (usually -1; or
1238              -2: opening fn failed; -3: format not indexable; -4:
1239              failed to create and/or save the index)
1240 */
1241 int sam_index_build(const(char)* fn, int min_shift);
1242 
1243 /// Generate and save an index to a specific file
1244 /** @param fn        Input BAM/CRAM/etc filename
1245     @param fnidx     Output filename, or NULL to add .bai/.csi/etc to @a fn
1246     @param min_shift Positive to generate CSI, or 0 to generate BAI
1247     @return  0 if successful, or negative if an error occurred (see
1248              sam_index_build for error codes)
1249 */
1250 int sam_index_build2(const(char)* fn, const(char)* fnidx, int min_shift);
1251 
1252 /// Generate and save an index to a specific file
1253 /** @param fn        Input BAM/CRAM/etc filename
1254     @param fnidx     Output filename, or NULL to add .bai/.csi/etc to @a fn
1255     @param min_shift Positive to generate CSI, or 0 to generate BAI
1256     @param nthreads  Number of threads to use when building the index
1257     @return  0 if successful, or negative if an error occurred (see
1258              sam_index_build for error codes)
1259 */
1260 int sam_index_build3(
1261     const(char)* fn,
1262     const(char)* fnidx,
1263     int min_shift,
1264     int nthreads);
1265 
1266 /// Free a SAM iterator
1267 /// @param iter     Iterator to free
1268 alias sam_itr_destroy = hts_itr_destroy;
1269 
1270 /// Create a BAM/CRAM iterator
1271 /** @param idx     Index
1272     @param tid     Target id
1273     @param beg     Start position in target
1274     @param end     End position in target
1275     @return An iterator on success; NULL on failure
1276 
1277 The following special values (defined in htslib/hts.h)can be used for @p tid.
1278 When using one of these values, @p beg and @p end are ignored.
1279 
1280   HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
1281   HTS_IDX_START  iterates over the entire file
1282   HTS_IDX_REST   iterates from the current position to the end of the file
1283   HTS_IDX_NONE   always returns "no more alignment records"
1284 
1285 When using HTS_IDX_REST or HTS_IDX_NONE, NULL can be passed in to @p idx.
1286  */
1287 hts_itr_t* sam_itr_queryi(
1288     const(hts_idx_t)* idx,
1289     int tid,
1290     hts_pos_t beg,
1291     hts_pos_t end);
1292 
1293 /// Create a SAM/BAM/CRAM iterator
1294 /** @param idx     Index
1295     @param hdr     Header
1296     @param region  Region specification
1297     @return An iterator on success; NULL on failure
1298 
1299 Regions are parsed by hts_parse_reg(), and take one of the following forms:
1300 
1301 region          | Outputs
1302 --------------- | -------------
1303 REF             | All reads with RNAME REF
1304 REF:            | All reads with RNAME REF
1305 REF:START       | Reads with RNAME REF overlapping START to end of REF
1306 REF:-END        | Reads with RNAME REF overlapping start of REF to END
1307 REF:START-END   | Reads with RNAME REF overlapping START to END
1308 .               | All reads from the start of the file
1309 *               | Unmapped reads at the end of the file (RNAME '*' in SAM)
1310 
1311 The form `REF:` should be used when the reference name itself contains a colon.
1312 
1313 Note that SAM files must be bgzf-compressed for iterators to work.
1314  */
1315 hts_itr_t* sam_itr_querys(
1316     const(hts_idx_t)* idx,
1317     sam_hdr_t* hdr,
1318     const(char)* region);
1319 
1320 /// Create a multi-region iterator
1321 /** @param idx       Index
1322     @param hdr       Header
1323     @param reglist   Array of regions to iterate over
1324     @param regcount  Number of items in reglist
1325 
1326 Each @p reglist entry should have the reference name in the `reg` field, an
1327 array of regions for that reference in `intervals` and the number of items
1328 in `intervals` should be stored in `count`.  No other fields need to be filled
1329 in.
1330 
1331 The iterator will return all reads overlapping the given regions.  If a read
1332 overlaps more than one region, it will only be returned once.
1333  */
1334 hts_itr_t* sam_itr_regions(
1335     const(hts_idx_t)* idx,
1336     sam_hdr_t* hdr,
1337     hts_reglist_t* reglist,
1338     uint regcount);
1339 
1340 /// Create a multi-region iterator
1341 /** @param idx       Index
1342     @param hdr       Header
1343     @param regarray  Array of ref:interval region specifiers
1344     @param regcount  Number of items in regarray
1345 
1346 Each @p regarray entry is parsed by hts_parse_reg(), and takes one of the
1347 following forms:
1348 
1349 region          | Outputs
1350 --------------- | -------------
1351 REF             | All reads with RNAME REF
1352 REF:            | All reads with RNAME REF
1353 REF:START       | Reads with RNAME REF overlapping START to end of REF
1354 REF:-END        | Reads with RNAME REF overlapping start of REF to END
1355 REF:START-END   | Reads with RNAME REF overlapping START to END
1356 .               | All reads from the start of the file
1357 *               | Unmapped reads at the end of the file (RNAME '*' in SAM)
1358 
1359 The form `REF:` should be used when the reference name itself contains a colon.
1360 
1361 The iterator will return all reads overlapping the given regions.  If a read
1362 overlaps more than one region, it will only be returned once.
1363  */
1364 hts_itr_t* sam_itr_regarray(
1365     const(hts_idx_t)* idx,
1366     sam_hdr_t* hdr,
1367     char** regarray,
1368     uint regcount);
1369 
1370 /// Get the next read from a SAM/BAM/CRAM iterator
1371 /** @param htsfp       Htsfile pointer for the input file
1372     @param itr         Iterator
1373     @param r           Pointer to a bam1_t struct
1374     @return >= 0 on success; -1 when there is no more data; < -1 on error
1375  */
1376 int sam_itr_next(htsFile* htsfp, hts_itr_t* itr, bam1_t* r) {
1377     if (!htsfp.is_bgzf && !htsfp.is_cram) {
1378         hts_log_error(__FUNCTION__, format("%s not BGZF compressed", htsfp.fn ? htsfp.fn : "File"));
1379         return -2;
1380     }
1381     if (!itr) {
1382         hts_log_error(__FUNCTION__,"Null iterator");
1383         return -2;
1384     }
1385 
1386     if (itr.multi)
1387         return hts_itr_multi_next(htsfp, itr, r);
1388     else
1389         return hts_itr_next(htsfp.is_bgzf ? htsfp.fp.bgzf : null, itr, r, htsfp);
1390 }
1391 
1392 /// Get the next read from a BAM/CRAM multi-iterator
1393 /** @param htsfp       Htsfile pointer for the input file
1394     @param itr         Iterator
1395     @param r           Pointer to a bam1_t struct
1396     @return >= 0 on success; -1 when there is no more data; < -1 on error
1397  */
1398 alias sam_itr_multi_next = sam_itr_next;
1399 
1400 const(char)* sam_parse_region(
1401     sam_hdr_t* h,
1402     const(char)* s,
1403     int* tid,
1404     hts_pos_t* beg,
1405     hts_pos_t* end,
1406     int flags);
1407 
1408 /***************
1409  *** SAM I/O ***
1410  ***************/
1411 
1412 alias sam_open = hts_open;
1413 alias sam_open_format = hts_open_format;
1414 alias sam_close = hts_close;
1415 
1416 int sam_open_mode(char* mode, const(char)* fn, const(char)* format);
1417 
1418 // A version of sam_open_mode that can handle ,key=value options.
1419 // The format string is allocated and returned, to be freed by the caller.
1420 // Prefix should be "r" or "w",
1421 char* sam_open_mode_opts(
1422     const(char)* fn,
1423     const(char)* mode,
1424     const(char)* format);
1425 
1426 int sam_hdr_change_HD(sam_hdr_t* h, const(char)* key, const(char)* val);
1427 
1428 int sam_parse1(kstring_t* s, sam_hdr_t* h, bam1_t* b);
1429 int sam_format1(const(sam_hdr_t)* h, const(bam1_t)* b, kstring_t* str);
1430 
1431 /// sam_read1 - Read a record from a file
1432 /** @param fp   Pointer to the source file
1433  *  @param h    Pointer to the header previously read (fully or partially)
1434  *  @param b    Pointer to the record placeholder
1435  *  @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error
1436  */
1437 int sam_read1(samFile* fp, sam_hdr_t* h, bam1_t* b);
1438 /// sam_write1 - Write a record to a file
1439 /** @param fp    Pointer to the destination file
1440  *  @param h     Pointer to the header structure previously read
1441  *  @param b     Pointer to the record to be written
1442  *  @return >= 0 on successfully writing the record, -1 on error
1443  */
1444 int sam_write1(samFile* fp, const(sam_hdr_t)* h, const(bam1_t)* b);
1445 
1446 /*************************************
1447  *** Manipulating auxiliary fields ***
1448  *************************************/
1449 
1450 /// Return a pointer to an aux record
1451 /** @param b   Pointer to the bam record
1452     @param tag Desired aux tag
1453     @return Pointer to the tag data, or NULL if tag is not present or on error
1454     If the tag is not present, this function returns NULL and sets errno to
1455     ENOENT.  If the bam record's aux data is corrupt (either a tag has an
1456     invalid type, or the last record is incomplete) then errno is set to
1457     EINVAL and NULL is returned.
1458  */
1459 ubyte* bam_aux_get(const(bam1_t)* b, ref const(char)[2] tag);
1460 
1461 /// Get an integer aux value
1462 /** @param s Pointer to the tag data, as returned by bam_aux_get()
1463     @return The value, or 0 if the tag was not an integer type
1464     If the tag is not an integer type, errno is set to EINVAL.  This function
1465     will not return the value of floating-point tags.
1466 */
1467 long bam_aux2i(const(ubyte)* s);
1468 
1469 /// Get an integer aux value
1470 /** @param s Pointer to the tag data, as returned by bam_aux_get()
1471     @return The value, or 0 if the tag was not an integer type
1472     If the tag is not an numeric type, errno is set to EINVAL.  The value of
1473     integer flags will be returned cast to a double.
1474 */
1475 double bam_aux2f(const(ubyte)* s);
1476 
1477 /// Get a character aux value
1478 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1479     @return The value, or 0 if the tag was not a character ('A') type
1480     If the tag is not a character type, errno is set to EINVAL.
1481 */
1482 char bam_aux2A(const(ubyte)* s);
1483 
1484 /// Get a string aux value
1485 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1486     @return Pointer to the string, or NULL if the tag was not a string type
1487     If the tag is not a string type ('Z' or 'H'), errno is set to EINVAL.
1488 */
1489 char* bam_aux2Z(const(ubyte)* s);
1490 
1491 /// Get the length of an array-type ('B') tag
1492 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1493     @return The length of the array, or 0 if the tag is not an array type.
1494     If the tag is not an array type, errno is set to EINVAL.
1495  */
1496 uint bam_auxB_len(const(ubyte)* s);
1497 
1498 /// Get an integer value from an array-type tag
1499 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
1500     @param idx 0-based Index into the array
1501     @return The idx'th value, or 0 on error.
1502     If the array is not an integer type, errno is set to EINVAL.  If idx
1503     is greater than or equal to  the value returned by bam_auxB_len(s),
1504     errno is set to ERANGE.  In both cases, 0 will be returned.
1505  */
1506 long bam_auxB2i(const(ubyte)* s, uint idx);
1507 
1508 /// Get a floating-point value from an array-type tag
1509 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
1510     @param idx 0-based Index into the array
1511     @return The idx'th value, or 0.0 on error.
1512     If the array is not a numeric type, errno is set to EINVAL.  This can
1513     only actually happen if the input record has an invalid type field.  If
1514     idx is greater than or equal to  the value returned by bam_auxB_len(s),
1515     errno is set to ERANGE.  In both cases, 0.0 will be returned.
1516  */
1517 double bam_auxB2f(const(ubyte)* s, uint idx);
1518 
1519 /// Append tag data to a bam record
1520 /* @param b    The bam record to append to.
1521    @param tag  Tag identifier
1522    @param type Tag data type
1523    @param len  Length of the data in bytes
1524    @param data The data to append
1525    @return 0 on success; -1 on failure.
1526 If there is not enough space to store the additional tag, errno is set to
1527 ENOMEM.  If the type is invalid, errno may be set to EINVAL.  errno is
1528 also set to EINVAL if the bam record's aux data is corrupt.
1529 */
1530 int bam_aux_append(
1531     bam1_t* b,
1532     ref const(char)[2] tag,
1533     char type,
1534     int len,
1535     const(ubyte)* data);
1536 
1537 /// Delete tag data from a bam record
1538 /* @param b The bam record to update
1539    @param s Pointer to the tag to delete, as returned by bam_aux_get().
1540    @return 0 on success; -1 on failure
1541    If the bam record's aux data is corrupt, errno is set to EINVAL and this
1542    function returns -1;
1543 */
1544 int bam_aux_del(bam1_t* b, ubyte* s);
1545 
1546 /// Update or add a string-type tag
1547 /* @param b    The bam record to update
1548    @param tag  Tag identifier
1549    @param len  The length of the new string
1550    @param data The new string
1551    @return 0 on success, -1 on failure
1552    This function will not change the ordering of tags in the bam record.
1553    New tags will be appended to any existing aux records.
1554 
1555    On failure, errno may be set to one of the following values:
1556 
1557    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1558    given ID is not of type 'Z'.
1559 
1560    ENOMEM: The bam data needs to be expanded and either the attempt to
1561    reallocate the data buffer failed or the resulting buffer would be
1562    longer than the maximum size allowed in a bam record (2Gbytes).
1563 */
1564 int bam_aux_update_str(
1565     bam1_t* b,
1566     ref const(char)[2] tag,
1567     int len,
1568     const(char)* data);
1569 
1570 /// Update or add an integer tag
1571 /* @param b    The bam record to update
1572    @param tag  Tag identifier
1573    @param val  The new value
1574    @return 0 on success, -1 on failure
1575    This function will not change the ordering of tags in the bam record.
1576    New tags will be appended to any existing aux records.
1577 
1578    On failure, errno may be set to one of the following values:
1579 
1580    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1581    given ID is not of an integer type (c, C, s, S, i or I).
1582 
1583    EOVERFLOW (or ERANGE on systems that do not have EOVERFLOW): val is
1584    outside the range that can be stored in an integer bam tag (-2147483647
1585    to 4294967295).
1586 
1587    ENOMEM: The bam data needs to be expanded and either the attempt to
1588    reallocate the data buffer failed or the resulting buffer would be
1589    longer than the maximum size allowed in a bam record (2Gbytes).
1590 */
1591 int bam_aux_update_int(bam1_t* b, ref const(char)[2] tag, long val);
1592 
1593 /// Update or add a floating-point tag
1594 /* @param b    The bam record to update
1595    @param tag  Tag identifier
1596    @param val  The new value
1597    @return 0 on success, -1 on failure
1598    This function will not change the ordering of tags in the bam record.
1599    New tags will be appended to any existing aux records.
1600 
1601    On failure, errno may be set to one of the following values:
1602 
1603    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1604    given ID is not of a float type.
1605 
1606    ENOMEM: The bam data needs to be expanded and either the attempt to
1607    reallocate the data buffer failed or the resulting buffer would be
1608    longer than the maximum size allowed in a bam record (2Gbytes).
1609 */
1610 int bam_aux_update_float(bam1_t* b, ref const(char)[2] tag, float val);
1611 
1612 /// Update or add an array tag
1613 /* @param b     The bam record to update
1614    @param tag   Tag identifier
1615    @param type  Data type (one of c, C, s, S, i, I or f)
1616    @param items Number of items
1617    @param data  Pointer to data
1618    @return 0 on success, -1 on failure
1619    The type parameter indicates the how the data is interpreted:
1620 
1621    Letter code | Data type | Item Size (bytes)
1622    ----------- | --------- | -----------------
1623    c           | int8_t    | 1
1624    C           | uint8_t   | 1
1625    s           | int16_t   | 2
1626    S           | uint16_t  | 2
1627    i           | int32_t   | 4
1628    I           | uint32_t  | 4
1629    f           | float     | 4
1630 
1631    This function will not change the ordering of tags in the bam record.
1632    New tags will be appended to any existing aux records.  The bam record
1633    will grow or shrink in order to accomodate the new data.
1634 
1635    The data parameter must not point to any data in the bam record itself or
1636    undefined behaviour may result.
1637 
1638    On failure, errno may be set to one of the following values:
1639 
1640    EINVAL: The bam record's aux data is corrupt, an existing tag with the
1641    given ID is not of an array type or the type parameter is not one of
1642    the values listed above.
1643 
1644    ENOMEM: The bam data needs to be expanded and either the attempt to
1645    reallocate the data buffer failed or the resulting buffer would be
1646    longer than the maximum size allowed in a bam record (2Gbytes).
1647 */
1648 int bam_aux_update_array(
1649     bam1_t* b,
1650     ref const(char)[2] tag,
1651     ubyte type,
1652     uint items,
1653     void* data);
1654 
1655 /**************************
1656  *** Pileup and Mpileup ***
1657  **************************/
1658 
1659 /*! @typedef
1660  @abstract Generic pileup 'client data'.
1661 
1662  @discussion The pileup iterator allows setting a constructor and
1663  destructor function, which will be called every time a sequence is
1664  fetched and discarded.  This permits caching of per-sequence data in
1665  a tidy manner during the pileup process.  This union is the cached
1666  data to be manipulated by the "client" (the caller of pileup).
1667 */
1668 union bam_pileup_cd
1669 {
1670     void* p;
1671     long i;
1672     double f;
1673 }
1674 
1675 /*! @typedef
1676  @abstract Structure for one alignment covering the pileup position.
1677  @field  b          pointer to the alignment
1678  @field  qpos       position of the read base at the pileup site, 0-based
1679  @field  indel      indel length; 0 for no indel, positive for ins and negative for del
1680  @field  level      the level of the read in the "viewer" mode
1681  @field  is_del     1 iff the base on the padded read is a deletion
1682  @field  is_head    1 iff this is the first base in the query sequence
1683  @field  is_tail    1 iff this is the last base in the query sequence
1684  @field  is_refskip 1 iff the base on the padded read is part of CIGAR N op
1685  @field  aux        (used by bcf_call_gap_prep())
1686  @field  cigar_ind  index of the CIGAR operator that has just been processed
1687 
1688  @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
1689  difference between the two functions is that the former does not
1690  set bam_pileup1_t::level, while the later does. Level helps the
1691  implementation of alignment viewers, but calculating this has some
1692  overhead.
1693  */
1694 struct bam_pileup1_t
1695 {
1696     import std.bitmanip : bitfields;
1697 
1698     bam1_t* b;
1699     int qpos;
1700     int indel;
1701     int level;
1702 
1703     mixin(bitfields!(
1704         uint, "is_del", 1,
1705         uint, "is_head", 1,
1706         uint, "is_tail", 1,
1707         uint, "is_refskip", 1,
1708         uint, "", 1,
1709         uint, "aux", 27));
1710 
1711     /* reserved */
1712     bam_pileup_cd cd; // generic per-struct data, owned by caller.
1713     int cigar_ind;
1714 }
1715 
1716 alias bam_plp_auto_f = int function(void* data, bam1_t* b);
1717 
1718 struct __bam_plp_t;
1719 alias bam_plp_t = __bam_plp_t*;
1720 
1721 struct __bam_mplp_t;
1722 alias bam_mplp_t = __bam_mplp_t*;
1723 
1724 /**
1725  *  bam_plp_init() - sets an iterator over multiple
1726  *  @func:      see mplp_func in bam_plcmd.c in samtools for an example. Expected return
1727  *              status: 0 on success, -1 on end, < -1 on non-recoverable errors
1728  *  @data:      user data to pass to @func
1729  *
1730  *  The struct returned by a successful call should be freed
1731  *  via bam_plp_destroy() when it is no longer needed.
1732  */
1733 bam_plp_t bam_plp_init(bam_plp_auto_f func, void* data);
1734 
1735 void bam_plp_destroy(bam_plp_t iter);
1736 
1737 int bam_plp_push(bam_plp_t iter, const(bam1_t)* b);
1738 
1739 const(bam_pileup1_t)* bam_plp_next(
1740     bam_plp_t iter,
1741     int* _tid,
1742     int* _pos,
1743     int* _n_plp);
1744 
1745 const(bam_pileup1_t)* bam_plp_auto(
1746     bam_plp_t iter,
1747     int* _tid,
1748     int* _pos,
1749     int* _n_plp);
1750 
1751 const(bam_pileup1_t)* bam_plp64_next(
1752     bam_plp_t iter,
1753     int* _tid,
1754     hts_pos_t* _pos,
1755     int* _n_plp);
1756 
1757 const(bam_pileup1_t)* bam_plp64_auto(
1758     bam_plp_t iter,
1759     int* _tid,
1760     hts_pos_t* _pos,
1761     int* _n_plp);
1762 
1763 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
1764 
1765 void bam_plp_reset(bam_plp_t iter);
1766 
1767 /**
1768  *  bam_plp_constructor() - sets a callback to initialise any per-pileup1_t fields.
1769  *  @plp:       The bam_plp_t initialised using bam_plp_init.
1770  *  @func:      The callback function itself.  When called, it is given the
1771  *              data argument (specified in bam_plp_init), the bam structure and
1772  *              a pointer to a locally allocated bam_pileup_cd union.  This union
1773  *              will also be present in each bam_pileup1_t created.
1774  */
1775 void bam_plp_constructor(
1776     bam_plp_t plp,
1777     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1778 void bam_plp_destructor(
1779     bam_plp_t plp,
1780     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1781 
1782 /// Get pileup padded insertion sequence
1783 /**
1784  * @param p       pileup data
1785  * @param ins     the kstring where the insertion sequence will be written
1786  * @param del_len location for deletion length
1787  * @return the length of insertion string on success; -1 on failure.
1788  *
1789  * Fills out the kstring with the padded insertion sequence for the current
1790  * location in 'p'.  If this is not an insertion site, the string is blank.
1791  *
1792  * If del_len is not NULL, the location pointed to is set to the length of
1793  * any deletion immediately following the insertion, or zero if none.
1794  */
1795 int bam_plp_insertion(const(bam_pileup1_t)* p, kstring_t* ins, int* del_len);
1796 
1797 /// Create a new bam_mplp_t structure
1798 /** The struct returned by a successful call should be freed
1799  *  via bam_mplp_destroy() when it is no longer needed.
1800  */
1801 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void** data);
1802 
1803 /// Set up mpileup overlap detection
1804 /**
1805  * @param iter    mpileup iterator
1806  * @return 0 on success; a negative value on error
1807  *
1808  *  If called, mpileup will detect overlapping
1809  *  read pairs and for each base pair set the base quality of the
1810  *  lower-quality base to zero, thus effectively discarding it from
1811  *  calling. If the two bases are identical, the quality of the other base
1812  *  is increased to the sum of their qualities (capped at 200), otherwise
1813  *  it is multiplied by 0.8.
1814  */
1815 int bam_mplp_init_overlaps(bam_mplp_t iter);
1816 
1817 void bam_mplp_destroy(bam_mplp_t iter);
1818 
1819 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
1820 
1821 int bam_mplp_auto(
1822     bam_mplp_t iter,
1823     int* _tid,
1824     int* _pos,
1825     int* n_plp,
1826     const(bam_pileup1_t*)* plp);
1827 
1828 int bam_mplp64_auto(
1829     bam_mplp_t iter,
1830     int* _tid,
1831     hts_pos_t* _pos,
1832     int* n_plp,
1833     const(bam_pileup1_t*)* plp);
1834 
1835 void bam_mplp_reset(bam_mplp_t iter);
1836 
1837 void bam_mplp_constructor(
1838     bam_mplp_t iter,
1839     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1840 
1841 void bam_mplp_destructor(
1842     bam_mplp_t iter,
1843     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1844 
1845 // ~!defined(BAM_NO_PILEUP)
1846 
1847 /***********************************
1848  * BAQ calculation and realignment *
1849  ***********************************/
1850 
1851 int sam_cap_mapq(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int thres);
1852 
1853 /// Calculate BAQ scores
1854 /** @param b   BAM record
1855     @param ref     Reference sequence
1856     @param ref_len Reference sequence length
1857     @param flag    Flags, see description
1858     @return 0 on success \n
1859            -1 if the read was unmapped, zero length, had no quality values, did not have at least one M, X or = CIGAR operator, or included a reference skip. \n
1860            -3 if BAQ alignment has already been done and does not need to be applied, or has already been applied. \n
1861            -4 if alignment failed (most likely due to running out of memory)
1862 
1863 This function calculates base alignment quality (BAQ) values using the method
1864 described in "Improving SNP discovery by base alignment quality", Heng Li,
1865 Bioinformatics, Volume 27, Issue 8 (https://doi.org/10.1093/bioinformatics/btr076).
1866 
1867 The following @param flag bits can be used:
1868 
1869 Bit 0: Adjust the quality values using the BAQ values
1870 
1871  If set, the data in the BQ:Z tag is used to adjust the quality values, and
1872  the BQ:Z tag is renamed to ZQ:Z.
1873 
1874  If clear, and a ZQ:Z tag is present, the quality values are reverted using
1875  the data in the tag, and the tag is renamed to BQ:Z.
1876 
1877 Bit 1: Use "extended" BAQ.
1878 
1879  Changes the BAQ calculation to increase sensitivity at the expense of
1880  reduced specificity.
1881 
1882 Bit 2: Recalculate BAQ, even if a BQ tag is present.
1883 
1884  Force BAQ to be recalculated.  Note that a ZQ:Z tag will always disable
1885  recalculation.
1886 
1887 @bug
1888 If the input read has both BQ:Z and ZQ:Z tags, the ZQ:Z one will be removed.
1889 Depending on what previous processing happened, this may or may not be the
1890 correct thing to do.  It would be wise to avoid this situation if possible.
1891 */
1892 
1893 int sam_prob_realn(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int flag);
1894